/*
 * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP
#define SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "oops/oop.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"

class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManagerNew;
class PSParallelCompactNew;
class ParallelOldTracer;
class STWGCTimer;

class SpaceInfoNew
{
public:
  MutableSpace* space() const { return _space; }

  // The start array for the (generation containing the) space, or null if there
  // is no start array.
  ObjectStartArray* start_array() const { return _start_array; }

  void set_space(MutableSpace* s) { _space = s; }
  void set_start_array(ObjectStartArray* s) { _start_array = s; }

private:
  MutableSpace* _space;
  ObjectStartArray* _start_array;
};

// The Parallel compaction collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads. The collection includes
// the tenured generation and the young generation.
//
// A collection consists of the following phases.
//
//   - marking phase
//   - summary phase (single-threaded)
//   - forward (to new address) phase
//   - adjust pointers phase
//   - compacting phase
//   - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
//
//   - mark all the live objects
//   - set-up temporary regions to enable parallelism in following phases
//   - calculate the destination of each object at the end of the collection
//   - adjust pointers to reflect new destination of objects
//   - move the objects to their destination
//   - update some references and reinitialize some variables
//
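// As a rough orientation, the phases above are sequenced roughly as sketched
// below. This is an illustrative outline only, not the actual control flow,
// which lives in the corresponding .cpp file and additionally covers policy
// decisions, timing, logging and the serial-mode retry:
//
//   pre_compact();
//   marking_phase(&_gc_tracer);   // mark all live objects
//   summary_phase();              // set up the (parallel or serial) regions
//   forward_to_new_addr();        // calculate destination addresses
//   adjust_pointers();            // rewrite references to the new addresses
//   compact();                    // move objects to their destinations
//   post_compact();               // fill gaps, update spaces and counters
//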
// A space that is being collected is divided into regions, and each region is
// associated with an object of type PCRegionData. Regions are targeted to be of
// a mostly uniform size, but if an object would cross a region boundary, then
// the boundary is adjusted to lie after the end of that object.
//
// The marking phase does a complete marking of all live objects in the heap.
// The marking phase uses multiple GC threads and marking is done in a bit
// array of type ParMarkBitMap. The marking of the bit map is done atomically.
//
// The summary phase sets up the regions such that each region covers a roughly
// uniform chunk of memory (currently the same size as SpaceAlignment). However,
// if that would result in an object crossing a region boundary, then
// the upper bound is adjusted such that the region ends after that object.
// This way we can ensure that a GC worker thread can fully 'own' a region
// during the forwarding, adjustment and compaction phases, without worrying
// about other threads messing with parts of the object. The summary phase
// also sets up an alternative set of regions, where each region covers
// a single space. This is used for a serial compaction mode which achieves
// maximum compaction at the expense of parallelism during the forwarding
// and compaction phases.
//
// The forwarding phase calculates the new address of each live
// object and records the old-addr-to-new-addr association. It does this using
// multiple GC threads. Each thread 'claims' a source region and appends it to a
// local work-list. The region is also set as the current compaction region
// for that thread. All live objects in the region are then visited and their
// new locations calculated by tracking the compaction point in the compaction
// region. Once the source region is exhausted, the next source region is
// claimed from the global pool and appended to the end of the local work-list.
// Once the compaction region is exhausted, the top of the old compaction region
// is recorded, and the next compaction region is fetched from the front of the
// local work-list (which is guaranteed to have already been processed, or to
// be the same as the source region). This way, each worker forms a local
// list of regions within which it can compact as if it were performing a
// serial compaction (see the illustrative sketch further below).
//
// The adjust pointers phase remaps all pointers to reflect the new address of
// each object. Again, this uses multiple GC worker threads. Each thread claims
// a region and processes all references in all live objects of that region. The
// thread then proceeds to claim the next region from the global pool, until
// all regions have been processed.
//
// The compaction phase moves objects to their new location. Again, this uses
// multiple GC worker threads. Each worker processes the local work-list that
// has been set up during the forwarding phase, walking it from bottom
// to top and copying each live object to its new location (which is guaranteed
// to be at a lower address within that thread's part of the heap, and thus
// would never overwrite other objects).
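//
// For illustration, the per-worker forwarding loop described above has roughly
// the following shape. This is a sketch only; helper names such as
// claim_next_source_region(), local_list and next_after() are hypothetical and
// do not appear in this file, only the PCRegionData accessors are real:
//
//   PCRegionData* compaction = nullptr;              // current compaction region
//   for (PCRegionData* source = claim_next_source_region();
//        source != nullptr;
//        source = claim_next_source_region()) {
//     local_list.append(source);
//     if (compaction == nullptr) {
//       compaction = source;
//     }
//     for each live object obj in [source->bottom(), source->top()):
//       while obj does not fit between compaction->new_top() and compaction->end():
//         // record top of the exhausted compaction region, then advance to the
//         // next region on the local list (already processed, or == source)
//         compaction = local_list.next_after(compaction);
//       forward obj to compaction->new_top();
//       compaction->set_new_top(compaction->new_top() + obj->size());
//   }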
//
// This algorithm will usually leave gaps of non-fillable memory at the end
// of regions, and potentially whole empty regions at the end of compaction.
// The post-compaction phase fills those gaps with filler objects to ensure
// that the heap remains parsable.
//
// In some situations, this inefficiency of leaving gaps can lead to a
// situation where, after a full GC, it is still not possible to satisfy an
// allocation, even though there should be enough memory available. When
// that happens, the collector switches to a serial mode, where we only
// have 4 regions which correspond exactly to the 4 spaces, and the forwarding
// and compaction phases are executed using only a single thread. This
// achieves maximum compaction. This serial mode is also invoked when
// System.gc() is called *and* UseMaximumCompactionOnSystemGC is set to
// true (which is the default), or when the number of full GCs exceeds
// HeapMaximumCompactionInterval.
//
// Possible improvements to the algorithm include:
// - Identify and ignore a 'dense prefix'. This requires that we collect
//   liveness data during marking, or that we scan the prefix object-by-object
//   during the summary phase.
// - When an object does not fit into a remaining gap of a region, and the
//   object is rather large, we could attempt to forward/compact subsequent
//   objects 'around' that large object in an attempt to minimize the
//   resulting gap. This could be achieved by reconfiguring the regions
//   to exclude the large object.
// - Instead of finding out *after* the whole compaction that an allocation
//   can still not be satisfied, and then re-running the whole compaction
//   serially, we could determine that after the forwarding phase, and
//   re-do only the forwarding serially, thus avoiding running marking,
//   adjusting references and compaction twice.
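//
// For illustration, the claiming protocol provided by PCRegionData (below) can
// be used during the adjust-pointers phase roughly as follows. The simple
// linear scan over the region array is an assumption for this sketch, not code
// taken from this file; get_num_regions() and get_region_data_array() are the
// accessors declared in PSParallelCompactNew further below:
//
//   PCRegionData* regions = get_region_data_array();
//   for (size_t i = 0; i < get_num_regions(); i++) {
//     PCRegionData* rd = &regions[i];
//     if (rd->claim()) {
//       // This worker now owns rd: update every reference in the live objects
//       // in [rd->bottom(), rd->top()) to the forwarded address of the object
//       // it points to.
//     }
//   }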

class PCRegionData /*: public CHeapObj<mtGC> */ {
  // A region index
  size_t const _idx;

  // The start of the region
  HeapWord* const _bottom;
  // The top of the region (first word after the last live object in the containing space)
  HeapWord* const _top;
  // The end of the region (first word after the last word of the region)
  HeapWord* const _end;

  // The next compaction address
  HeapWord* _new_top;

  // Points to the next region in the GC-worker-local work-list
  PCRegionData* _local_next;

  // Parallel workers claiming protocol, used during the adjust-references phase.
  volatile bool _claimed;

public:

  PCRegionData(size_t idx, HeapWord* bottom, HeapWord* top, HeapWord* end) :
    _idx(idx), _bottom(bottom), _top(top), _end(end), _new_top(bottom),
    _local_next(nullptr), _claimed(false) {}

  size_t idx() const { return _idx; }

  HeapWord* bottom() const { return _bottom; }
  HeapWord* top() const { return _top; }
  HeapWord* end() const { return _end; }

  PCRegionData* local_next() const { return _local_next; }
  PCRegionData** local_next_addr() { return &_local_next; }

  HeapWord* new_top() const {
    return _new_top;
  }
  void set_new_top(HeapWord* new_top) {
    _new_top = new_top;
  }

  bool contains(oop obj) {
    auto* obj_start = cast_from_oop<HeapWord*>(obj);
    HeapWord* obj_end = obj_start + obj->size();
    return _bottom <= obj_start && obj_start < _end && _bottom < obj_end && obj_end <= _end;
  }

  // Attempt to claim this region for the calling worker. Returns true if this
  // worker successfully claimed the region, false if it was already claimed.
  bool claim() {
    bool claimed = _claimed;
    if (claimed) {
      return false;
    }
    return !Atomic::cmpxchg(&_claimed, false, true);
  }
};

class PSParallelCompactNew : AllStatic {
public:
  typedef enum {
    old_space_id, eden_space_id,
    from_space_id, to_space_id, last_space_id
  } SpaceId;

public:
  class IsAliveClosure: public BoolObjectClosure {
  public:
    bool do_object_b(oop p) final;
  };

private:
  static STWGCTimer _gc_timer;
  static ParallelOldTracer _gc_tracer;
  static elapsedTimer _accumulated_time;
  static unsigned int _maximum_compaction_gc_num;
  static CollectorCounters* _counters;
  static ParMarkBitMap _mark_bitmap;
  static IsAliveClosure _is_alive_closure;
  static SpaceInfoNew _space_info[last_space_id];

  // The head of the global region data list.
  static size_t _num_regions;
  static PCRegionData* _region_data_array;
  static PCRegionData** _per_worker_region_data;

  static size_t _num_regions_serial;
  static PCRegionData* _region_data_array_serial;
  static bool _serial;

  // Reference processing (used in ...follow_contents)
  static SpanSubjectToDiscoveryClosure _span_based_discoverer;
  static ReferenceProcessor* _ref_processor;

  static uint get_num_workers() { return _serial ? 1 : ParallelScavengeHeap::heap()->workers().active_workers(); }
  static size_t get_num_regions() { return _serial ? _num_regions_serial : _num_regions; }
  static PCRegionData* get_region_data_array() { return _serial ? _region_data_array_serial : _region_data_array; }

public:
  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }

private:

  static void initialize_space_info();

  // Clear the marking bitmap and summary data that cover the specified space.
  static void clear_data_covering_space(SpaceId id);

  static void pre_compact();

  static void post_compact();

  static bool check_maximum_compaction();

  // Mark live objects
  static void marking_phase(ParallelOldTracer *gc_tracer);

  static void summary_phase();
  static void setup_regions_parallel();
  static void setup_regions_serial();

  static void adjust_pointers();
  static void forward_to_new_addr();

  // Move objects to new locations.
  static void compact();

public:
  static bool invoke(bool maximum_heap_compaction, bool serial);
  static bool invoke_no_policy(bool maximum_heap_compaction, bool serial);

  static void adjust_pointers_in_spaces(uint worker_id);

  static void post_initialize();
  // Perform initialization for PSParallelCompactNew that requires
  // allocations. This should be called during the VM initialization
  // at a point where it would be appropriate to return a JNI_ENOMEM
  // in the event of a failure.
  static bool initialize_aux_data();

  // Closure accessors
  static BoolObjectClosure* is_alive_closure() { return &_is_alive_closure; }

  // Public accessors
  static elapsedTimer* accumulated_time() { return &_accumulated_time; }

  static CollectorCounters* counters() { return _counters; }

  static inline bool is_marked(oop obj);

  template <class T> static inline void adjust_pointer(T* p);

  // Convenience wrappers for per-space data kept in _space_info.
  static inline MutableSpace* space(SpaceId space_id);
  static inline ObjectStartArray* start_array(SpaceId space_id);

  static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }

  // Reference Processing
  static ReferenceProcessor* ref_processor() { return _ref_processor; }

  static STWGCTimer* gc_timer() { return &_gc_timer; }

  // Return the SpaceId for the given address.
  static SpaceId space_id(HeapWord* addr);

  static void print_on_error(outputStream* st);
};

void steal_marking_work_new(TaskTerminator& terminator, uint worker_id);

#endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP