/*
 * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP
#define SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "oops/oop.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"

class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManagerNew;
class PSParallelCompactNew;
class ParallelOldTracer;
class STWGCTimer;
class SpaceInfoNew
{
public:
  MutableSpace* space() const { return _space; }

  // The start array for the (generation containing the) space, or null if there
  // is no start array.
  ObjectStartArray* start_array() const { return _start_array; }

  void set_space(MutableSpace* s)           { _space = s; }
  void set_start_array(ObjectStartArray* s) { _start_array = s; }

private:
  MutableSpace*     _space;
  ObjectStartArray* _start_array;
};

// The Parallel compaction collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads.  The collection includes
// the tenured generation and the young generation.
//
// A collection consists of the following phases.
//
//      - marking phase
//      - summary phase (single-threaded)
//      - forward (to new address) phase
//      - adjust pointers phase
//      - compacting phase
//      - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
//
//      - mark all the live objects
//      - set up temporary regions to enable parallelism in the following phases
//      - calculate the destination of each object at the end of the collection
//      - adjust pointers to reflect the new destination of objects
//      - move the objects to their destination
//      - update some references and reinitialize some variables
//
// A space that is being collected is divided into regions, and each region
// is associated with an object of type PCRegionData. Regions are targeted to
// be of a mostly uniform size, but if an object would cross a region boundary,
// then the boundary is adjusted to be after the end of that object.
//
// The marking phase does a complete marking of all live objects in the heap.
// The marking phase uses multiple GC threads and marking is done in a bit
// array of type ParMarkBitMap.  The marking of the bit map is done atomically.
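//
// As an illustration only: a minimal sketch of the mark-and-push step, where
// try_mark() stands in for the actual (atomic) ParMarkBitMap marking call and
// push() for the worker-local marking stack; both names are hypothetical:
//
//   void mark_and_push(ParCompactionManagerNew* cm, oop obj) {
//     if (try_mark(obj)) {  // atomic; returns false if already marked
//       cm->push(obj);      // object's fields will be traced later
//     }
//   }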
//
// The summary phase sets up the regions such that each region covers a
// roughly uniform chunk of memory (currently the same size as SpaceAlignment).
// However, if that would result in an object crossing a region boundary, then
// the upper bound is adjusted such that the region ends after that object.
// This way we can ensure that a GC worker thread can fully 'own' a region
// during the forwarding, adjustment and compaction phases, without worrying
// about other threads accessing parts of its objects. The summary phase
// also sets up an alternative set of regions, where each region covers
// a single space. This is used for a serial compaction mode which achieves
// maximum compaction at the expense of parallelism during the forwarding
// and compaction phases.
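//
// As an illustration only: a sketch of the parallel region setup, assuming
// hypothetical helpers region_size_words (derived from SpaceAlignment),
// first_obj_start_at_or_after() and append_region(); top == end is a
// simplification here:
//
//   size_t idx = 0;
//   HeapWord* bottom = space->bottom();
//   while (bottom < space->top()) {
//     HeapWord* end = bottom + region_size_words;
//     if (end < space->top()) {
//       // Never split an object: extend the region to the next object start.
//       end = first_obj_start_at_or_after(end);
//     } else {
//       end = space->top();
//     }
//     append_region(PCRegionData(idx++, bottom, /* top */ end, /* end */ end));
//     bottom = end;
//   }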
//
// The forwarding phase calculates the new address of each live
// object and records the old-addr-to-new-addr association. It does this using
// multiple GC threads. Each thread 'claims' a source region and appends it to a
// local work-list. The region is also set as the current compaction region
// for that thread. All live objects in the region are then visited and their
// new locations are calculated by tracking the compaction point in the compaction
// region. Once the source region is exhausted, the next source region is
// claimed from the global pool and appended to the end of the local work-list.
// Once the compaction region is exhausted, the top of the old compaction region
// is recorded, and the next compaction region is fetched from the front of the
// local work-list (which is guaranteed to have already been fully processed,
// or to be the same as the source region). This way, each worker forms a local
// list of regions in which it can compact as if it were a serial
// compaction.
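//
// As an illustration only: a condensed sketch of the per-worker forwarding
// loop; claim_next_region(), append_to_local_list() and
// forward_objs_in_region() are hypothetical helpers:
//
//   PCRegionData* compaction_region = nullptr;  // current destination region
//   PCRegionData* source;
//   while ((source = claim_next_region()) != nullptr) {
//     append_to_local_list(source);             // grows this worker's list
//     if (compaction_region == nullptr) {
//       compaction_region = source;
//     }
//     // Walks live objects in 'source', assigning each a new address by
//     // bumping the compaction point; advances compaction_region along the
//     // local list when it fills up.
//     forward_objs_in_region(source, &compaction_region);
//   }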
//
// The adjust pointers phase remaps all pointers to reflect the new address of
// each object. Again, this uses multiple GC worker threads. Each thread claims
// a region and processes all references in all live objects of that region. Then
// the thread proceeds to claim the next region from the global pool, until
// all regions have been processed.
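//
// As an illustration only: the claiming loop for this phase, using the
// claim() protocol of PCRegionData below (adjust_pointers_in_region() is a
// hypothetical helper):
//
//   for (size_t i = 0; i < get_num_regions(); i++) {
//     PCRegionData* region = get_region_data_array() + i;
//     if (region->claim()) {  // exactly one worker succeeds per region
//       adjust_pointers_in_region(region);
//     }
//   }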
//
// The compaction phase moves objects to their new locations. Again, this uses
// multiple GC worker threads. Each worker processes the local work-list that
// has been set up during the forwarding phase from bottom
// to top, copying each live object to its new location (which is guaranteed
// to be lower in that thread's part of the heap, and thus will never overwrite
// other objects).
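//
// As an illustration only: a sketch of the per-worker compaction loop over
// the local work-list built during forwarding (copy_objs_in_region() is a
// hypothetical helper):
//
//   for (PCRegionData* r = local_list_head; r != nullptr; r = r->local_next()) {
//     // Copies each live object down to its forwarded address; destinations
//     // lie only within this worker's own region list.
//     copy_objs_in_region(r);
//   }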
//
// This algorithm will usually leave gaps of non-fillable memory at the end
// of regions, and potentially whole empty regions at the end of compaction.
// The post-compaction phase fills those gaps with filler objects to ensure
// that the heap remains parsable.
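//
// As an illustration only: a sketch of the gap-filling step, here using
// CollectedHeap::fill_with_object() to make the unused tail of each region
// walkable (the real code may bound the fill differently):
//
//   for (size_t i = 0; i < get_num_regions(); i++) {
//     PCRegionData* r = get_region_data_array() + i;
//     if (r->new_top() < r->end()) {
//       CollectedHeap::fill_with_object(r->new_top(), r->end());
//     }
//   }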
//
// In some situations, this inefficiency of leaving gaps can lead to a
// situation where, after a full GC, it is still not possible to satisfy an
// allocation, even though there should be enough memory available. When
// that happens, the collector switches to a serial mode, where we only
// have 4 regions which correspond exactly to the 4 spaces, and the forwarding
// and compaction phases are executed using only a single thread. This
// achieves maximum compaction. This serial mode is also invoked when
// System.gc() is called *and* UseMaximumCompactionOnSystemGC is set to
// true (which is the default), or when the number of full GCs exceeds
// HeapMaximumCompactionInterval.
//
// Possible improvements to the algorithm include:
// - Identify and ignore a 'dense prefix'. This requires that we collect
//   liveness data during marking, or that we scan the prefix object-by-object
//   during the summary phase.
// - When an object does not fit into a remaining gap of a region, and the
//   object is rather large, we could attempt to forward/compact subsequent
//   objects 'around' that large object in an attempt to minimize the
//   resulting gap. This could be achieved by reconfiguring the regions
//   to exclude the large object.
// - Instead of finding out *after* the whole compaction that an allocation
//   still cannot be satisfied, and then re-running the whole compaction
//   serially, we could determine that after the forwarding phase, and
//   re-do only the forwarding serially, thus avoiding running marking,
//   reference adjustment and compaction twice.
class PCRegionData /*: public CHeapObj<mtGC> */ {
  // The region index.
  size_t const _idx;

  // The start of the region.
  HeapWord* const _bottom;
  // The top of the region (first word after the last live object in the containing space).
  HeapWord* const _top;
  // The end of the region (first word after the last word of the region).
  HeapWord* const _end;

  // The next compaction address.
  HeapWord* _new_top;

  // Points to the next region in the GC-worker-local work-list.
  PCRegionData* _local_next;

  // Parallel workers' claiming protocol, used during the adjust-references phase.
  volatile bool _claimed;

public:

  PCRegionData(size_t idx, HeapWord* bottom, HeapWord* top, HeapWord* end) :
    _idx(idx), _bottom(bottom), _top(top), _end(end), _new_top(bottom),
    _local_next(nullptr), _claimed(false) {}

  size_t idx() const { return _idx; }

  HeapWord* bottom() const { return _bottom; }
  HeapWord* top()    const { return _top; }
  HeapWord* end()    const { return _end; }

  PCRegionData*  local_next() const { return _local_next; }
  PCRegionData** local_next_addr() { return &_local_next; }

  HeapWord* new_top() const {
    return _new_top;
  }
  void set_new_top(HeapWord* new_top) {
    _new_top = new_top;
  }

  bool contains(oop obj) {
    auto* obj_start = cast_from_oop<HeapWord*>(obj);
    HeapWord* obj_end = obj_start + obj->size();
    return _bottom <= obj_start && obj_start < _end && _bottom < obj_end && obj_end <= _end;
  }

  // Claim this region for exclusive processing. Returns true for exactly one
  // caller; all others get false.
  bool claim() {
    bool claimed = _claimed;
    if (claimed) {
      return false;
    }
    // Atomic::cmpxchg returns the previous value; we won the race iff it was false.
    return !Atomic::cmpxchg(&_claimed, false, true);
  }
};

class PSParallelCompactNew : AllStatic {
public:
  typedef enum {
    old_space_id, eden_space_id,
    from_space_id, to_space_id, last_space_id
  } SpaceId;

public:
  class IsAliveClosure: public BoolObjectClosure {
  public:
    bool do_object_b(oop p) final;
  };

private:
  static STWGCTimer           _gc_timer;
  static ParallelOldTracer    _gc_tracer;
  static elapsedTimer         _accumulated_time;
  static unsigned int         _maximum_compaction_gc_num;
  static CollectorCounters*   _counters;
  static ParMarkBitMap        _mark_bitmap;
  static IsAliveClosure       _is_alive_closure;
  static SpaceInfoNew         _space_info[last_space_id];
  // The global array of region data, and its size.
  static size_t               _num_regions;
  static PCRegionData*        _region_data_array;
  static PCRegionData**       _per_worker_region_data;

  // Alternative regions for the serial compaction mode: one region per space.
  static size_t               _num_regions_serial;
  static PCRegionData*        _region_data_array_serial;
  static bool                 _serial;

  // Reference processing (used in ...follow_contents)
  static SpanSubjectToDiscoveryClosure  _span_based_discoverer;
  static ReferenceProcessor*  _ref_processor;

  static uint get_num_workers() { return _serial ? 1 : ParallelScavengeHeap::heap()->workers().active_workers(); }
  static size_t get_num_regions() { return _serial ? _num_regions_serial : _num_regions; }
  static PCRegionData* get_region_data_array() { return _serial ? _region_data_array_serial : _region_data_array; }

public:
  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }

private:

  static void initialize_space_info();

  // Clear the marking bitmap and summary data that cover the specified space.
  static void clear_data_covering_space(SpaceId id);

  static void pre_compact();

  static void post_compact();

  static bool check_maximum_compaction();

  // Mark live objects.
  static void marking_phase(ParallelOldTracer *gc_tracer);

  static void summary_phase();
  static void setup_regions_parallel();
  static void setup_regions_serial();

  static void adjust_pointers();
  static void forward_to_new_addr();

  // Move objects to new locations.
  static void compact();

public:
  static bool invoke(bool maximum_heap_compaction, bool serial);
  static bool invoke_no_policy(bool maximum_heap_compaction, bool serial);

  static void adjust_pointers_in_spaces(uint worker_id);

  static void post_initialize();
  // Perform initialization for PSParallelCompactNew that requires
  // allocations.  This should be called during VM initialization
  // at a point where it would be appropriate to return a JNI_ENOMEM
  // in the event of a failure.
  static bool initialize_aux_data();

  // Closure accessors
  static BoolObjectClosure* is_alive_closure()     { return &_is_alive_closure; }

  // Public accessors
  static elapsedTimer* accumulated_time() { return &_accumulated_time; }

  static CollectorCounters* counters()    { return _counters; }

  static inline bool is_marked(oop obj);

  template <class T> static inline void adjust_pointer(T* p);

  // Convenience wrappers for per-space data kept in _space_info.
  static inline MutableSpace*     space(SpaceId space_id);
  static inline ObjectStartArray* start_array(SpaceId space_id);

  static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }

  // Reference Processing
  static ReferenceProcessor* ref_processor() { return _ref_processor; }

  static STWGCTimer* gc_timer() { return &_gc_timer; }

  // Return the SpaceId for the given address.
  static SpaceId space_id(HeapWord* addr);

  static void print_on_error(outputStream* st);
};

void steal_marking_work_new(TaskTerminator& terminator, uint worker_id);

#endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP