/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

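// Default (no-barrier) load: perform a plain load of the given type,
// decoding compressed oops when loading a heap reference. GC-specific
// BarrierSetAssembler subclasses override this to add the barriers they need.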
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

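// Default (no-barrier) store: perform a plain store of the given type,
// encoding compressed oops when storing a heap reference, or storing
// NULL_WORD when val == noreg. GC-specific BarrierSetAssembler subclasses
// override this to add the barriers they need.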
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

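// Scalar (general purpose register) element loads/stores used by the
// arraycopy stubs. For checkcast copies of compressed oops the element is
// decoded on load and re-encoded on store, so the copy loop works on full oops.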
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

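// Vector (XMM/YMM register) variants of the copy loads/stores, used for
// 16-byte and 32-byte wide copies in the arraycopy stubs.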
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

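// Resolve a jobject from native code without calling into the VM: strip the
// tag bits and load the referenced oop. Barrier sets that need a read barrier
// on the resolved oop override this and use 'slowpath' when the fast path
// cannot be taken.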
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

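// Bump-pointer allocation from the current thread's TLAB of an object of
// either constant (con_size_in_bytes) or variable (var_size_in_bytes) size.
// Jumps to slow_case if the TLAB does not have enough space left.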
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

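// nmethod entry barrier: compare the guard value patched into the nmethod
// entry (the cmpl immediate) against the current disarmed guard value. If
// they differ, the barrier is armed and the method entry barrier stub is
// called (or, on 64-bit, slow_path is taken when a label is supplied).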
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

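// c2i entry barrier: rbx holds the incoming Method*. Verify that the method's
// holder class loader is still alive (a strong CLD, or a weak CLD whose
// holder is still reachable) before entering it; otherwise bail out through
// the handle_wrong_method stub.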
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

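// Verification helper: check that obj looks like a valid oop, i.e. its
// address matches the expected verify_oop mask/bits and its klass pointer is
// non-null, jumping to 'error' otherwise. Clobbers obj, tmp1 and tmp2.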
void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure klass is 'reasonable', i.e., not null
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

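// Map an OptoReg assigned by the register allocator into the form consumed by
// the barrier stubs: general registers pass through unchanged, while for XMM
// registers the low 4 bits are replaced with an encoding of how much of the
// register (D/X/Y/Z) is live, as decoded by SaveLiveRegisters below.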
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

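// A vzeroupper is only needed when at least one saved register has live state
// above 128 bits; since the registers are sorted largest first, checking the
// first entry is sufficient. It avoids AVX/SSE transition penalties around
// the runtime call.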
bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

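// Determine which live caller-saved registers the stub must preserve and how
// much stack is needed to spill them (including the Windows register argument
// spill area), keeping rsp 16-byte aligned across the runtime call.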
void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // that are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // The low order 4 bits of the opto_reg encode how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2