/*
 * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.inline.hpp"
#include "classfile/classLoaderData.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "gc/shared/barrierSetRuntime.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifdef COMPILER2
#include "gc/shared/c2/barrierSetC2.hpp"
#endif // COMPILER2

#define __ masm->

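// Plain load with no GC barrier: reads the value at 'src' into 'dst' (or onto the
// FPU/value stack for ftos/dtos/ltos when 'dst' is noreg), decoding compressed oops
// when needed. GC-specific barrier set assemblers override this to add their barriers.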
void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
#ifdef _LP64
      if (UseCompressedOops) {
        __ movl(dst, src);
        if (is_not_null) {
          __ decode_heap_oop_not_null(dst);
        } else {
          __ decode_heap_oop(dst);
        }
      } else
#endif
      {
        __ movptr(dst, src);
      }
    } else {
      assert(in_native, "why else?");
      __ movptr(dst, src);
    }
    break;
  }
  case T_BOOLEAN: __ load_unsigned_byte(dst, src);  break;
  case T_BYTE:    __ load_signed_byte(dst, src);    break;
  case T_CHAR:    __ load_unsigned_short(dst, src); break;
  case T_SHORT:   __ load_signed_short(dst, src);   break;
  case T_INT:     __ movl  (dst, src);              break;
  case T_ADDRESS: __ movptr(dst, src);              break;
  case T_FLOAT:
    assert(dst == noreg, "only to ftos");
    __ load_float(src);
    break;
  case T_DOUBLE:
    assert(dst == noreg, "only to dtos");
    __ load_double(src);
    break;
  case T_LONG:
    assert(dst == noreg, "only to ltos");
#ifdef _LP64
    __ movq(rax, src);
#else
    if (atomic) {
      __ fild_d(src);               // Must load atomically
      __ subptr(rsp,2*wordSize);    // Make space for store
      __ fistp_d(Address(rsp,0));
      __ pop(rax);
      __ pop(rdx);
    } else {
      __ movl(rax, src);
      __ movl(rdx, src.plus_disp(wordSize));
    }
#endif
    break;
  default: Unimplemented();
  }
}

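// Plain store with no GC barrier: writes 'val' (or the tos value in rax/the FPU stack
// for long/float/double when 'val' is noreg) to 'dst', encoding compressed oops when
// needed. A noreg oop value stores null.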
void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
  bool in_heap = (decorators & IN_HEAP) != 0;
  bool in_native = (decorators & IN_NATIVE) != 0;
  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
  bool atomic = (decorators & MO_RELAXED) != 0;

  switch (type) {
  case T_OBJECT:
  case T_ARRAY: {
    if (in_heap) {
      if (val == noreg) {
        assert(!is_not_null, "inconsistent access");
#ifdef _LP64
        if (UseCompressedOops) {
          __ movl(dst, NULL_WORD);
        } else {
          __ movslq(dst, NULL_WORD);
        }
#else
        __ movl(dst, NULL_WORD);
#endif
      } else {
#ifdef _LP64
        if (UseCompressedOops) {
          assert(!dst.uses(val), "not enough registers");
          if (is_not_null) {
            __ encode_heap_oop_not_null(val);
          } else {
            __ encode_heap_oop(val);
          }
          __ movl(dst, val);
        } else
#endif
        {
          __ movptr(dst, val);
        }
      }
    } else {
      assert(in_native, "why else?");
      assert(val != noreg, "not supported");
      __ movptr(dst, val);
    }
    break;
  }
  case T_BOOLEAN:
    __ andl(val, 0x1);  // boolean is true if LSB is 1
    __ movb(dst, val);
    break;
  case T_BYTE:
    __ movb(dst, val);
    break;
  case T_SHORT:
    __ movw(dst, val);
    break;
  case T_CHAR:
    __ movw(dst, val);
    break;
  case T_INT:
    __ movl(dst, val);
    break;
  case T_LONG:
    assert(val == noreg, "only tos");
#ifdef _LP64
    __ movq(dst, rax);
#else
    if (atomic) {
      __ push(rdx);
      __ push(rax);                 // Must update atomically with FIST
      __ fild_d(Address(rsp,0));    // So load into FPU register
      __ fistp_d(dst);              // and put into memory atomically
      __ addptr(rsp, 2*wordSize);
    } else {
      __ movptr(dst, rax);
      __ movptr(dst.plus_disp(wordSize), rdx);
    }
#endif
    break;
  case T_FLOAT:
    assert(val == noreg, "only tos");
    __ store_float(dst);
    break;
  case T_DOUBLE:
    assert(val == noreg, "only tos");
    __ store_double(dst);
    break;
  case T_ADDRESS:
    __ movptr(dst, val);
    break;
  default: Unimplemented();
  }
}

void BarrierSetAssembler::flat_field_copy(MacroAssembler* masm, DecoratorSet decorators,
                                          Register src, Register dst, Register inline_layout_info) {
  // The flat_field_copy implementation is fairly complex, and there are no
  // "short-cuts" to be made from asm. What there is appears to have the same
  // cost in C++, so just "call_VM_leaf" for now rather than maintain hundreds
  // of hand-rolled instructions...
  if (decorators & IS_DEST_UNINITIALIZED) {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy_is_dest_uninitialized), src, dst, inline_layout_info);
  } else {
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetRuntime::value_copy), src, dst, inline_layout_info);
  }
}

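// Raw element loads/stores used by the arraycopy stubs. The 'bytes' argument selects the
// access width; for checkcast oop copies with compressed oops the loaded/stored value is
// additionally decoded/encoded so the caller always sees an uncompressed oop.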
void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       Register dst,
                                       Address src,
                                       Register tmp) {
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ decode_heap_oop(dst);
  }
#endif
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        Register src,
                                        Register tmp) {
#ifdef _LP64
  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
    __ encode_heap_oop(src);
  }
#endif
  assert(bytes <= 8, "can only deal with non-vector registers");
  switch (bytes) {
  case 1:
    __ movb(dst, src);
    break;
  case 2:
    __ movw(dst, src);
    break;
  case 4:
    __ movl(dst, src);
    break;
  case 8:
#ifdef _LP64
    __ movq(dst, src);
#else
    fatal("No support for 8 bytes copy");
#endif
    break;
  default:
    fatal("Unexpected size");
  }
}

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                       DecoratorSet decorators,
                                       BasicType type,
                                       size_t bytes,
                                       XMMRegister dst,
                                       Address src,
                                       Register tmp,
                                       XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
                                        DecoratorSet decorators,
                                        BasicType type,
                                        size_t bytes,
                                        Address dst,
                                        XMMRegister src,
                                        Register tmp1,
                                        Register tmp2,
                                        XMMRegister xmm_tmp) {
  assert(bytes > 8, "can only deal with vector registers");
  if (bytes == 16) {
    __ movdqu(dst, src);
  } else if (bytes == 32) {
    __ vmovdqu(dst, src);
  } else {
    fatal("No support for >32 bytes copy");
  }
}

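// Resolves a jobject in native code: strips the handle tag and loads the oop from the
// handle. The 'slowpath' label is unused here; GCs that need extra checks override this
// and branch to 'slowpath' when the fast path cannot resolve the handle.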
void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                        Register obj, Register tmp, Label& slowpath) {
  __ clear_jobject_tag(obj);
  __ movptr(obj, Address(obj, 0));
}

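// Bump-pointer allocation in the thread-local allocation buffer (TLAB): 'obj' gets the
// current TLAB top and top is advanced by the constant or variable size. Jumps to
// 'slow_case' if the new top would exceed the TLAB end.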
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
                                        Register thread, Register obj,
                                        Register var_size_in_bytes,
                                        int con_size_in_bytes,
                                        Register t1,
                                        Register t2,
                                        Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    __ get_thread(thread);
#endif
  }

  __ verify_tlab();

  __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    __ lea(end, Address(obj, con_size_in_bytes));
  } else {
    __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  __ jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    __ subptr(var_size_in_bytes, obj);
  }
  __ verify_tlab();
}

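// nmethod entry barrier: compares the thread-local (LP64) or global (32-bit) disarmed
// guard value against the immediate baked into the cmp instruction, and calls the
// method entry barrier stub (or takes the supplied slow path) when the nmethod is armed.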
#ifdef _LP64
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  Register thread = r15_thread;
  Address disarmed_addr(thread, in_bytes(bs_nm->thread_disarmed_guard_value_offset()));
  // The immediate is the last 4 bytes, so if we align the start of the cmp
  // instruction to 4 bytes, we know that the second half of it is also 4
  // byte aligned, which means that the immediate will not cross a cache line
  __ align(4);
  uintptr_t before_cmp = (uintptr_t)__ pc();
  __ cmpl_imm32(disarmed_addr, 0);
  uintptr_t after_cmp = (uintptr_t)__ pc();
  guarantee(after_cmp - before_cmp == 8, "Wrong assumed instruction length");

  if (slow_path != nullptr) {
    __ jcc(Assembler::notEqual, *slow_path);
    __ bind(*continuation);
  } else {
    Label done;
    __ jccb(Assembler::equal, done);
    __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
    __ bind(done);
  }
}
#else
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) {
  BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
  Label continuation;

  Register tmp = rdi;
  __ push(tmp);
  __ movptr(tmp, (intptr_t)bs_nm->disarmed_guard_value_address());
  Address disarmed_addr(tmp, 0);
  __ align(4);
  __ cmpl_imm32(disarmed_addr, 0);
  __ pop(tmp);
  __ jcc(Assembler::equal, continuation);
  __ call(RuntimeAddress(StubRoutines::method_entry_barrier()));
  __ bind(continuation);
}
#endif

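// c2i entry barrier: keeps a compiled-to-interpreter call from entering a method whose
// class loader is concurrently unloading. The method is considered live if its holder
// CLD is strongly reachable or its weak handle still resolves; otherwise the call is
// routed to the handle_wrong_method stub.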
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {
  Label bad_call;
  __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters.
  __ jcc(Assembler::equal, bad_call);

  Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax );
  Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx );
#ifndef _LP64
  __ push(tmp1);
  __ push(tmp2);
#endif // !_LP64

  // Pointer chase to the method holder to find out if the method is concurrently unloading.
  Label method_live;
  __ load_method_holder_cld(tmp1, rbx);

  // Is it a strong CLD?
  __ cmpl(Address(tmp1, ClassLoaderData::keep_alive_ref_count_offset()), 0);
  __ jcc(Assembler::greater, method_live);

  // Is it a weak but alive CLD?
  __ movptr(tmp1, Address(tmp1, ClassLoaderData::holder_offset()));
  __ resolve_weak_handle(tmp1, tmp2);
  __ cmpptr(tmp1, 0);
  __ jcc(Assembler::notEqual, method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif

  __ bind(bad_call);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
  __ bind(method_live);

#ifndef _LP64
  __ pop(tmp2);
  __ pop(tmp1);
#endif
}

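// Oop verification helper: checks that the oop lies in the expected address range
// (mask/bits from Universe) and that its klass pointer is non-null, branching to
// 'error' otherwise.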
void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) {
  // Check if the oop is in the right area of memory
  __ movptr(tmp1, obj);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_mask());
  __ andptr(tmp1, tmp2);
  __ movptr(tmp2, (intptr_t) Universe::verify_oop_bits());
  __ cmpptr(tmp1, tmp2);
  __ jcc(Assembler::notZero, error);

  // make sure the klass is 'reasonable', i.e. not null.
  __ load_klass(obj, obj, tmp1);  // get klass
  __ testptr(obj, obj);
  __ jcc(Assembler::zero, error); // if klass is null it is broken
}

#ifdef COMPILER2

#ifdef _LP64

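// Maps a C2 OptoReg to the representation used by the barrier stubs. General purpose
// registers pass through unchanged; for XMM registers the live width is encoded in the
// low 4 bits of the opto_reg (1 = 8-byte/VecD, 2 = VecX, 4 = VecY, 8 = VecZ). Non-register
// locations return OptoReg::Bad.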
OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
    case Op_VecX:
      opto_reg |= 2;
      break;
    case Op_VecY:
      opto_reg |= 4;
      break;
    case Op_VecZ:
      opto_reg |= 8;
      break;
    default:
      opto_reg |= 1;
      break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                             int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

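// Comparator used when sorting the recorded XMM spill slots by live size (see initialize()).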
int SaveLiveRegisters::xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
  if (left->_size == right->_size) {
    return 0;
  }

  return (left->_size < right->_size) ? -1 : 1;
}

int SaveLiveRegisters::xmm_slot_size(OptoReg::Name opto_reg) {
  // The low order 4 bits denote what size of the XMM register is live
  return (opto_reg & 15) << 3;
}

uint SaveLiveRegisters::xmm_ideal_reg_for_size(int reg_size) {
  switch (reg_size) {
  case 8:
    return Op_VecD;
  case 16:
    return Op_VecX;
  case 32:
    return Op_VecY;
  case 64:
    return Op_VecZ;
  default:
    fatal("Invalid register size %d", reg_size);
    return 0;
  }
}

bool SaveLiveRegisters::xmm_needs_vzeroupper() const {
  return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
}

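// Spill/reload a single XMM/YMM/ZMM register at the current spill offset, using
// vec_spill_helper from x86.ad to emit an appropriately sized move.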
void SaveLiveRegisters::xmm_register_save(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  _spill_offset -= reg_data._size;
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void SaveLiveRegisters::xmm_register_restore(const XMMRegisterData& reg_data) {
  const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
  const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
  C2_MacroAssembler c2_masm(__ code());
  vec_spill_helper(&c2_masm, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  _spill_offset += reg_data._size;
}

void SaveLiveRegisters::gp_register_save(Register reg) {
  _spill_offset -= 8;
  __ movq(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}

void SaveLiveRegisters::gp_register_restore(Register reg) {
  __ movq(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}

void SaveLiveRegisters::initialize(BarrierStubC2* stub) {
  // Create mask of caller saved registers that need to
  // be saved/restored if live
  RegMask caller_saved;
  caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
  caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));

  if (UseAPX) {
    caller_saved.Insert(OptoReg::as_OptoReg(r16->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r17->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r18->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r19->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r20->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r21->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r22->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r23->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r24->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r25->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r26->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r27->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r28->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r29->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r30->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r31->as_VMReg()));
  }

  int gp_spill_size = 0;
  int opmask_spill_size = 0;
  int xmm_spill_size = 0;

  // Record registers that need to be saved/restored
  RegMaskIterator rmi(stub->preserve_set());
  while (rmi.has_next()) {
    const OptoReg::Name opto_reg = rmi.next();
    const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

    if (vm_reg->is_Register()) {
      if (caller_saved.Member(opto_reg)) {
        _gp_registers.append(vm_reg->as_Register());
        gp_spill_size += 8;
      }
    } else if (vm_reg->is_KRegister()) {
      // All opmask registers are caller saved, thus spill the ones
      // which are live.
      if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
        _opmask_registers.append(vm_reg->as_KRegister());
        opmask_spill_size += 8;
      }
    } else if (vm_reg->is_XMMRegister()) {
      // The low order 4 bits of the opto_reg encode how large a part of the register is live
      const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
      const int reg_size = xmm_slot_size(opto_reg);
      const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
      const int reg_index = _xmm_registers.find(reg_data);
      if (reg_index == -1) {
        // Not previously appended
        _xmm_registers.append(reg_data);
        xmm_spill_size += reg_size;
      } else {
        // Previously appended, update size
        const int reg_size_prev = _xmm_registers.at(reg_index)._size;
        if (reg_size > reg_size_prev) {
          _xmm_registers.at_put(reg_index, reg_data);
          xmm_spill_size += reg_size - reg_size_prev;
        }
      }
    } else {
      fatal("Unexpected register type");
    }
  }

  // Sort by size, largest first
  _xmm_registers.sort(xmm_compare_register_size);

  // On Windows, the caller reserves stack space for spilling register arguments
  const int arg_spill_size = frame::arg_reg_save_area_bytes;

  // Stack pointer must be 16 bytes aligned for the call
  _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}

SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub)
  : _masm(masm),
    _gp_registers(),
    _opmask_registers(),
    _xmm_registers(),
    _spill_size(0),
    _spill_offset(0) {

  //
  // Stack layout after registers have been spilled:
  //
  // | ...            | original rsp, 16 bytes aligned
  // ------------------
  // | zmm0 high      |
  // | ...            |
  // | zmm0 low       | 16 bytes aligned
  // | ...            |
  // | ymm1 high      |
  // | ...            |
  // | ymm1 low       | 16 bytes aligned
  // | ...            |
  // | xmmN high      |
  // | ...            |
  // | xmmN low       | 8 bytes aligned
  // | reg0           | 8 bytes aligned
  // | reg1           |
  // | ...            |
  // | regN           | new rsp, if 16 bytes aligned
  // | <padding>      | else new rsp, 16 bytes aligned
  // ------------------
  //

  // Figure out what registers to save/restore
  initialize(stub);

  // Allocate stack space
  if (_spill_size > 0) {
    __ subptr(rsp, _spill_size);
  }

  // Save XMM/YMM/ZMM registers
  for (int i = 0; i < _xmm_registers.length(); i++) {
    xmm_register_save(_xmm_registers.at(i));
  }

  if (xmm_needs_vzeroupper()) {
    __ vzeroupper();
  }

  // Save general purpose registers
  for (int i = 0; i < _gp_registers.length(); i++) {
    gp_register_save(_gp_registers.at(i));
  }

  // Save opmask registers
  for (int i = 0; i < _opmask_registers.length(); i++) {
    opmask_register_save(_opmask_registers.at(i));
  }
}

SaveLiveRegisters::~SaveLiveRegisters() {
  // Restore opmask registers
  for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
    opmask_register_restore(_opmask_registers.at(i));
  }

  // Restore general purpose registers
  for (int i = _gp_registers.length() - 1; i >= 0; i--) {
    gp_register_restore(_gp_registers.at(i));
  }

  __ vzeroupper();

  // Restore XMM/YMM/ZMM registers
  for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
    xmm_register_restore(_xmm_registers.at(i));
  }

  // Free stack space
  if (_spill_size > 0) {
    __ addptr(rsp, _spill_size);
  }
}

#else // !_LP64

OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  Unimplemented(); // This must be implemented to support late barrier expansion.
}

#endif // _LP64

#endif // COMPILER2