< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   // The call spans 5 bytes from its start to where the return address
  594   // points; clear_avx_size() adds the bytes of the optional vzeroupper.
  595   return 5 + clear_avx_size();
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
  600   // 15 bytes lie between the start of the call sequence and the point the
  601   // return address refers to; the clear_avx bytes, if any, come on top.
  602   return 15 + clear_avx_size();
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {
  606   // movq r10,#addr; callq (r10)
  607   const int call_size = 13;
  608   // CallLeafVector is the one runtime call whose size excludes clear_avx.
  609   const bool adds_clear_avx = (this->ideal_Opcode() != Op_CallLeafVector);
  610   return adds_clear_avx ? call_size + clear_avx_size() : call_size;
  611 }

  612 //
  613 // Compute padding required for nodes which need alignment
  614 //
  615 
  616 // The address of the call instruction needs to be 4-byte aligned to
  617 // ensure that it does not span a cache line so that it can be patched.
  618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  619 {
  620   // Position just past the optional vzeroupper and the call opcode byte.
  621   const int patch_offset = current_offset + clear_avx_size() + 1;
  622   return align_up(patch_offset, alignment_required()) - patch_offset;
  623 }
  624 
  625 // The address of the call instruction needs to be 4-byte aligned to
  626 // ensure that it does not span a cache line so that it can be patched.
  627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  628 {
  629   current_offset += clear_avx_size(); // skip vzeroupper
  630   current_offset += 11; // skip movq instruction + call opcode byte
  631   return align_up(current_offset, alignment_required()) - current_offset;

  817     st->print("\n\t");
  818     st->print("# stack alignment check");
  819 #endif
  820   }
  821   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  822     st->print("\n\t");
  823     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  824     st->print("\n\t");
  825     st->print("je      fast_entry\t");
  826     st->print("\n\t");
  827     st->print("call    #nmethod_entry_barrier_stub\t");
  828     st->print("\n\tfast_entry:");
  829   }
  830   st->cr();
  831 }
  832 #endif
  833 
  834 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier,
        // then the verified entry, and finally records the frame-complete
        // offset and (if needed) the constant table base offset.
  835   Compile* C = ra_->C;
  836
  837   int framesize = C->output()->frame_size_in_bytes();
  838   int bangsize = C->output()->bang_size_in_bytes();
  839
  840   if (C->clinit_barrier_on_entry()) {
  841     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  842     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  843
  844     Label L_skip_barrier;
  845     Register klass = rscratch1;
  846
            // Load the holder class and test it; the fast path continues at
            // L_skip_barrier, anything else falls into the jump below.
  847     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  848     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  849
  850     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  851
  852     __ bind(L_skip_barrier);
  853   }
  854
          // The bang size is passed only when a stack bang is needed (0 otherwise);
          // the last argument is true exactly for stub compilations.
          // NOTE(review): the meaning of the literal 'false' argument is not visible here.
  855   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
  856
          // Everything emitted so far counts as frame setup.
  857   C->output()->set_frame_complete(__ offset());
  858
  859   if (C->has_mach_constant_base_node()) {
  860     // NOTE: We set the table base offset here because users might be
  861     // emitted before MachConstantBaseNode.
  862     ConstantTable& constant_table = C->output()->constant_table();
  863     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  864   }
  865 }
  866 
  867 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  868 {
  869   return MachNode::size(ra_); // too many variables to predict the prolog's length;
  870                               // defer to the generic MachNode::size() computation
  871 }
  872 
  873 int MachPrologNode::reloc() const
  874 {
  875   return 0; // the prolog reports zero here (MachEpilogNode::reloc() reports 2)
  876 }
  877 
  878 //=============================================================================
  879 #ifndef PRODUCT
  880 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  881 {
  882   Compile* C = ra_->C;
  883   if (generate_vzeroupper(C)) {
  884     st->print("vzeroupper");
  885     st->cr(); st->print("\t");
  886   }
  887 
  888   int framesize = C->output()->frame_size_in_bytes();
  889   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  890   // Remove word for return adr already pushed
  891   // and RBP
  892   framesize -= 2*wordSize;

  899   st->print_cr("popq    rbp");
  900   if (do_polling() && C->is_method_compilation()) {
  901     st->print("\t");
  902     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  903                  "ja      #safepoint_stub\t"
  904                  "# Safepoint: poll for GC");
  905   }
  906 }
  907 #endif
  908 
  909 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  910 {
        // Emits the method epilog: optional vzeroupper, frame teardown, optional
        // reserved-stack check and, for method compilations that poll, the
        // return safepoint poll.
  911   Compile* C = ra_->C;
  912
  913   if (generate_vzeroupper(C)) {
  914     // Clear upper bits of YMM registers when current compiled code uses
  915     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  916     __ vzeroupper();
  917   }
  918
  919   int framesize = C->output()->frame_size_in_bytes();
  920   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  921   // Exclude the two words (return address and saved RBP) that the addq
  922   // below must not release; RBP is popped separately.
  923   framesize -= 2*wordSize;
  924
  925   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  926
  927   if (framesize) {
  928     __ addq(rsp, framesize);
  929   }
  930
  931   __ popq(rbp);
  932
  933   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  934     __ reserved_stack_check();
  935   }
  936
  937   if (do_polling() && C->is_method_compilation()) {
            // During scratch size estimation no stub is registered and the poll
            // targets a throwaway label; otherwise a C2SafepointPollStub created
            // at the current offset supplies the target.
  938     Label dummy_label;
  939     Label* code_stub = &dummy_label;
  940     if (!C->output()->in_scratch_emit_size()) {
  941       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  942       C->output()->add_stub(stub);
  943       code_stub = &stub->entry();
  944     }
  945     __ relocate(relocInfo::poll_return_type);
  946     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  947   }
  948 }
  949 
  950 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  951 {
  952   return MachNode::size(ra_); // too many variables to predict the epilog's length;
  953                               // defer to the generic MachNode::size() computation
  954 }
  955 
  956 int MachEpilogNode::reloc() const
  957 {
  958   return 2; // a large enough number (emit() issues at most one explicit relocate(), for the return poll)
  959 }
  960 
  961 const Pipeline* MachEpilogNode::pipeline() const
  962 {
  963   return MachNode::pipeline_class(); // no epilog-specific pipeline: reuse MachNode's pipeline class
  964 }
  965 
  966 //=============================================================================
  967 
  968 enum RC {
        // Coarse register-class tags.
        // NOTE(review): presumably used to classify the source and destination
        // of a spill copy; confirm against the users of this enum.
  969   rc_bad,
  970   rc_int,
  971   rc_kreg,
  972   rc_float,
  973   rc_stack
  974 };
  975 

 1533   int reg = ra_->get_reg_first(this);
 1534   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1535             Matcher::regName[reg], offset);
 1536 }
 1537 #endif
 1538 
 1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1540 {
 1541   const int slot_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1542   const Register dst = as_Register(ra_->get_encode(this));
 1543   // Materialize rsp + slot_offset (the box lock's stack address) in dst.
 1544   __ lea(dst, Address(rsp, slot_offset));
 1545 }
 1546 
 1547 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1548 {
        // 5 bytes while the stack offset stays below 0x80, 8 bytes otherwise.
 1549   const int slot_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1550   return (slot_offset >= 0x80) ? 8 : 5; // REX
 1551 }
 1552 











































 1553 //=============================================================================
 1554 #ifndef PRODUCT
 1555 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1556 {
        // Prints the inline-cache check as a debug listing: load the receiver's
        // klass, compare it with the speculated klass, branch to the miss stub.
        // The load is 32-bit (movl/cmpl) with compressed class pointers and
        // 64-bit (movq/cmpq) without; only the former is a compressed klass.
 1557   if (UseCompressedClassPointers) {
 1558     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1559     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1560   } else {
 1561     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1562     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1563   }
 1564   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1565 }
 1566 #endif
 1567 
 1568 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1569 {
        // The whole unverified entry point is the assembler's inline-cache
        // check, invoked with InteriorEntryAlignment as its argument.
 1570   __ ic_check(InteriorEntryAlignment);
 1571 }
 1572 
 1573 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1574 {
 1575   return MachNode::size(ra_); // too many variables to predict the check's length;
 1576                               // defer to the generic MachNode::size() computation
 1577 }
 1578 
 1579 
 1580 //=============================================================================
 1581 
 1582 bool Matcher::supports_vector_calling_convention(void) {
 1583   // The vector calling convention is available only when vector
 1584   // support and vector stubs are both enabled.
 1585   const bool enabled = EnableVectorSupport && UseVectorStubs;
 1586   return enabled;
 1587 }
 1588 
 1589 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1590   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1591   // The pair always starts at XMM0; only its upper end depends on the vector kind.
 1592   const int hi = (ideal_reg == Op_VecX) ? XMM0d_num
 1593                : (ideal_reg == Op_VecY) ? XMM0h_num
 1594                : (ideal_reg == Op_VecZ) ? XMM0p_num
 1595                :                          XMM0b_num;
 1596   return OptoRegPair(hi, XMM0_num);
 1597 }
 1598 
 1599 // Is this branch offset short enough that a short branch can be used?

 3024   %}
 3025 %}
 3026 
 3027 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Addresses [reg + off + idx << scale], where idx is an int converted to long.
 3028 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3029 %{
 3030   constraint(ALLOC_IN_RC(ptr_reg));
        // Only match when the long type reached through n->in(2)->in(3)->in(1)
        // has a non-negative lower bound.
        // NOTE(review): that path presumably reaches the ConvI2L of idx; confirm.
 3031   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3032   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3033
 3034   op_cost(10);
 3035   format %{"[$reg + $off + $idx << $scale]" %}
 3036   interface(MEMORY_INTER) %{
 3037     base($reg);
 3038     index($idx);
 3039     scale($scale);
 3040     disp($off);
 3041   %}
 3042 %}
 3043 
















 3044 // Indirect Narrow Oop Plus Offset Operand
 3045 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3046 // a base, so we can't free r12 even when CompressedOops::base() == nullptr.
 3047 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Applies only when the compressed-oop shift equals times_8, which is
        // what the fixed scale(0x3) below encodes.
 3048   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3049   constraint(ALLOC_IN_RC(ptr_reg));
 3050   match(AddP (DecodeN reg) off);
 3051
 3052   op_cost(10);
 3053   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3054   interface(MEMORY_INTER) %{
 3055     base(0xc); // R12
 3056     index($reg);
 3057     scale(0x3);
 3058     disp($off);
 3059   %}
 3060 %}
 3061 
 3062 // Indirect Memory Operand
 3063 operand indirectNarrow(rRegN reg)

 3370     equal(0x4, "e");
 3371     not_equal(0x5, "ne");
 3372     less(0x2, "b");
 3373     greater_equal(0x3, "ae");
 3374     less_equal(0x6, "be");
 3375     greater(0x7, "a");
 3376     overflow(0x0, "o");
 3377     no_overflow(0x1, "no");
 3378   %}
 3379 %}
 3380 
 3381 //----------OPERAND CLASSES----------------------------------------------------
 3382 // Operand Classes are groups of operands that are used to simplify
 3383 // instruction definitions by not requiring the AD writer to specify separate
 3384 // instructions for every form of operand when the instruction accepts
 3385 // multiple operand types with the same basic encoding and format.  The classic
 3386 // case of this is memory operands.
 3387
      // "memory" groups every memory operand form listed below, including the
      // compressed-oop and *Narrow variants.
 3388 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3389                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3390                indCompressedOopOffset,
 3391                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3392                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3393                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3394 
 3395 //----------PIPELINE-----------------------------------------------------------
 3396 // Rules which define the behavior of the target architectures pipeline.
 3397 pipeline %{
 3398 
 3399 //----------ATTRIBUTES---------------------------------------------------------
 3400 attributes %{
 3401   variable_size_instructions;        // Instructions are variable size
 3402   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3403   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 3404   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3405   instruction_fetch_units = 1;       // of 16 bytes
 3406
 3407   // List of nop instructions
 3408   nops( MachNop );
 3409 %}
 3410 

 5878   format %{ "MEMBAR-storestore (empty encoding)" %}
 5879   ins_encode( );
 5880   ins_pipe(empty);
 5881 %}
 5882 
 5883 //----------Move Instructions--------------------------------------------------
 5884 
      // Reinterprets a long as a pointer (CastX2P) with a plain register move.
 5885 instruct castX2P(rRegP dst, rRegL src)
 5886 %{
 5887   match(Set dst (CastX2P src));
 5888
 5889   format %{ "movq    $dst, $src\t# long->ptr" %}
 5890   ins_encode %{
          // Nothing is emitted when both operands were given the same register.
 5891     if ($dst$$reg != $src$$reg) {
 5892       __ movptr($dst$$Register, $src$$Register);
 5893     }
 5894   %}
 5895   ins_pipe(ialu_reg_reg); // XXX
 5896 %}
 5897 













      // Reinterprets a pointer as a long (CastP2X) with a plain register move.
 5898 instruct castP2X(rRegL dst, rRegP src)
 5899 %{
 5900   match(Set dst (CastP2X src));
 5901
 5902   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5903   ins_encode %{
          // Nothing is emitted when both operands were given the same register.
 5904     if ($dst$$reg != $src$$reg) {
 5905       __ movptr($dst$$Register, $src$$Register);
 5906     }
 5907   %}
 5908   ins_pipe(ialu_reg_reg); // XXX
 5909 %}
 5910 
 5911 // Convert oop into int for vectors alignment masking
 5912 instruct convP2I(rRegI dst, rRegP src)
 5913 %{
 5914   match(Set dst (ConvL2I (CastP2X src)));
 5915 
 5916   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5917   ins_encode %{

10426   effect(DEF dst, USE src);
10427   ins_cost(100);
10428   format %{ "movd    $dst,$src\t# MoveI2F" %}
10429   ins_encode %{
10430     __ movdl($dst$$XMMRegister, $src$$Register);
10431   %}
10432   ins_pipe( pipe_slow );
10433 %}
10434 
      // Moves the bits of a long general-purpose register into a double (XMM)
      // register (MoveL2D).
      // NOTE(review): the format string prints "movd" while the encoding calls
      // movdq(); confirm which mnemonic the listing should show.
10435 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10436   match(Set dst (MoveL2D src));
10437   effect(DEF dst, USE src);
10438   ins_cost(100);
10439   format %{ "movd    $dst,$src\t# MoveL2D" %}
10440   ins_encode %{
10441      __ movdq($dst$$XMMRegister, $src$$Register);
10442   %}
10443   ins_pipe( pipe_slow );
10444 %}
10445 

10446 // Fast clearing of an array
10447 // Small non-constant length ClearArray for non-AVX512 targets.
10448 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10449                   Universe dummy, rFlagsReg cr)
10450 %{
10451   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
10452   match(Set dummy (ClearArray cnt base));
10453   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































10454 
10455   format %{ $$template
10456     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10457     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10458     $$emit$$"jg      LARGE\n\t"
10459     $$emit$$"dec     rcx\n\t"
10460     $$emit$$"js      DONE\t# Zero length\n\t"
10461     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10462     $$emit$$"dec     rcx\n\t"
10463     $$emit$$"jge     LOOP\n\t"
10464     $$emit$$"jmp     DONE\n\t"
10465     $$emit$$"# LARGE:\n\t"
10466     if (UseFastStosb) {
10467        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10468        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10469     } else if (UseXMMForObjInit) {
10470        $$emit$$"mov     rdi,rax\n\t"
10471        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10472        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10473        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10481        $$emit$$"jl      L_tail\n\t"
10482        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10483        $$emit$$"add     0x20,rax\n\t"
10484        $$emit$$"sub     0x4,rcx\n\t"
10485        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10486        $$emit$$"add     0x4,rcx\n\t"
10487        $$emit$$"jle     L_end\n\t"
10488        $$emit$$"dec     rcx\n\t"
10489        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10490        $$emit$$"vmovq   xmm0,(rax)\n\t"
10491        $$emit$$"add     0x8,rax\n\t"
10492        $$emit$$"dec     rcx\n\t"
10493        $$emit$$"jge     L_sloop\n\t"
10494        $$emit$$"# L_end:\n\t"
10495     } else {
10496        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10497     }
10498     $$emit$$"# DONE"
10499   %}
10500   ins_encode %{
10501     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10502                  $tmp$$XMMRegister, false, knoreg);
10503   %}
10504   ins_pipe(pipe_slow);
10505 %}
10506 
10507 // Small non-constant length ClearArray for AVX512 targets.
10508 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10509                        Universe dummy, rFlagsReg cr)
10510 %{
10511   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
10512   match(Set dummy (ClearArray cnt base));
10513   ins_cost(125);
10514   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10515 
10516   format %{ $$template
10517     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10518     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10519     $$emit$$"jg      LARGE\n\t"
10520     $$emit$$"dec     rcx\n\t"
10521     $$emit$$"js      DONE\t# Zero length\n\t"
10522     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10523     $$emit$$"dec     rcx\n\t"
10524     $$emit$$"jge     LOOP\n\t"
10525     $$emit$$"jmp     DONE\n\t"
10526     $$emit$$"# LARGE:\n\t"
10527     if (UseFastStosb) {
10528        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10529        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10530     } else if (UseXMMForObjInit) {
10531        $$emit$$"mov     rdi,rax\n\t"
10532        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10533        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10534        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10542        $$emit$$"jl      L_tail\n\t"
10543        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10544        $$emit$$"add     0x20,rax\n\t"
10545        $$emit$$"sub     0x4,rcx\n\t"
10546        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10547        $$emit$$"add     0x4,rcx\n\t"
10548        $$emit$$"jle     L_end\n\t"
10549        $$emit$$"dec     rcx\n\t"
10550        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10551        $$emit$$"vmovq   xmm0,(rax)\n\t"
10552        $$emit$$"add     0x8,rax\n\t"
10553        $$emit$$"dec     rcx\n\t"
10554        $$emit$$"jge     L_sloop\n\t"
10555        $$emit$$"# L_end:\n\t"
10556     } else {
10557        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10558     }
10559     $$emit$$"# DONE"
10560   %}
10561   ins_encode %{
10562     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10563                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
10564   %}
10565   ins_pipe(pipe_slow);
10566 %}
10567 
10568 // Large non-constant length ClearArray for non-AVX512 targets.
10569 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10570                         Universe dummy, rFlagsReg cr)
10571 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX <= 2.
10572   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
10573   match(Set dummy (ClearArray cnt base));
        // cnt (rcx) and base (rdi) are inputs that get clobbered; tmp is a
        // scratch XMM register; zero (rax) and the flags are killed.
10574   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10575
        // Debug listing only: it follows the same UseFastStosb / UseXMMForObjInit
        // choice the flags select at runtime.
10576   format %{ $$template
10577     if (UseFastStosb) {
10578        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10579        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10580        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10581     } else if (UseXMMForObjInit) {
10582        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10583        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10584        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10585        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10586        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10587        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10588        $$emit$$"add     0x40,rax\n\t"
10589        $$emit$$"# L_zero_64_bytes:\n\t"
10590        $$emit$$"sub     0x8,rcx\n\t"
10591        $$emit$$"jge     L_loop\n\t"
10592        $$emit$$"add     0x4,rcx\n\t"
10593        $$emit$$"jl      L_tail\n\t"
10594        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10595        $$emit$$"add     0x20,rax\n\t"
10596        $$emit$$"sub     0x4,rcx\n\t"
10597        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10598        $$emit$$"add     0x4,rcx\n\t"
10599        $$emit$$"jle     L_end\n\t"
10600        $$emit$$"dec     rcx\n\t"
10601        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10602        $$emit$$"vmovq   xmm0,(rax)\n\t"
10603        $$emit$$"add     0x8,rax\n\t"
10604        $$emit$$"dec     rcx\n\t"
10605        $$emit$$"jge     L_sloop\n\t"
10606        $$emit$$"# L_end:\n\t"
10607     } else {
10608        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10609        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10610     }
10611   %}
        // The 'true' argument is what distinguishes this call from the small-array
        // rep_stos rule, which passes false; knoreg means no opmask register is
        // supplied on this non-AVX512 path.
10612   ins_encode %{
10613     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10614                  $tmp$$XMMRegister, true, knoreg);
10615   %}
10616   ins_pipe(pipe_slow);
10617 %}
10618 
10619 // Large non-constant length ClearArray for AVX512 targets.
10620 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
10621                              Universe dummy, rFlagsReg cr)
10622 %{
        // Selected for ClearArray nodes flagged is_large() when UseAVX > 2.
10623   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
10624   match(Set dummy (ClearArray cnt base));
        // Same effects as rep_stos_large plus ktmp, a scratch opmask register.
10625   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
10626
        // Debug listing only: it follows the same UseFastStosb / UseXMMForObjInit
        // choice the flags select at runtime.
10627   format %{ $$template
10628     if (UseFastStosb) {
10629        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10630        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10631        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10632     } else if (UseXMMForObjInit) {
10633        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10634        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10635        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10636        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10637        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10638        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10639        $$emit$$"add     0x40,rax\n\t"
10640        $$emit$$"# L_zero_64_bytes:\n\t"
10641        $$emit$$"sub     0x8,rcx\n\t"
10642        $$emit$$"jge     L_loop\n\t"
10643        $$emit$$"add     0x4,rcx\n\t"
10644        $$emit$$"jl      L_tail\n\t"
10645        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10646        $$emit$$"add     0x20,rax\n\t"
10647        $$emit$$"sub     0x4,rcx\n\t"
10648        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10649        $$emit$$"add     0x4,rcx\n\t"
10650        $$emit$$"jle     L_end\n\t"
10651        $$emit$$"dec     rcx\n\t"
10652        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10653        $$emit$$"vmovq   xmm0,(rax)\n\t"
10654        $$emit$$"add     0x8,rax\n\t"
10655        $$emit$$"dec     rcx\n\t"
10656        $$emit$$"jge     L_sloop\n\t"
10657        $$emit$$"# L_end:\n\t"
10658     } else {
10659        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10660        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10661     }
10662   %}
        // Same clear_mem call as rep_stos_large, except that the scratch opmask
        // register ktmp is passed instead of knoreg.
10663   ins_encode %{
10664     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10665                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
10666   %}
10667   ins_pipe(pipe_slow);
10668 %}
10669 
10670 // Small constant length ClearArray for AVX512 targets.
10671 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
10672 %{
        // Requires a non-large ClearArray, vectors of at least 32 bytes and
        // AVX512VL support.
10673   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
10674   match(Set dummy (ClearArray cnt base));
10675   ins_cost(100);
        // base is not killed here; tmp, zero and ktmp are scratch registers.
10676   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
10677   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
        // The count is an immediate, so it is passed to clear_mem as a constant.
10678   ins_encode %{
10679    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10680   %}
10681   ins_pipe(pipe_slow);
10682 %}
10683 
      // String compare for the LL encoding on targets without AVX512VL+BW.
10684 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10685                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
10686 %{
10687   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10688   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
        // Both string pointers and both counts are inputs that get clobbered.
10689   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10690
10691   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
        // knoreg: no opmask register is supplied on this non-AVX512 path.
10692   ins_encode %{
10693     __ string_compare($str1$$Register, $str2$$Register,
10694                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10695                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10696   %}
10697   ins_pipe( pipe_slow );
10698 %}
10699 

12477 
12478   ins_cost(300);
12479   format %{ "call_leaf,runtime " %}
12480   ins_encode(clear_avx, Java_To_Runtime(meth));
12481   ins_pipe(pipe_slow);
12482 %}
12483 
12484 // Call runtime without safepoint and with vector arguments
12485 instruct CallLeafDirectVector(method meth)
12486 %{
12487   match(CallLeafVector);
12488   effect(USE meth);
12489
12490   ins_cost(300);
12491   format %{ "call_leaf,vector " %}
        // Unlike the other leaf calls, no clear_avx is encoded here; this matches
        // MachCallRuntimeNode::ret_addr_offset(), which skips clear_avx_size()
        // for Op_CallLeafVector.
12492   ins_encode(Java_To_Runtime(meth));
12493   ins_pipe(pipe_slow);
12494 %}
12495 
12496 // Call runtime without safepoint















12497 instruct CallLeafNoFPDirect(method meth)
12498 %{
12499   match(CallLeafNoFP);
12500   effect(USE meth);
12501
12502   ins_cost(300);
12503   format %{ "call_leaf_nofp,runtime " %}
        // clear_avx is encoded ahead of the call itself.
12504   ins_encode(clear_avx, Java_To_Runtime(meth));
12505   ins_pipe(pipe_slow);
12506 %}
12507 
12508 // Return Instruction
12509 // Remove the return address & jump to it.
12510 // Notice: We always emit a nop after a ret to make sure there is room
12511 // for safepoint patching
12512 instruct Ret()
12513 %{
12514   match(Return);
12515 
12516   format %{ "ret" %}
12517   ins_encode %{
12518     __ ret(0);

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   // The call spans 5 bytes from its start to where the return address
  594   // points; clear_avx_size() adds the bytes of the optional vzeroupper.
  595   return 5 + clear_avx_size();
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
  600   // 15 bytes lie between the start of the call sequence and the point the
  601   // return address refers to; the clear_avx bytes, if any, come on top.
  602   return 15 + clear_avx_size();
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {
  606   // CallLeafNoFPInDirect: there is no entry point to load, so the
  607   // whole sequence is a 3-byte callq (register).
  608   if (_entry_point == nullptr) {
  609     return 3;
  610   }
  611   // movq r10,#addr; callq (r10)
  612   const int call_size = 13;
  613   const bool adds_clear_avx = (this->ideal_Opcode() != Op_CallLeafVector);
  614   return adds_clear_avx ? call_size + clear_avx_size() : call_size;
  615 }
  616 
  617 //
  618 // Compute padding required for nodes which need alignment
  619 //
  620 
  621 // The address of the call instruction needs to be 4-byte aligned to
  622 // ensure that it does not span a cache line so that it can be patched.
  623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  624 {
  625   // Position just past the optional vzeroupper and the call opcode byte.
  626   const int patch_offset = current_offset + clear_avx_size() + 1;
  627   return align_up(patch_offset, alignment_required()) - patch_offset;
  628 }
  629 
  630 // The address of the call instruction needs to be 4-byte aligned to
  631 // ensure that it does not span a cache line so that it can be patched.
  632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  633 {
  634   current_offset += clear_avx_size(); // skip vzeroupper
  635   current_offset += 11; // skip movq instruction + call opcode byte
  636   return align_up(current_offset, alignment_required()) - current_offset;

  822     st->print("\n\t");
  823     st->print("# stack alignment check");
  824 #endif
  825   }
  826   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  827     st->print("\n\t");
  828     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  829     st->print("\n\t");
  830     st->print("je      fast_entry\t");
  831     st->print("\n\t");
  832     st->print("call    #nmethod_entry_barrier_stub\t");
  833     st->print("\n\tfast_entry:");
  834   }
  835   st->cr();
  836 }
  837 #endif
  838 
  839 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  840   Compile* const C = ra_->C;
  841
  842   __ verified_entry(C);
  843
  844   // Stub compilations get no entry barrier.
  845   const bool compiling_stub = (C->stub_function() != nullptr);
  846   if (!compiling_stub) { __ entry_barrier(); }
  847
  848   // The verified-entry label is bound only when real code is emitted,
  849   // not during scratch size estimation.
  850   const bool size_only = Compile::current()->output()->in_scratch_emit_size();
  851   if (!size_only) { __ bind(*_verified_entry); }
  852
  853   C->output()->set_frame_complete(__ offset());
  854
  855   if (!C->has_mach_constant_base_node()) return;
  856   // NOTE: The table base offset is set here because users might be
  857   // emitted before MachConstantBaseNode.
  858   ConstantTable& table = C->output()->constant_table();
  859   table.set_table_base_offset(table.calculate_table_base_offset());
  860 }
  861 






  862 int MachPrologNode::reloc() const
  863 {
  864   return 0; // the prolog reports zero here (MachEpilogNode::reloc() reports 2)
  865 }
  866 
  867 //=============================================================================
  868 #ifndef PRODUCT
  869 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  870 {
  871   Compile* C = ra_->C;
  872   if (generate_vzeroupper(C)) {
  873     st->print("vzeroupper");
  874     st->cr(); st->print("\t");
  875   }
  876 
  877   int framesize = C->output()->frame_size_in_bytes();
  878   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  879   // Remove word for return adr already pushed
  880   // and RBP
  881   framesize -= 2*wordSize;

  888   st->print_cr("popq    rbp");
  889   if (do_polling() && C->is_method_compilation()) {
  890     st->print("\t");
  891     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  892                  "ja      #safepoint_stub\t"
  893                  "# Safepoint: poll for GC");
  894   }
  895 }
  896 #endif
  897 
  898 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  899 {
        // Emits the method epilog: optional vzeroupper, frame removal, optional
        // reserved-stack check and, for method compilations that poll, the
        // return safepoint poll.
  900   Compile* C = ra_->C;
  901
  902   if (generate_vzeroupper(C)) {
  903     // Clear upper bits of YMM registers when current compiled code uses
  904     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  905     __ vzeroupper();
  906   }
  907
  908   // Subtract two words to account for return address and rbp
  909   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
        // Frame teardown is delegated to remove_frame(), which is also told
        // whether this compilation needs stack repair.
  910   __ remove_frame(initial_framesize, C->needs_stack_repair());
  911
  912   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  913     __ reserved_stack_check();
  914   }
  915
  916   if (do_polling() && C->is_method_compilation()) {
            // During scratch size estimation no stub is registered and the poll
            // targets a throwaway label; otherwise a C2SafepointPollStub created
            // at the current offset supplies the target.
  917     Label dummy_label;
  918     Label* code_stub = &dummy_label;
  919     if (!C->output()->in_scratch_emit_size()) {
  920       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  921       C->output()->add_stub(stub);
  922       code_stub = &stub->entry();
  923     }
  924     __ relocate(relocInfo::poll_return_type);
  925     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  926   }
  927 }
  928 






  929 int MachEpilogNode::reloc() const
  930 {
        // Reports 2 for the epilog.
        // NOTE(review): emit() issues one poll_return relocation when polling
        // is active; presumably 2 is a conservative upper bound -- confirm.
  931   return 2; // a large enough number
  932 }
  933 
  934 const Pipeline* MachEpilogNode::pipeline() const
  935 {
        // The epilog reports the pipeline class provided by MachNode.
  936   return MachNode::pipeline_class();
  937 }
  938 
  939 //=============================================================================
  940 
  941 enum RC {
        // Coarse register-class tags.
        // NOTE(review): the per-value remarks are inferred from the enumerator
        // names only -- confirm at the use sites.
  942   rc_bad,    // presumably: no valid class
  943   rc_int,    // presumably: general-purpose register
  944   rc_kreg,   // presumably: opmask (k) register
  945   rc_float,  // presumably: XMM/vector register
  946   rc_stack   // presumably: stack slot
  947 };
  948 

 1506   int reg = ra_->get_reg_first(this);
 1507   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1508             Matcher::regName[reg], offset);
 1509 }
 1510 #endif
 1511 
 1512 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1513 {
        // Loads the address rsp + offset into the register assigned to this
        // node; the offset comes from the first element of input register mask 0.
 1514   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1515   int reg = ra_->get_encode(this);
 1516 
 1517   __ lea(as_Register(reg), Address(rsp, offset));
 1518 }
 1519 
 1520 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1521 {
        // Size in bytes reported for the lea produced by emit(): 5 when the
        // offset is below 0x80, otherwise 8.
        // NOTE(review): this matches REX + opcode + ModRM + SIB + disp8 versus
        // disp32 -- confirm against the assembler's encoding.
 1522   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1523   return (offset < 0x80) ? 5 : 8; // REX
 1524 }
 1525 
 1526 //=============================================================================
 1527 #ifndef PRODUCT
 1528 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1529 {
        // Prints only the node name; the code produced by emit() is not listed.
 1530   st->print_cr("MachVEPNode");
 1531 }
 1532 #endif
 1533 
 1534 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1535 {
        // Emits either an inline-cache check (when !_verified) or the
        // argument-unpacking entry sequence that jumps to the verified entry,
        // then pads with nops so this node's code size is a multiple of 4.
 1536   CodeBuffer* cbuf = masm->code();
        // Remember the starting size so the padding below covers only what
        // this node emitted.
 1537   uint insts_size = cbuf->insts_size();
 1538   if (!_verified) {
 1539     __ ic_check(1);
 1540   } else {
 1541     // TODO 8284443 Avoid creation of temporary frame
 1542     if (ra_->C->stub_function() == nullptr) {
            // Build a frame (stack increment 0), run the entry barrier, and
            // remove the frame again without stack repair.
 1543       __ verified_entry(ra_->C, 0);
 1544       __ entry_barrier();
 1545       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1546       __ remove_frame(initial_framesize, false);
 1547     }
 1548     // Unpack inline type args passed as oop and then jump to
 1549     // the verified entry point (skipping the unverified entry).
 1550     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1551     // Emit code for verified entry and save increment for stack repair on return
 1552     __ verified_entry(ra_->C, sp_inc);
 1553     if (Compile::current()->output()->in_scratch_emit_size()) {
            // In scratch emission the jump targets a local placeholder label
            // rather than *_verified_entry.
 1554       Label dummy_verified_entry;
 1555       __ jmp(dummy_verified_entry);
 1556     } else {
 1557       __ jmp(*_verified_entry);
 1558     }
 1559   }
 1560   /* WARNING these NOPs are critical so that verified entry point is properly
 1561      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // Bytes emitted so far, modulo 4, decide the padding (0..3 nops).
 1562   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1563   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1564   if (nops_cnt > 0) {
 1565     __ nop(nops_cnt);
 1566   }
 1567 }
 1568 
 1569 //=============================================================================
 1570 #ifndef PRODUCT
 1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1572 {
        // Prints a textual listing of the inline-cache check: load the
        // receiver's klass, compare it with the speculated klass held in the
        // inline-cache data, and branch to the IC miss stub on mismatch.
 1573   if (UseCompressedClassPointers) {
          // 32-bit load/compare of the compressed klass.
 1574     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1575     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1576   } else {
          // 64-bit load/compare: the klass is not compressed in this branch,
          // so the listing must not label it as such.
 1577     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1578     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1579   }
 1580   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1581 }
 1582 #endif
 1583 
 1584 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1585 {
        // Delegates the whole unverified-entry sequence to ic_check, passing
        // InteriorEntryAlignment as its argument.
 1586   __ ic_check(InteriorEntryAlignment);
 1587 }
 1588 







 1589 //=============================================================================
 1590 
 1591 bool Matcher::supports_vector_calling_convention(void) {
        // Available only when both vector support and vector stubs are enabled.
 1592   return EnableVectorSupport && UseVectorStubs;
 1596 }
 1597 
 1598 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1599   assert(EnableVectorSupport && UseVectorStubs, "sanity");
        // The low half is always XMM0; the high half depends on the vector
        // width requested through ideal_reg.
        int upper_slot;
        switch (ideal_reg) {
          case Op_VecX: upper_slot = XMM0d_num; break;
          case Op_VecY: upper_slot = XMM0h_num; break;
          case Op_VecZ: upper_slot = XMM0p_num; break;
          default:      upper_slot = XMM0b_num; break;
        }
 1605   return OptoRegPair(upper_slot, XMM0_num);
 1606 }
 1607 
 1608 // Is this branch offset short enough that a short branch can be used?

 3033   %}
 3034 %}
 3035 
 3036 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3037 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3038 %{
        // Memory operand of the form reg + (ConvI2L(idx) << scale) + off.
        // The predicate requires the lower bound of a long-typed node reached
        // via in(2)->in(3)->in(1) from the matched root to be non-negative;
        // presumably that node is the ConvI2L of idx -- confirm the input
        // indices against the match rule.
 3039   constraint(ALLOC_IN_RC(ptr_reg));
 3040   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3041   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3042 
 3043   op_cost(10);
 3044   format %{"[$reg + $off + $idx << $scale]" %}
 3045   interface(MEMORY_INTER) %{
 3046     base($reg);
 3047     index($idx);
 3048     scale($scale);
 3049     disp($off);
 3050   %}
 3051 %}
 3052 
 3053 // Indirect Narrow Oop Operand
 3054 operand indCompressedOop(rRegN reg) %{
        // Folds a DecodeN into the addressing mode [R12 + reg << 3]. Only
        // usable when compressed oops are on and the shift equals times_8,
        // which is what the hard-coded scale of 3 encodes.
 3055   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3056   constraint(ALLOC_IN_RC(ptr_reg));
 3057   match(DecodeN reg);
 3058 
 3059   op_cost(10);
 3060   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3061   interface(MEMORY_INTER) %{
 3062     base(0xc); // R12
 3063     index($reg);
 3064     scale(0x3);
 3065     disp(0x0);
 3066   %}
 3067 %}
 3068 
 3069 // Indirect Narrow Oop Plus Offset Operand
 3070 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3071 // we can't free r12 even with CompressedOops::base() == nullptr.
 3072 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same folding as for a bare DecodeN, with an additional 32-bit
        // immediate displacement: [R12 + reg << 3 + off].
 3073   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3074   constraint(ALLOC_IN_RC(ptr_reg));
 3075   match(AddP (DecodeN reg) off);
 3076 
 3077   op_cost(10);
 3078   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3079   interface(MEMORY_INTER) %{
 3080     base(0xc); // R12
 3081     index($reg);
 3082     scale(0x3);
 3083     disp($off);
 3084   %}
 3085 %}
 3086 
 3087 // Indirect Memory Operand
 3088 operand indirectNarrow(rRegN reg)

 3395     equal(0x4, "e");
 3396     not_equal(0x5, "ne");
 3397     less(0x2, "b");
 3398     greater_equal(0x3, "ae");
 3399     less_equal(0x6, "be");
 3400     greater(0x7, "a");
 3401     overflow(0x0, "o");
 3402     no_overflow(0x1, "no");
 3403   %}
 3404 %}
 3405 
 3406 //----------OPERAND CLASSES----------------------------------------------------
 3407 // Operand Classes are groups of operands that are used to simplify
 3408 // instruction definitions by not requiring the AD writer to specify separate
 3409 // instructions for every form of operand when the instruction accepts
 3410 // multiple operand types with the same basic encoding and format.  The classic
 3411 // case of this is memory operands.
 3412 
 3413 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3414                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3415                indCompressedOop, indCompressedOopOffset,
 3416                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3417                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3418                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3419 
 3420 //----------PIPELINE-----------------------------------------------------------
 3421 // Rules which define the behavior of the target architectures pipeline.
 3422 pipeline %{
 3423 
 3424 //----------ATTRIBUTES---------------------------------------------------------
 3425 attributes %{
 3426   variable_size_instructions;        // Variable size instructions
 3427   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3428   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 3429   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3430   instruction_fetch_units = 1;       // of 16 bytes
 3431 
 3432   // List of nop instructions
 3433   nops( MachNop );
 3434 %}
 3435 

 5903   format %{ "MEMBAR-storestore (empty encoding)" %}
 5904   ins_encode( );
 5905   ins_pipe(empty);
 5906 %}
 5907 
 5908 //----------Move Instructions--------------------------------------------------
 5909 
 5910 instruct castX2P(rRegP dst, rRegL src)
 5911 %{
        // Reinterprets a long as a pointer (CastX2P). A register move is
        // emitted only when dst and src were assigned different registers.
 5912   match(Set dst (CastX2P src));
 5913 
 5914   format %{ "movq    $dst, $src\t# long->ptr" %}
 5915   ins_encode %{
 5916     if ($dst$$reg != $src$$reg) {
 5917       __ movptr($dst$$Register, $src$$Register);
 5918     }
 5919   %}
 5920   ins_pipe(ialu_reg_reg); // XXX
 5921 %}
 5922 
 5923 instruct castN2X(rRegL dst, rRegN src)
 5924 %{
        // CastP2X for a source allocated in the rRegN register class. A
        // register move is emitted only when dst and src differ.
        // NOTE(review): the format text still reads "ptr -> long" although the
        // source operand is rRegN -- confirm whether the listing should say so.
 5925   match(Set dst (CastP2X src));
 5926 
 5927   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5928   ins_encode %{
 5929     if ($dst$$reg != $src$$reg) {
 5930       __ movptr($dst$$Register, $src$$Register);
 5931     }
 5932   %}
 5933   ins_pipe(ialu_reg_reg); // XXX
 5934 %}
 5935 
 5936 instruct castP2X(rRegL dst, rRegP src)
 5937 %{
        // Reinterprets a pointer as a long (CastP2X). A register move is
        // emitted only when dst and src were assigned different registers.
 5938   match(Set dst (CastP2X src));
 5939 
 5940   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5941   ins_encode %{
 5942     if ($dst$$reg != $src$$reg) {
 5943       __ movptr($dst$$Register, $src$$Register);
 5944     }
 5945   %}
 5946   ins_pipe(ialu_reg_reg); // XXX
 5947 %}
 5948 
 5949 // Convert oop into int for vectors alignment masking
 5950 instruct convP2I(rRegI dst, rRegP src)
 5951 %{
 5952   match(Set dst (ConvL2I (CastP2X src)));
 5953 
 5954   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5955   ins_encode %{

10464   effect(DEF dst, USE src);
10465   ins_cost(100);
10466   format %{ "movd    $dst,$src\t# MoveI2F" %}
10467   ins_encode %{
10468     __ movdl($dst$$XMMRegister, $src$$Register);
10469   %}
10470   ins_pipe( pipe_slow );
10471 %}
10472 
10473 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves a long from a general-purpose register into an XMM register
        // (MoveL2D) using the assembler's movdq.
        // NOTE(review): the format text prints "movd" while the encoding calls
        // movdq -- confirm which mnemonic the listing should show.
10474   match(Set dst (MoveL2D src));
10475   effect(DEF dst, USE src);
10476   ins_cost(100);
10477   format %{ "movd    $dst,$src\t# MoveL2D" %}
10478   ins_encode %{
10479      __ movdq($dst$$XMMRegister, $src$$Register);
10480   %}
10481   ins_pipe( pipe_slow );
10482 %}
10483 
10484 
10485 // Fast clearing of an array
10486 // Small non-constant length ClearArray for non-AVX512 targets.
10487 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10488                   Universe dummy, rFlagsReg cr)
10489 %{
        // ClearArray for nodes that are neither large nor word-copy-only, on
        // UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are consumed and
        // clobbered; tmp is a scratch XMM register; flags are killed.
        // NOTE(review): the listing below addresses memory through rax and
        // names xmm0 in the short loop while other lines use $tmp -- it may not
        // mirror what clear_mem emits; confirm before relying on it.
10490   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10491   match(Set dummy (ClearArray (Binary cnt base) val));
10492   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10493 
10494   format %{ $$template
10495     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10496     $$emit$$"jg      LARGE\n\t"
10497     $$emit$$"dec     rcx\n\t"
10498     $$emit$$"js      DONE\t# Zero length\n\t"
10499     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10500     $$emit$$"dec     rcx\n\t"
10501     $$emit$$"jge     LOOP\n\t"
10502     $$emit$$"jmp     DONE\n\t"
10503     $$emit$$"# LARGE:\n\t"
10504     if (UseFastStosb) {
10505        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10506        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10507     } else if (UseXMMForObjInit) {
10508        $$emit$$"movdq   $tmp, $val\n\t"
10509        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10510        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10511        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10512        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10513        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10514        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10515        $$emit$$"add     0x40,rax\n\t"
10516        $$emit$$"# L_zero_64_bytes:\n\t"
10517        $$emit$$"sub     0x8,rcx\n\t"
10518        $$emit$$"jge     L_loop\n\t"
10519        $$emit$$"add     0x4,rcx\n\t"
10520        $$emit$$"jl      L_tail\n\t"
10521        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10522        $$emit$$"add     0x20,rax\n\t"
10523        $$emit$$"sub     0x4,rcx\n\t"
10524        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10525        $$emit$$"add     0x4,rcx\n\t"
10526        $$emit$$"jle     L_end\n\t"
10527        $$emit$$"dec     rcx\n\t"
10528        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10529        $$emit$$"vmovq   xmm0,(rax)\n\t"
10530        $$emit$$"add     0x8,rax\n\t"
10531        $$emit$$"dec     rcx\n\t"
10532        $$emit$$"jge     L_sloop\n\t"
10533        $$emit$$"# L_end:\n\t"
10534     } else {
10535        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10536     }
10537     $$emit$$"# DONE"
10538   %}
10539   ins_encode %{
          // The two boolean arguments presumably mirror the predicate
          // (is_large = false, word_copy_only = false) -- confirm against
          // the clear_mem declaration.
10540     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10541                  $tmp$$XMMRegister, false, false);
10542   %}
10543   ins_pipe(pipe_slow);
10544 %}
10545 
10546 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10547                             Universe dummy, rFlagsReg cr)
10548 %{
        // ClearArray for nodes that are not large but are word-copy-only, on
        // UseAVX <= 2. Unlike the non-word-copy rule, the listing has no
        // "rep stosb" alternative. cnt (rcx), base (rdi) and val (rax) are
        // consumed and clobbered; tmp is a scratch XMM register.
10549   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10550   match(Set dummy (ClearArray (Binary cnt base) val));
10551   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10552 
10553   format %{ $$template
10554     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10555     $$emit$$"jg      LARGE\n\t"
10556     $$emit$$"dec     rcx\n\t"
10557     $$emit$$"js      DONE\t# Zero length\n\t"
10558     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10559     $$emit$$"dec     rcx\n\t"
10560     $$emit$$"jge     LOOP\n\t"
10561     $$emit$$"jmp     DONE\n\t"
10562     $$emit$$"# LARGE:\n\t"
10563     if (UseXMMForObjInit) {
10564        $$emit$$"movdq   $tmp, $val\n\t"
10565        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10566        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10567        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10568        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10569        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10570        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10571        $$emit$$"add     0x40,rax\n\t"
10572        $$emit$$"# L_zero_64_bytes:\n\t"
10573        $$emit$$"sub     0x8,rcx\n\t"
10574        $$emit$$"jge     L_loop\n\t"
10575        $$emit$$"add     0x4,rcx\n\t"
10576        $$emit$$"jl      L_tail\n\t"
10577        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10578        $$emit$$"add     0x20,rax\n\t"
10579        $$emit$$"sub     0x4,rcx\n\t"
10580        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10581        $$emit$$"add     0x4,rcx\n\t"
10582        $$emit$$"jle     L_end\n\t"
10583        $$emit$$"dec     rcx\n\t"
10584        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10585        $$emit$$"vmovq   xmm0,(rax)\n\t"
10586        $$emit$$"add     0x8,rax\n\t"
10587        $$emit$$"dec     rcx\n\t"
10588        $$emit$$"jge     L_sloop\n\t"
10589        $$emit$$"# L_end:\n\t"
10590     } else {
10591        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10592     }
10593     $$emit$$"# DONE"
10594   %}
10595   ins_encode %{
          // Boolean arguments presumably are (is_large = false,
          // word_copy_only = true), mirroring the predicate -- confirm.
10596     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10597                  $tmp$$XMMRegister, false, true);
10598   %}
10599   ins_pipe(pipe_slow);
10600 %}
10601 
10602 // Small non-constant length ClearArray for AVX512 targets.
10603 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10604                        Universe dummy, rFlagsReg cr)
10605 %{
10606   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10607   match(Set dummy (ClearArray (Binary cnt base) val));
10608   ins_cost(125);
10609   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10610 
10611   format %{ $$template
10612     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10613     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10614     $$emit$$"jg      LARGE\n\t"
10615     $$emit$$"dec     rcx\n\t"
10616     $$emit$$"js      DONE\t# Zero length\n\t"
10617     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10618     $$emit$$"dec     rcx\n\t"
10619     $$emit$$"jge     LOOP\n\t"
10620     $$emit$$"jmp     DONE\n\t"
10621     $$emit$$"# LARGE:\n\t"
10622     if (UseFastStosb) {
10623        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10624        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10625     } else if (UseXMMForObjInit) {
10626        $$emit$$"mov     rdi,rax\n\t"
10627        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10628        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10629        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10637        $$emit$$"jl      L_tail\n\t"
10638        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10639        $$emit$$"add     0x20,rax\n\t"
10640        $$emit$$"sub     0x4,rcx\n\t"
10641        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10642        $$emit$$"add     0x4,rcx\n\t"
10643        $$emit$$"jle     L_end\n\t"
10644        $$emit$$"dec     rcx\n\t"
10645        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10646        $$emit$$"vmovq   xmm0,(rax)\n\t"
10647        $$emit$$"add     0x8,rax\n\t"
10648        $$emit$$"dec     rcx\n\t"
10649        $$emit$$"jge     L_sloop\n\t"
10650        $$emit$$"# L_end:\n\t"
10651     } else {
10652        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10653     }
10654     $$emit$$"# DONE"
10655   %}
10656   ins_encode %{
10657     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10658                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
10659   %}
10660   ins_pipe(pipe_slow);
10661 %}
10662 
10663 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10664                                  Universe dummy, rFlagsReg cr)

10665 %{
10666   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10667   match(Set dummy (ClearArray (Binary cnt base) val));
10668   ins_cost(125);
10669   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10670 
10671   format %{ $$template
10672     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10673     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
10674     $$emit$$"jg      LARGE\n\t"
10675     $$emit$$"dec     rcx\n\t"
10676     $$emit$$"js      DONE\t# Zero length\n\t"
10677     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
10678     $$emit$$"dec     rcx\n\t"
10679     $$emit$$"jge     LOOP\n\t"
10680     $$emit$$"jmp     DONE\n\t"
10681     $$emit$$"# LARGE:\n\t"
10682     if (UseFastStosb) {
10683        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10684        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
10685     } else if (UseXMMForObjInit) {
10686        $$emit$$"mov     rdi,rax\n\t"
10687        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10688        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10689        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

10697        $$emit$$"jl      L_tail\n\t"
10698        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10699        $$emit$$"add     0x20,rax\n\t"
10700        $$emit$$"sub     0x4,rcx\n\t"
10701        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10702        $$emit$$"add     0x4,rcx\n\t"
10703        $$emit$$"jle     L_end\n\t"
10704        $$emit$$"dec     rcx\n\t"
10705        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10706        $$emit$$"vmovq   xmm0,(rax)\n\t"
10707        $$emit$$"add     0x8,rax\n\t"
10708        $$emit$$"dec     rcx\n\t"
10709        $$emit$$"jge     L_sloop\n\t"
10710        $$emit$$"# L_end:\n\t"
10711     } else {
10712        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
10713     }
10714     $$emit$$"# DONE"
10715   %}
10716   ins_encode %{
10717     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10718                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
10719   %}
10720   ins_pipe(pipe_slow);
10721 %}
10722 
10723 // Large non-constant length ClearArray for non-AVX512 targets.
10724 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10725                         Universe dummy, rFlagsReg cr)
10726 %{
        // ClearArray for nodes flagged large and not word-copy-only, on
        // UseAVX <= 2. The listing has no short-length loop, unlike the rules
        // for non-large nodes. cnt (rcx), base (rdi) and val (rax) are
        // consumed and clobbered; tmp is a scratch XMM register.
10727   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10728   match(Set dummy (ClearArray (Binary cnt base) val));
10729   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10730 
10731   format %{ $$template
10732     if (UseFastStosb) {
10733        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10734        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10735     } else if (UseXMMForObjInit) {
10736        $$emit$$"movdq   $tmp, $val\n\t"
10737        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10738        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10739        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10740        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10741        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10742        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10743        $$emit$$"add     0x40,rax\n\t"
10744        $$emit$$"# L_zero_64_bytes:\n\t"
10745        $$emit$$"sub     0x8,rcx\n\t"
10746        $$emit$$"jge     L_loop\n\t"
10747        $$emit$$"add     0x4,rcx\n\t"
10748        $$emit$$"jl      L_tail\n\t"
10749        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10750        $$emit$$"add     0x20,rax\n\t"
10751        $$emit$$"sub     0x4,rcx\n\t"
10752        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10753        $$emit$$"add     0x4,rcx\n\t"
10754        $$emit$$"jle     L_end\n\t"
10755        $$emit$$"dec     rcx\n\t"
10756        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10757        $$emit$$"vmovq   xmm0,(rax)\n\t"
10758        $$emit$$"add     0x8,rax\n\t"
10759        $$emit$$"dec     rcx\n\t"
10760        $$emit$$"jge     L_sloop\n\t"
10761        $$emit$$"# L_end:\n\t"
10762     } else {
10763        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10764     }
10765   %}
10766   ins_encode %{
          // Boolean arguments presumably are (is_large = true,
          // word_copy_only = false), mirroring the predicate -- confirm.
10767     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10768                  $tmp$$XMMRegister, true, false);
10769   %}
10770   ins_pipe(pipe_slow);
10771 %}
10772 
10773 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10774                                   Universe dummy, rFlagsReg cr)
10775 %{
        // ClearArray for nodes flagged large and word-copy-only, on
        // UseAVX <= 2. The listing offers only the XMM loop or "rep stosq".
        // cnt (rcx), base (rdi) and val (rax) are consumed and clobbered;
        // tmp is a scratch XMM register.
10776   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
10777   match(Set dummy (ClearArray (Binary cnt base) val));
10778   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
10779 
10780   format %{ $$template
10781     if (UseXMMForObjInit) {
10782        $$emit$$"movdq   $tmp, $val\n\t"
10783        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10784        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10785        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10786        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10787        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10788        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10789        $$emit$$"add     0x40,rax\n\t"
10790        $$emit$$"# L_zero_64_bytes:\n\t"
10791        $$emit$$"sub     0x8,rcx\n\t"
10792        $$emit$$"jge     L_loop\n\t"
10793        $$emit$$"add     0x4,rcx\n\t"
10794        $$emit$$"jl      L_tail\n\t"
10795        $$emit$$"vmovdqu $tmp,(rax)\n\t"
10796        $$emit$$"add     0x20,rax\n\t"
10797        $$emit$$"sub     0x4,rcx\n\t"
10798        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10799        $$emit$$"add     0x4,rcx\n\t"
10800        $$emit$$"jle     L_end\n\t"
10801        $$emit$$"dec     rcx\n\t"
10802        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10803        $$emit$$"vmovq   xmm0,(rax)\n\t"
10804        $$emit$$"add     0x8,rax\n\t"
10805        $$emit$$"dec     rcx\n\t"
10806        $$emit$$"jge     L_sloop\n\t"
10807        $$emit$$"# L_end:\n\t"
10808     } else {
10809        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10810     }
10811   %}
10812   ins_encode %{
          // Boolean arguments presumably are (is_large = true,
          // word_copy_only = true), mirroring the predicate -- confirm.
10813     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10814                  $tmp$$XMMRegister, true, true);
10815   %}
10816   ins_pipe(pipe_slow);
10817 %}
10818 
10819 // Large non-constant length ClearArray for AVX512 targets.
10820 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10821                              Universe dummy, rFlagsReg cr)
10822 %{
        // ClearArray for nodes flagged large and not word-copy-only, on
        // UseAVX > 2. It also reserves an opmask temp (ktmp) that is passed to
        // clear_mem.
        // NOTE(review): the listing prints "xorq rax, rax" and "vpxor ymm0"
        // although the fill value arrives in the val operand (rax) -- the text
        // may be stale; confirm against clear_mem.
10823   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10824   match(Set dummy (ClearArray (Binary cnt base) val));
10825   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10826 
10827   format %{ $$template
10828     if (UseFastStosb) {
10829        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10830        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10831        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10832     } else if (UseXMMForObjInit) {
10833        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10834        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10835        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10836        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10837        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10838        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10839        $$emit$$"add     0x40,rax\n\t"
10840        $$emit$$"# L_zero_64_bytes:\n\t"
10841        $$emit$$"sub     0x8,rcx\n\t"
10842        $$emit$$"jge     L_loop\n\t"
10843        $$emit$$"add     0x4,rcx\n\t"
10844        $$emit$$"jl      L_tail\n\t"
10845        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10846        $$emit$$"add     0x20,rax\n\t"
10847        $$emit$$"sub     0x4,rcx\n\t"
10848        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10849        $$emit$$"add     0x4,rcx\n\t"
10850        $$emit$$"jle     L_end\n\t"
10851        $$emit$$"dec     rcx\n\t"
10852        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10853        $$emit$$"vmovq   xmm0,(rax)\n\t"
10854        $$emit$$"add     0x8,rax\n\t"
10855        $$emit$$"dec     rcx\n\t"
10856        $$emit$$"jge     L_sloop\n\t"
10857        $$emit$$"# L_end:\n\t"
10858     } else {
10859        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10860        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10861     }
10862   %}
10863   ins_encode %{
          // Boolean arguments presumably are (is_large = true,
          // word_copy_only = false), mirroring the predicate -- confirm.
10864     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10865                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
10866   %}
10867   ins_pipe(pipe_slow);
10868 %}
10869 
10870 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
10871                                        Universe dummy, rFlagsReg cr)

10872 %{
        // ClearArray for nodes flagged large and word-copy-only, on
        // UseAVX > 2. It also reserves an opmask temp (ktmp) that is passed to
        // clear_mem.
        // NOTE(review): the listing still offers a UseFastStosb ("rep stosb")
        // branch and prints "xorq rax, rax" although the fill value arrives in
        // val -- the text may be stale for the word-copy case; confirm against
        // clear_mem.
10873   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
10874   match(Set dummy (ClearArray (Binary cnt base) val));
10875   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
10876 
10877   format %{ $$template
10878     if (UseFastStosb) {
10879        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10880        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
10881        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
10882     } else if (UseXMMForObjInit) {
10883        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
10884        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
10885        $$emit$$"jmpq    L_zero_64_bytes\n\t"
10886        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10887        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10888        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10889        $$emit$$"add     0x40,rax\n\t"
10890        $$emit$$"# L_zero_64_bytes:\n\t"
10891        $$emit$$"sub     0x8,rcx\n\t"
10892        $$emit$$"jge     L_loop\n\t"
10893        $$emit$$"add     0x4,rcx\n\t"
10894        $$emit$$"jl      L_tail\n\t"
10895        $$emit$$"vmovdqu ymm0,(rax)\n\t"
10896        $$emit$$"add     0x20,rax\n\t"
10897        $$emit$$"sub     0x4,rcx\n\t"
10898        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10899        $$emit$$"add     0x4,rcx\n\t"
10900        $$emit$$"jle     L_end\n\t"
10901        $$emit$$"dec     rcx\n\t"
10902        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10903        $$emit$$"vmovq   xmm0,(rax)\n\t"
10904        $$emit$$"add     0x8,rax\n\t"
10905        $$emit$$"dec     rcx\n\t"
10906        $$emit$$"jge     L_sloop\n\t"
10907        $$emit$$"# L_end:\n\t"
10908     } else {
10909        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
10910        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
10911     }
10912   %}
10913   ins_encode %{
          // Boolean arguments presumably are (is_large = true,
          // word_copy_only = true), mirroring the predicate -- confirm.
10914     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10915                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
10916   %}
10917   ins_pipe(pipe_slow);
10918 %}
10919 
10920 // Small constant length ClearArray for AVX512 targets.
10921 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
10922 %{
        // ClearArray with a constant count (immL), for nodes that are neither
        // large nor word-copy-only, when MaxVectorSize >= 32 and AVX512VL is
        // supported. base may live in any pointer register and is not listed
        // in the effects; val (rax) is consumed and clobbered.
10923   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
10924             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
10925   match(Set dummy (ClearArray (Binary cnt base) val));
10926   ins_cost(100);
10927   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
10928   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
10929   ins_encode %{
          // Uses the clear_mem overload that takes the count as a constant.
10930     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
10931   %}
10932   ins_pipe(pipe_slow);
10933 %}
10934 
10935 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10936                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
10937 %{
        // StrComp with LL encoding, selected when AVX512VL+BW is not
        // supported. Both string pointers and both counts are consumed and
        // clobbered; the result is produced in rax. No opmask register is
        // used (knoreg is passed).
10938   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10939   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10940   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10941 
10942   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
10943   ins_encode %{
10944     __ string_compare($str1$$Register, $str2$$Register,
10945                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
10946                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
10947   %}
10948   ins_pipe( pipe_slow );
10949 %}
10950 

12728 
12729   ins_cost(300);
12730   format %{ "call_leaf,runtime " %}
12731   ins_encode(clear_avx, Java_To_Runtime(meth));
12732   ins_pipe(pipe_slow);
12733 %}
12734 
12735 // Call runtime without safepoint and with vector arguments
12736 instruct CallLeafDirectVector(method meth)
12737 %{
        // Leaf runtime call carrying vector arguments. Unlike the other
        // Java_To_Runtime call rules visible here, the encoding has no
        // clear_avx step; MachCallRuntimeNode::ret_addr_offset() earlier in
        // this file likewise leaves out clear_avx_size() for Op_CallLeafVector.
12738   match(CallLeafVector);
12739   effect(USE meth);
12740 
12741   ins_cost(300);
12742   format %{ "call_leaf,vector " %}
12743   ins_encode(Java_To_Runtime(meth));
12744   ins_pipe(pipe_slow);
12745 %}
12746 
12747 // Call runtime without safepoint
12748 // entry point is null, target holds the address to call
12749 instruct CallLeafNoFPInDirect(rRegP target)
12750 %{
        // Selected when the call node has no static entry point; the callee
        // address is taken from the target register and called indirectly.
        // NOTE(review): this emits a bare register call with no clear_avx,
        // whereas MachCallRuntimeNode::ret_addr_offset() earlier in this file
        // assumes a 13-byte "movq r10,#addr; callq (r10)" sequence plus
        // clear_avx_size() -- confirm the return-address offset is right for
        // this form.
12751   predicate(n->as_Call()->entry_point() == nullptr);
12752   match(CallLeafNoFP target);
12753 
12754   ins_cost(300);
12755   format %{ "call_leaf_nofp,runtime indirect " %}
12756   ins_encode %{
12757      __ call($target$$Register);
12758   %}
12759 
12760   ins_pipe(pipe_slow);
12761 %}
12762 
12763 instruct CallLeafNoFPDirect(method meth)
12764 %{
        // Selected when the call node has a static entry point; encoded as
        // clear_avx followed by Java_To_Runtime on meth.
12765   predicate(n->as_Call()->entry_point() != nullptr);
12766   match(CallLeafNoFP);
12767   effect(USE meth);
12768 
12769   ins_cost(300);
12770   format %{ "call_leaf_nofp,runtime " %}
12771   ins_encode(clear_avx, Java_To_Runtime(meth));
12772   ins_pipe(pipe_slow);
12773 %}
12774 
12775 // Return Instruction
12776 // Remove the return address & jump to it.
12777 // Notice: We always emit a nop after a ret to make sure there is room
12778 // for safepoint patching
12779 instruct Ret()
12780 %{
12781   match(Return);
12782 
12783   format %{ "ret" %}
12784   ins_encode %{
12785     __ ret(0);
< prev index next >