< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   int offset = 5; // 5 bytes from start of call to where return address points
  594   offset += clear_avx_size();
  595   return offset;
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
  600   int offset = 15; // 15 bytes from start of call to where return address points
  601   offset += clear_avx_size();
  602   return offset;
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {




  606   int offset = 13; // movq r10,#addr; callq (r10)
  607   if (this->ideal_Opcode() != Op_CallLeafVector) {
  608     offset += clear_avx_size();
  609   }
  610   return offset;
  611 }

  612 //
  613 // Compute padding required for nodes which need alignment
  614 //
  615 
  616 // The address of the call instruction needs to be 4-byte aligned to
  617 // ensure that it does not span a cache line so that it can be patched.
  618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  619 {
  620   current_offset += clear_avx_size(); // skip vzeroupper
  621   current_offset += 1; // skip call opcode byte
  622   return align_up(current_offset, alignment_required()) - current_offset;
  623 }
  624 
  625 // The address of the call instruction needs to be 4-byte aligned to
  626 // ensure that it does not span a cache line so that it can be patched.
  627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  628 {
  629   current_offset += clear_avx_size(); // skip vzeroupper
  630   current_offset += 11; // skip movq instruction + call opcode byte
  631   return align_up(current_offset, alignment_required()) - current_offset;

  817     st->print("\n\t");
  818     st->print("# stack alignment check");
  819 #endif
  820   }
  821   if (C->stub_function() != nullptr) {
  822     st->print("\n\t");
  823     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  824     st->print("\n\t");
  825     st->print("je      fast_entry\t");
  826     st->print("\n\t");
  827     st->print("call    #nmethod_entry_barrier_stub\t");
  828     st->print("\n\tfast_entry:");
  829   }
  830   st->cr();
  831 }
  832 #endif
  833 
  834 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: an optional class-initialization barrier,
        // the verified entry sequence, then records the frame-complete offset
        // and (when a constant base node exists) the constant table base offset.
  835   Compile* C = ra_->C;
  836 
  837   int framesize = C->output()->frame_size_in_bytes();
  838   int bangsize = C->output()->bang_size_in_bytes();
  839 
        // Class-initialization barrier: the fast path continues at
        // L_skip_barrier, otherwise control jumps to the handle-wrong-method stub.
  840   if (C->clinit_barrier_on_entry()) {
  841     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  842     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  843 
  844     Label L_skip_barrier;
  845     Register klass = rscratch1;
  846 
  847     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  848     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  849 
  850     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  851 
  852     __ bind(L_skip_barrier);
  853   }
  854 
        // The bang size is passed as 0 when need_stack_bang() reports that no
        // stack bang is required; the last argument tells verified_entry
        // whether this compilation is a stub.
  855   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
  856 
        // Record the code offset reached once verified_entry has been emitted.
  857   C->output()->set_frame_complete(__ offset());
  858 
  859   if (C->has_mach_constant_base_node()) {
  860     // NOTE: We set the table base offset here because users might be
  861     // emitted before MachConstantBaseNode.
  862     ConstantTable& constant_table = C->output()->constant_table();
  863     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  864   }
  865 }
  866 
  867 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  868 {
  869   return MachNode::size(ra_); // too many variables; just compute it
  870                               // the hard way
  871 }
  872 
  873 int MachPrologNode::reloc() const
  874 {
  875   return 0; // a large enough number
  876 }
  877 
  878 //=============================================================================
  879 #ifndef PRODUCT
  880 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  881 {
  882   Compile* C = ra_->C;
  883   if (generate_vzeroupper(C)) {
  884     st->print("vzeroupper");
  885     st->cr(); st->print("\t");
  886   }
  887 
  888   int framesize = C->output()->frame_size_in_bytes();
  889   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  890   // Remove word for return adr already pushed
  891   // and RBP
  892   framesize -= 2*wordSize;

  899   st->print_cr("popq    rbp");
  900   if (do_polling() && C->is_method_compilation()) {
  901     st->print("\t");
  902     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  903                  "ja      #safepoint_stub\t"
  904                  "# Safepoint: poll for GC");
  905   }
  906 }
  907 #endif
  908 
  909 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  910 {
        // Emits the method epilog: optional vzeroupper, frame teardown
        // (addq rsp / popq rbp), optional reserved-stack check, and the
        // return safepoint poll for method compilations.
  911   Compile* C = ra_->C;
  912 
  913   if (generate_vzeroupper(C)) {
  914     // Clear upper bits of YMM registers when current compiled code uses
  915     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  916     __ vzeroupper();
  917   }
  918 
  919   int framesize = C->output()->frame_size_in_bytes();
  920   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  921   // Remove word for return adr already pushed
  922   // and RBP
  923   framesize -= 2*wordSize;
  924 
  925   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  926 
        // Skip the stack adjustment entirely when nothing remains to pop.
  927   if (framesize) {
  928     __ addq(rsp, framesize);
  929   }
  930 
  931   __ popq(rbp);
  932 
  933   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  934     __ reserved_stack_check();
  935   }
  936 
  937   if (do_polling() && C->is_method_compilation()) {
          // While only measuring size in scratch emission, branch to a
          // throwaway label; otherwise allocate a poll stub in the compile
          // arena, register it with the output, and branch to its entry.
  938     Label dummy_label;
  939     Label* code_stub = &dummy_label;
  940     if (!C->output()->in_scratch_emit_size()) {
  941       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  942       C->output()->add_stub(stub);
  943       code_stub = &stub->entry();
  944     }
  945     __ relocate(relocInfo::poll_return_type);
  946     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  947   }
  948 }
  949 
  950 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
  951 {
  952   return MachNode::size(ra_); // too many variables; just compute it
  953                               // the hard way
  954 }
  955 
  956 int MachEpilogNode::reloc() const
  957 {
  958   return 2; // a large enough number
  959 }
  960 
  961 const Pipeline* MachEpilogNode::pipeline() const
  962 {
  963   return MachNode::pipeline_class();
  964 }
  965 
  966 //=============================================================================
  967 
  // NOTE(review): the names suggest operand location classes (invalid,
  // integer register, k-register, float register, stack slot) — confirm
  // against the code that consumes this enum.
  enum RC {
    rc_bad   = 0,
    rc_int   = 1,
    rc_kreg  = 2,
    rc_float = 3,
    rc_stack = 4
  };
  975 

 1537 #endif
 1538 
 1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1540 {
 1541   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1542   int reg = ra_->get_encode(this);
 1543 
 1544   __ lea(as_Register(reg), Address(rsp, offset));
 1545 }
 1546 
 1547 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1548 {
 1549   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1550   if (ra_->get_encode(this) > 15) {
 1551     return (offset < 0x80) ? 6 : 9; // REX2
 1552   } else {
 1553     return (offset < 0x80) ? 5 : 8; // REX
 1554   }
 1555 }
 1556 











































 1557 //=============================================================================
 1558 #ifndef PRODUCT
 1559 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1560 {
 1561   if (UseCompressedClassPointers) {
 1562     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1563     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1564   } else {
 1565     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1566     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1567   }
 1568   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1569 }
 1570 #endif
 1571 
 1572 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1573 {
 1574   __ ic_check(InteriorEntryAlignment);
 1575 }
 1576 
 1577 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1578 {
 1579   return MachNode::size(ra_); // too many variables; just compute it
 1580                               // the hard way
 1581 }
 1582 
 1583 
 1584 //=============================================================================
 1585 
 1586 bool Matcher::supports_vector_calling_convention(void) {
 1587   if (EnableVectorSupport && UseVectorStubs) {
 1588     return true;
 1589   }
 1590   return false;
 1591 }
 1592 
 1593 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1594   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1595   int lo = XMM0_num;
 1596   int hi = XMM0b_num;
 1597   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1598   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1599   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1600   return OptoRegPair(hi, lo);
 1601 }
 1602 
 1603 // Is this branch offset short enough that a short branch can be used?

 3038   %}
 3039 %}
 3040 
 3041 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Matches reg + (ConvI2L(idx) << scale) + off and maps it onto a
      // base/index/scale/disp memory operand.
 3042 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3043 %{
 3044   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type reached through the matched subtree
        // has a non-negative lower bound (_lo >= 0).
        // NOTE(review): presumably that node is the ConvI2L of idx — confirm.
 3045   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3046   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3047 
 3048   op_cost(10);
 3049   format %{"[$reg + $off + $idx << $scale]" %}
 3050   interface(MEMORY_INTER) %{
 3051     base($reg);
 3052     index($idx);
 3053     scale($scale);
 3054     disp($off);
 3055   %}
 3056 %}
 3057 
















 3058 // Indirect Narrow Oop Plus Offset Operand
 3059 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3060 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Folds the DecodeN of a narrow oop into the address: R12 is the base,
      // the narrow oop register is the index, and the scale is fixed at 3.
 3061 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Only usable with compressed oops whose shift equals Address::times_8.
 3062   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3063   constraint(ALLOC_IN_RC(ptr_reg));
 3064   match(AddP (DecodeN reg) off);
 3065 
 3066   op_cost(10);
 3067   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3068   interface(MEMORY_INTER) %{
 3069     base(0xc); // R12
 3070     index($reg);
 3071     scale(0x3);
 3072     disp($off);
 3073   %}
 3074 %}
 3075 
 3076 // Indirect Memory Operand
 3077 operand indirectNarrow(rRegN reg)

 3384     equal(0x4, "e");
 3385     not_equal(0x5, "ne");
 3386     less(0x2, "b");
 3387     greater_equal(0x3, "ae");
 3388     less_equal(0x6, "be");
 3389     greater(0x7, "a");
 3390     overflow(0x0, "o");
 3391     no_overflow(0x1, "no");
 3392   %}
 3393 %}
 3394 
 3395 //----------OPERAND CLASSES----------------------------------------------------
 3396 // Operand Classes are groups of operands that are used to simplify
 3397 // instruction definitions by not requiring the AD writer to specify separate
 3398 // instructions for every form of operand when the instruction accepts
 3399 // multiple operand types with the same basic encoding and format.  The classic
 3400 // case of this is memory operands.
 3401 
      // "memory" groups every addressing-mode operand listed below, including
      // the compressed-oop form and the *Narrow variants.
 3402 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3403                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3404                indCompressedOopOffset,
 3405                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3406                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3407                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3408 
 3409 //----------PIPELINE-----------------------------------------------------------
 3410 // Rules which define the behavior of the target architectures pipeline.
 3411 pipeline %{
 3412 
 3413 //----------ATTRIBUTES---------------------------------------------------------
 3414 attributes %{
 3415   variable_size_instructions;        // Instructions vary in size
 3416   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3417   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 3418   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3419   instruction_fetch_units = 1;       // of 16 bytes
 3420 
 3421   // List of nop instructions
 3422   nops( MachNop );
 3423 %}
 3424 

 5925   format %{ "MEMBAR-storestore (empty encoding)" %}
 5926   ins_encode( );
 5927   ins_pipe(empty);
 5928 %}
 5929 
 5930 //----------Move Instructions--------------------------------------------------
 5931 
 5932 instruct castX2P(rRegP dst, rRegL src)
 5933 %{
        // Matches CastX2P (long -> pointer). The encoding is a plain register
        // move that is skipped when dst and src are the same register.
 5934   match(Set dst (CastX2P src));
 5935 
 5936   format %{ "movq    $dst, $src\t# long->ptr" %}
 5937   ins_encode %{
 5938     if ($dst$$reg != $src$$reg) {
 5939       __ movptr($dst$$Register, $src$$Register);
 5940     }
 5941   %}
 5942   ins_pipe(ialu_reg_reg); // XXX
 5943 %}
 5944 


























 5945 instruct castP2X(rRegL dst, rRegP src)
 5946 %{
        // Matches CastP2X (pointer -> long). The encoding is a plain register
        // move that is skipped when dst and src are the same register.
 5947   match(Set dst (CastP2X src));
 5948 
 5949   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5950   ins_encode %{
 5951     if ($dst$$reg != $src$$reg) {
 5952       __ movptr($dst$$Register, $src$$Register);
 5953     }
 5954   %}
 5955   ins_pipe(ialu_reg_reg); // XXX
 5956 %}
 5957 
 5958 // Convert oop into int for vectors alignment masking
 5959 instruct convP2I(rRegI dst, rRegP src)
 5960 %{
 5961   match(Set dst (ConvL2I (CastP2X src)));
 5962 
 5963   format %{ "movl    $dst, $src\t# ptr -> int" %}
 5964   ins_encode %{

12135   effect(DEF dst, USE src);
12136   ins_cost(100);
12137   format %{ "movd    $dst,$src\t# MoveI2F" %}
12138   ins_encode %{
12139     __ movdl($dst$$XMMRegister, $src$$Register);
12140   %}
12141   ins_pipe( pipe_slow );
12142 %}
12143 
12144 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Matches MoveL2D: moves a long general register into a double (XMM)
        // register. NOTE(review): the format string prints "movd" while the
        // encoding calls movdq() — confirm which mnemonic the listing should show.
12145   match(Set dst (MoveL2D src));
12146   effect(DEF dst, USE src);
12147   ins_cost(100);
12148   format %{ "movd    $dst,$src\t# MoveL2D" %}
12149   ins_encode %{
12150      __ movdq($dst$$XMMRegister, $src$$Register);
12151   %}
12152   ins_pipe( pipe_slow );
12153 %}
12154 

12155 // Fast clearing of an array
12156 // Small non-constant length ClearArray for non-AVX512 targets.
12157 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12158                   Universe dummy, rFlagsReg cr)
12159 %{
12160   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12161   match(Set dummy (ClearArray cnt base));
12162   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);





















































































































12163 
12164   format %{ $$template
12165     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12166     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12167     $$emit$$"jg      LARGE\n\t"
12168     $$emit$$"dec     rcx\n\t"
12169     $$emit$$"js      DONE\t# Zero length\n\t"
12170     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12171     $$emit$$"dec     rcx\n\t"
12172     $$emit$$"jge     LOOP\n\t"
12173     $$emit$$"jmp     DONE\n\t"
12174     $$emit$$"# LARGE:\n\t"
12175     if (UseFastStosb) {
12176        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12177        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12178     } else if (UseXMMForObjInit) {
12179        $$emit$$"mov     rdi,rax\n\t"
12180        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12181        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12182        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12190        $$emit$$"jl      L_tail\n\t"
12191        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12192        $$emit$$"add     0x20,rax\n\t"
12193        $$emit$$"sub     0x4,rcx\n\t"
12194        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12195        $$emit$$"add     0x4,rcx\n\t"
12196        $$emit$$"jle     L_end\n\t"
12197        $$emit$$"dec     rcx\n\t"
12198        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12199        $$emit$$"vmovq   xmm0,(rax)\n\t"
12200        $$emit$$"add     0x8,rax\n\t"
12201        $$emit$$"dec     rcx\n\t"
12202        $$emit$$"jge     L_sloop\n\t"
12203        $$emit$$"# L_end:\n\t"
12204     } else {
12205        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12206     }
12207     $$emit$$"# DONE"
12208   %}
12209   ins_encode %{
12210     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12211                  $tmp$$XMMRegister, false, knoreg);
12212   %}
12213   ins_pipe(pipe_slow);
12214 %}
12215 
12216 // Small non-constant length ClearArray for AVX512 targets.
12217 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12218                        Universe dummy, rFlagsReg cr)
12219 %{
12220   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12221   match(Set dummy (ClearArray cnt base));
12222   ins_cost(125);
12223   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12224 
12225   format %{ $$template
12226     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12227     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12228     $$emit$$"jg      LARGE\n\t"
12229     $$emit$$"dec     rcx\n\t"
12230     $$emit$$"js      DONE\t# Zero length\n\t"
12231     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12232     $$emit$$"dec     rcx\n\t"
12233     $$emit$$"jge     LOOP\n\t"
12234     $$emit$$"jmp     DONE\n\t"
12235     $$emit$$"# LARGE:\n\t"
12236     if (UseFastStosb) {
12237        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12238        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12239     } else if (UseXMMForObjInit) {
12240        $$emit$$"mov     rdi,rax\n\t"
12241        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12242        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12243        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12251        $$emit$$"jl      L_tail\n\t"
12252        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12253        $$emit$$"add     0x20,rax\n\t"
12254        $$emit$$"sub     0x4,rcx\n\t"
12255        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12256        $$emit$$"add     0x4,rcx\n\t"
12257        $$emit$$"jle     L_end\n\t"
12258        $$emit$$"dec     rcx\n\t"
12259        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12260        $$emit$$"vmovq   xmm0,(rax)\n\t"
12261        $$emit$$"add     0x8,rax\n\t"
12262        $$emit$$"dec     rcx\n\t"
12263        $$emit$$"jge     L_sloop\n\t"
12264        $$emit$$"# L_end:\n\t"
12265     } else {
12266        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12267     }
12268     $$emit$$"# DONE"
12269   %}
12270   ins_encode %{
12271     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12272                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
12273   %}
12274   ins_pipe(pipe_slow);
12275 %}
12276 
12277 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is_large() and UseAVX <= 2. cnt (rcx)
      // and base (rdi) are consumed and killed, zero (rax) and the flags are
      // killed, and tmp is a scratch XMM register.
12278 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12279                         Universe dummy, rFlagsReg cr)
12280 %{
12281   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
12282   match(Set dummy (ClearArray cnt base));
12283   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12284 
        // The listing printed below is chosen by UseFastStosb / UseXMMForObjInit;
        // the code itself is produced by clear_mem() in ins_encode.
12285   format %{ $$template
12286     if (UseFastStosb) {
12287        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12288        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12289        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12290     } else if (UseXMMForObjInit) {
12291        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12292        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12293        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12294        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12295        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12296        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12297        $$emit$$"add     0x40,rax\n\t"
12298        $$emit$$"# L_zero_64_bytes:\n\t"
12299        $$emit$$"sub     0x8,rcx\n\t"
12300        $$emit$$"jge     L_loop\n\t"
12301        $$emit$$"add     0x4,rcx\n\t"
12302        $$emit$$"jl      L_tail\n\t"
12303        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12304        $$emit$$"add     0x20,rax\n\t"
12305        $$emit$$"sub     0x4,rcx\n\t"
12306        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12307        $$emit$$"add     0x4,rcx\n\t"
12308        $$emit$$"jle     L_end\n\t"
12309        $$emit$$"dec     rcx\n\t"
12310        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12311        $$emit$$"vmovq   xmm0,(rax)\n\t"
12312        $$emit$$"add     0x8,rax\n\t"
12313        $$emit$$"dec     rcx\n\t"
12314        $$emit$$"jge     L_sloop\n\t"
12315        $$emit$$"# L_end:\n\t"
12316     } else {
12317        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12318        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12319     }
12320   %}
        // Passes true as the fifth clear_mem argument (the small variants pass
        // false) and knoreg in place of a mask register.
12321   ins_encode %{
12322     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12323                  $tmp$$XMMRegister, true, knoreg);
12324   %}
12325   ins_pipe(pipe_slow);
12326 %}
12327 
12328 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when the ClearArray node is_large() and UseAVX > 2. Same
      // register effects as the non-AVX512 form, plus a k-register temp (ktmp).
12329 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12330                              Universe dummy, rFlagsReg cr)
12331 %{
12332   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
12333   match(Set dummy (ClearArray cnt base));
12334   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12335 
        // The listing printed below is chosen by UseFastStosb / UseXMMForObjInit;
        // the code itself is produced by clear_mem() in ins_encode.
12336   format %{ $$template
12337     if (UseFastStosb) {
12338        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12339        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12340        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12341     } else if (UseXMMForObjInit) {
12342        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12343        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12344        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12345        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12346        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12347        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12348        $$emit$$"add     0x40,rax\n\t"
12349        $$emit$$"# L_zero_64_bytes:\n\t"
12350        $$emit$$"sub     0x8,rcx\n\t"
12351        $$emit$$"jge     L_loop\n\t"
12352        $$emit$$"add     0x4,rcx\n\t"
12353        $$emit$$"jl      L_tail\n\t"
12354        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12355        $$emit$$"add     0x20,rax\n\t"
12356        $$emit$$"sub     0x4,rcx\n\t"
12357        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12358        $$emit$$"add     0x4,rcx\n\t"
12359        $$emit$$"jle     L_end\n\t"
12360        $$emit$$"dec     rcx\n\t"
12361        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12362        $$emit$$"vmovq   xmm0,(rax)\n\t"
12363        $$emit$$"add     0x8,rax\n\t"
12364        $$emit$$"dec     rcx\n\t"
12365        $$emit$$"jge     L_sloop\n\t"
12366        $$emit$$"# L_end:\n\t"
12367     } else {
12368        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12369        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12370     }
12371   %}
        // Passes true as the fifth clear_mem argument (the small variants pass
        // false) and the allocated ktmp mask register.
12372   ins_encode %{
12373     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12374                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
12375   %}
12376   ins_pipe(pipe_slow);
12377 %}
12378 
12379 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL) and is handed to clear_mem() as a
      // constant; base is an ordinary pointer register rather than a fixed rdi.
12380 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
12381 %{
        // Requires a non-large ClearArray, MaxVectorSize >= 32 and AVX512VL.
12382   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
12383   match(Set dummy (ClearArray cnt base));
12384   ins_cost(100);
12385   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
12386   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12387   ins_encode %{
12388    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12389   %}
12390   ins_pipe(pipe_slow);
12391 %}
12392 
12393 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12394                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12395 %{
        // StrComp with LL encoding on targets without AVX512VL+BW. Both
        // string pointers and both counts are consumed and killed, the flags
        // are killed, and tmp1 is a scratch XMM register.
12396   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12397   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12398   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12399 
12400   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
        // knoreg is passed as the last argument; no k-register is allocated here.
12401   ins_encode %{
12402     __ string_compare($str1$$Register, $str2$$Register,
12403                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12404                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12405   %}
12406   ins_pipe( pipe_slow );
12407 %}
12408 

14241 
14242   ins_cost(300);
14243   format %{ "call_leaf,runtime " %}
14244   ins_encode(clear_avx, Java_To_Runtime(meth));
14245   ins_pipe(pipe_slow);
14246 %}
14247 
14248 // Call runtime without safepoint and with vector arguments
      // Unlike the neighbouring leaf-call forms, the encoding here has no
      // clear_avx step in front of Java_To_Runtime.
14249 instruct CallLeafDirectVector(method meth)
14250 %{
14251   match(CallLeafVector);
14252   effect(USE meth);
14253 
14254   ins_cost(300);
14255   format %{ "call_leaf,vector " %}
14256   ins_encode(Java_To_Runtime(meth));
14257   ins_pipe(pipe_slow);
14258 %}
14259 
14260 // Call runtime without safepoint
      // Matches CallLeafNoFP; the encoding runs clear_avx and then
      // Java_To_Runtime for the target method.
14261 instruct CallLeafNoFPDirect(method meth)
14262 %{
14263   match(CallLeafNoFP);
14264   effect(USE meth);
14265 
14266   ins_cost(300);
14267   format %{ "call_leaf_nofp,runtime " %}
14268   ins_encode(clear_avx, Java_To_Runtime(meth));
14269   ins_pipe(pipe_slow);
14270 %}
14271 
14272 // Return Instruction
14273 // Remove the return address & jump to it.
14274 // Notice: We always emit a nop after a ret to make sure there is room
14275 // for safepoint patching
14276 instruct Ret()
14277 %{
14278   match(Return);
14279 
14280   format %{ "ret" %}
14281   ins_encode %{
14282     __ ret(0);

  586 }
  587 
  588 // !!!!! Special hack to get all types of calls to specify the byte offset
  589 //       from the start of the call to the point where the return address
  590 //       will point.
  591 int MachCallStaticJavaNode::ret_addr_offset()
  592 {
  593   int offset = 5; // 5 bytes from start of call to where return address points
  594   offset += clear_avx_size();
  595   return offset;
  596 }
  597 
  598 int MachCallDynamicJavaNode::ret_addr_offset()
  599 {
  600   int offset = 15; // 15 bytes from start of call to where return address points
  601   offset += clear_avx_size();
  602   return offset;
  603 }
  604 
  605 int MachCallRuntimeNode::ret_addr_offset() {
  606   if (_entry_point == nullptr) {
  607     // CallLeafNoFPInDirect
  608     return 3; // callq (register)
  609   }
  610   int offset = 13; // movq r10,#addr; callq (r10)
  611   if (this->ideal_Opcode() != Op_CallLeafVector) {
  612     offset += clear_avx_size();
  613   }
  614   return offset;
  615 }
  616 
  617 //
  618 // Compute padding required for nodes which need alignment
  619 //
  620 
  621 // The address of the call instruction needs to be 4-byte aligned to
  622 // ensure that it does not span a cache line so that it can be patched.
  623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  624 {
  625   current_offset += clear_avx_size(); // skip vzeroupper
  626   current_offset += 1; // skip call opcode byte
  627   return align_up(current_offset, alignment_required()) - current_offset;
  628 }
  629 
  630 // The address of the call instruction needs to be 4-byte aligned to
  631 // ensure that it does not span a cache line so that it can be patched.
  632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  633 {
  634   current_offset += clear_avx_size(); // skip vzeroupper
  635   current_offset += 11; // skip movq instruction + call opcode byte
  636   return align_up(current_offset, alignment_required()) - current_offset;

  822     st->print("\n\t");
  823     st->print("# stack alignment check");
  824 #endif
  825   }
  826   if (C->stub_function() != nullptr) {
  827     st->print("\n\t");
  828     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  829     st->print("\n\t");
  830     st->print("je      fast_entry\t");
  831     st->print("\n\t");
  832     st->print("call    #nmethod_entry_barrier_stub\t");
  833     st->print("\n\tfast_entry:");
  834   }
  835   st->cr();
  836 }
  837 #endif
  838 
  839 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
        // Emits the method prolog: the verified entry sequence, an entry
        // barrier for non-stub compilations, the _verified_entry label, and
        // then records the frame-complete offset and (when a constant base
        // node exists) the constant table base offset.
  840   Compile* C = ra_->C;
  841 
  842   __ verified_entry(C);
  843 
        // The entry barrier is emitted only when this is not a stub compilation.
  844   if (ra_->C->stub_function() == nullptr) {
  845     __ entry_barrier();
  846   }
  847 
        // The label is bound only during real emission, not while the code
        // is being emitted into scratch space to measure its size.
  848   if (!Compile::current()->output()->in_scratch_emit_size()) {
  849     __ bind(*_verified_entry);
  850   }
  851 
  852   C->output()->set_frame_complete(__ offset());
  853 
  854   if (C->has_mach_constant_base_node()) {
  855     // NOTE: We set the table base offset here because users might be
  856     // emitted before MachConstantBaseNode.
  857     ConstantTable& constant_table = C->output()->constant_table();
  858     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  859   }
  860 }
  861 






  862 int MachPrologNode::reloc() const
  863 {
  864   return 0; // a large enough number
  865 }
  866 
  867 //=============================================================================
  868 #ifndef PRODUCT
  869 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  870 {
  871   Compile* C = ra_->C;
  872   if (generate_vzeroupper(C)) {
  873     st->print("vzeroupper");
  874     st->cr(); st->print("\t");
  875   }
  876 
  877   int framesize = C->output()->frame_size_in_bytes();
  878   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  879   // Remove word for return adr already pushed
  880   // and RBP
  881   framesize -= 2*wordSize;

  888   st->print_cr("popq    rbp");
  889   if (do_polling() && C->is_method_compilation()) {
  890     st->print("\t");
  891     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  892                  "ja      #safepoint_stub\t"
  893                  "# Safepoint: poll for GC");
  894   }
  895 }
  896 #endif
  897 
  898 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
  899 {
        // Emits the method epilog: optional vzeroupper, frame removal via
        // remove_frame(), optional reserved-stack check, and the return
        // safepoint poll for method compilations.
  900   Compile* C = ra_->C;
  901 
  902   if (generate_vzeroupper(C)) {
  903     // Clear upper bits of YMM registers when current compiled code uses
  904     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  905     __ vzeroupper();
  906   }
  907 
  908   // Subtract two words to account for return address and rbp
  909   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  910   __ remove_frame(initial_framesize, C->needs_stack_repair());
  911 
  912   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  913     __ reserved_stack_check();
  914   }
  915 
  916   if (do_polling() && C->is_method_compilation()) {
          // While only measuring size in scratch emission, branch to a
          // throwaway label; otherwise allocate a poll stub in the compile
          // arena, register it with the output, and branch to its entry.
  917     Label dummy_label;
  918     Label* code_stub = &dummy_label;
  919     if (!C->output()->in_scratch_emit_size()) {
  920       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  921       C->output()->add_stub(stub);
  922       code_stub = &stub->entry();
  923     }
  924     __ relocate(relocInfo::poll_return_type);
  925     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  926   }
  927 }
  928 






  929 int MachEpilogNode::reloc() const
  930 {
        // Fixed relocation figure reported for the epilog node.
        // NOTE(review): presumably an upper bound on the relocation entries the
        // epilog can produce (emit() issues one poll_return relocation) — confirm.
  931   return 2; // a large enough number
  932 }
  933 
  934 const Pipeline* MachEpilogNode::pipeline() const
  935 {
        // Delegates to the pipeline class reported by MachNode::pipeline_class().
  936   return MachNode::pipeline_class();
  937 }
  938 
  939 //=============================================================================
  940 
  941 enum RC { // Classification tags; the users of RC are not visible in this part of the file.
  942   rc_bad,   // NOTE(review): judging by the name, an invalid/unclassified value — confirm
  943   rc_int,   // presumably a general-purpose (integer) register — confirm
  944   rc_kreg,  // presumably an opmask (k) register — confirm
  945   rc_float, // presumably a floating-point/vector register — confirm
  946   rc_stack  // presumably a stack slot — confirm
  947 };
  948 

 1510 #endif
 1511 
 1512 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1513 {
        // Materializes an rsp-relative address in this node's register:
        // lea reg, [rsp + offset].
        // The offset is derived from the first element of input register mask 0.
 1514   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
        // Encoding of the register allocated to this node (the lea destination).
 1515   int reg = ra_->get_encode(this);
 1516
 1517   __ lea(as_Register(reg), Address(rsp, offset));
 1518 }
 1519 
 1520 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1521 {
        // Byte size reported for this node; intended to match the lea produced by
        // BoxLockNode::emit(). Offsets below 0x80 take the form that is 3 bytes
        // shorter (presumably an 8-bit instead of a 32-bit displacement — confirm).
        // Register encodings above 15 cost one extra byte (REX2 instead of REX).
 1522   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1523   if (ra_->get_encode(this) > 15) {
 1524     return (offset < 0x80) ? 6 : 9; // REX2
 1525   } else {
 1526     return (offset < 0x80) ? 5 : 8; // REX
 1527   }
 1528 }
 1529 
 1530 //=============================================================================
 1531 #ifndef PRODUCT
 1532 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1533 {
        // Debug listing only: prints the node name; the emitted instructions are
        // not listed individually.
 1534   st->print_cr("MachVEPNode");
 1535 }
 1536 #endif
 1537 
 1538 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1539 {
        // Emits one of two entry sequences, then pads the code so that the bytes
        // emitted by this node are a multiple of 4:
        //  - !_verified: only an inline cache check.
        //  - _verified:  unpack inline type arguments, set up the frame and jump
        //                to the verified entry label.
 1540   CodeBuffer* cbuf = masm->code();
        // Remember the instruction size before emitting, to measure this node's bytes.
 1541   uint insts_size = cbuf->insts_size();
 1542   if (!_verified) {
 1543     __ ic_check(1);
 1544   } else {
 1545     // TODO 8284443 Avoid creation of temporary frame
 1546     if (ra_->C->stub_function() == nullptr) {
            // Temporary frame: entered with a stack increment of 0, barrier
            // emitted, then removed again (without stack repair) before unpacking.
 1547       __ verified_entry(ra_->C, 0);
 1548       __ entry_barrier();
 1549       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1550       __ remove_frame(initial_framesize, false);
 1551     }
 1552     // Unpack inline type args passed as oop and then jump to
 1553     // the verified entry point (skipping the unverified entry).
 1554     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1555     // Emit code for verified entry and save increment for stack repair on return
 1556     __ verified_entry(ra_->C, sp_inc);
 1557     if (Compile::current()->output()->in_scratch_emit_size()) {
            // Scratch emission: jump to a throw-away label instead of
            // _verified_entry.
 1558       Label dummy_verified_entry;
 1559       __ jmp(dummy_verified_entry);
 1560     } else {
 1561       __ jmp(*_verified_entry);
 1562     }
 1563   }
 1564   /* WARNING these NOPs are critical so that verified entry point is properly
 1565      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
        // (4 - emitted % 4) & 3: the number of nop bytes needed to reach the next
        // multiple of 4, or 0 when the emitted size is already a multiple of 4.
 1566   int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
 1567   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1568   if (nops_cnt > 0) {
 1569     __ nop(nops_cnt);
 1570   }
 1571 }
 1572 
 1573 //=============================================================================
 1574 #ifndef PRODUCT
 1575 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1576 {
        // Debug listing of the unverified entry point: load the receiver's klass
        // into rscratch1, compare it with the speculated klass of the inline
        // cache data in rax, and branch to the IC miss stub on mismatch.
        // 32-bit forms (movl/cmpl) are listed for compressed class pointers,
        // 64-bit forms (movq/cmpq) otherwise.
 1577   if (UseCompressedClassPointers) {
 1578     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1579     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1580   } else {
          // Full-width klass pointer here: do not label it as compressed.
 1581     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 1582     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 1583   }
 1584   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1585 }
 1586 #endif
 1587 
 1588 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1589 {
        // Emits the inline cache check for the unverified entry point.
        // NOTE(review): InteriorEntryAlignment is presumably the alignment
        // ic_check establishes for the code that follows — confirm against
        // MacroAssembler::ic_check.
 1590   __ ic_check(InteriorEntryAlignment);
 1591 }
 1592 







 1593 //=============================================================================
 1594 
 1595 bool Matcher::supports_vector_calling_convention(void) {
        // True only when both EnableVectorSupport and UseVectorStubs are set.
 1596   if (EnableVectorSupport && UseVectorStubs) {
 1597     return true;
 1598   }
 1599   return false;
 1600 }
 1601 
 1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
        // Returns the XMM0 register-slot pair for a vector return value. The low
        // slot is always XMM0; the high slot depends on the ideal register kind:
        // XMM0b by default, XMM0d for VecX, XMM0h for VecY, XMM0p for VecZ.
        // Callers must only use this when vector stubs are enabled (asserted).
 1603   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1604   int lo = XMM0_num;
 1605   int hi = XMM0b_num;
 1606   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1607   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1608   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1609   return OptoRegPair(hi, lo);
 1610 }
 1611 
 1612 // Is this branch offset short enough that a short branch can be used?

 3047   %}
 3048 %}
 3049 
 3050 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3051 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3052 %{
 3053   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the type of the node reached via in(2)->in(3)->in(1)
        // has a lower bound >= 0, i.e. a non-negative value.
        // NOTE(review): that path presumably lands on the ConvI2L node of the
        // match rule below — confirm against the AddP input layout.
 3054   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3055   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3056
 3057   op_cost(10);
 3058   format %{"[$reg + $off + $idx << $scale]" %}
 3059   interface(MEMORY_INTER) %{
 3060     base($reg);
 3061     index($idx);
 3062     scale($scale);
 3063     disp($off);
 3064   %}
 3065 %}
 3066 
 3067 // Indirect Narrow Oop Operand
 3068 operand indCompressedOop(rRegN reg) %{
        // Folds a DecodeN into the addressing mode [R12 + reg << 3]; only valid
        // when compressed oops are in use with a shift equal to Address::times_8.
        // NOTE(review): R12 presumably holds the compressed-oop heap base — confirm.
 3069   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3070   constraint(ALLOC_IN_RC(ptr_reg));
 3071   match(DecodeN reg);
 3072
 3073   op_cost(10);
 3074   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3075   interface(MEMORY_INTER) %{
 3076     base(0xc); // R12
 3077     index($reg);
 3078     scale(0x3);
 3079     disp(0x0);
 3080   %}
 3081 %}
 3082 
 3083 // Indirect Narrow Oop Plus Offset Operand
 3084 // Note: the x86 architecture doesn't support "scale * index + offset" without
 3085 // a base, so we can't free r12 even when CompressedOops::base() == nullptr.
 3086 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Same folding as indCompressedOop, with the constant offset of the
        // enclosing AddP used as the displacement: [R12 + reg << 3 + off].
 3087   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3088   constraint(ALLOC_IN_RC(ptr_reg));
 3089   match(AddP (DecodeN reg) off);
 3090
 3091   op_cost(10);
 3092   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3093   interface(MEMORY_INTER) %{
 3094     base(0xc); // R12
 3095     index($reg);
 3096     scale(0x3);
 3097     disp($off);
 3098   %}
 3099 %}
 3100 
 3101 // Indirect Memory Operand
 3102 operand indirectNarrow(rRegN reg)

 3409     equal(0x4, "e");
 3410     not_equal(0x5, "ne");
 3411     less(0x2, "b");
 3412     greater_equal(0x3, "ae");
 3413     less_equal(0x6, "be");
 3414     greater(0x7, "a");
 3415     overflow(0x0, "o");
 3416     no_overflow(0x1, "no");
 3417   %}
 3418 %}
 3419 
 3420 //----------OPERAND CLASSES----------------------------------------------------
 3421 // Operand Classes are groups of operands that are used to simplify
 3422 // instruction definitions by not requiring the AD writer to specify separate
 3423 // instructions for every form of operand when the instruction accepts
 3424 // multiple operand types with the same basic encoding and format.  The classic
 3425 // case of this is memory operands.
 3426
 3427 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 3428                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3429                indCompressedOop, indCompressedOopOffset,
 3430                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 3431                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3432                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3433 
 3434 //----------PIPELINE-----------------------------------------------------------
 3435 // Rules which define the behavior of the target architectures pipeline.
 3436 pipeline %{
 3437 
 3438 //----------ATTRIBUTES---------------------------------------------------------
 3439 attributes %{
 3440   variable_size_instructions;        // Variable size instructions
 3441   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3442   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 3443   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3444   instruction_fetch_units = 1;       // of 16 bytes
 3445
 3446   // List of nop instructions
 3447   nops( MachNop );
 3448 %}
 3449 

 5950   format %{ "MEMBAR-storestore (empty encoding)" %}
 5951   ins_encode( );
 5952   ins_pipe(empty);
 5953 %}
 5954 
 5955 //----------Move Instructions--------------------------------------------------
 5956 
 5957 instruct castX2P(rRegP dst, rRegL src)
 5958 %{
        // Reinterprets a long as a pointer (CastX2P): a plain register move.
 5959   match(Set dst (CastX2P src));
 5960
 5961   format %{ "movq    $dst, $src\t# long->ptr" %}
 5962   ins_encode %{
          // Nothing is emitted when source and destination are the same register.
 5963     if ($dst$$reg != $src$$reg) {
 5964       __ movptr($dst$$Register, $src$$Register);
 5965     }
 5966   %}
 5967   ins_pipe(ialu_reg_reg); // XXX
 5968 %}
 5969 
 5970 instruct castI2N(rRegN dst, rRegI src)
 5971 %{
        // Reinterprets an int as a narrow pointer (CastI2N): a 32-bit register move.
 5972   match(Set dst (CastI2N src));
 5973
        // The listing names movl to match the 32-bit move emitted below.
 5974   format %{ "movl    $dst, $src\t# int -> narrow ptr" %}
 5975   ins_encode %{
          // Nothing is emitted when source and destination are the same register.
 5976     if ($dst$$reg != $src$$reg) {
 5977       __ movl($dst$$Register, $src$$Register);
 5978     }
 5979   %}
 5980   ins_pipe(ialu_reg_reg); // XXX
 5981 %}
 5982 
 5983 instruct castN2X(rRegL dst, rRegN src)
 5984 %{
        // Moves a narrow-pointer register into a long register for CastP2X.
        // NOTE(review): the source operand is a narrow register (rRegN), yet the
        // match rule is CastP2X, the listing says "ptr -> long" and the move is
        // pointer-sized (movptr), exactly as in castP2X — confirm this is intended.
 5985   match(Set dst (CastP2X src));
 5986
 5987   format %{ "movq    $dst, $src\t# ptr -> long" %}
 5988   ins_encode %{
          // Nothing is emitted when source and destination are the same register.
 5989     if ($dst$$reg != $src$$reg) {
 5990       __ movptr($dst$$Register, $src$$Register);
 5991     }
 5992   %}
 5993   ins_pipe(ialu_reg_reg); // XXX
 5994 %}
 5995 
 5996 instruct castP2X(rRegL dst, rRegP src)
 5997 %{
        // Reinterprets a pointer as a long (CastP2X): a plain register move.
 5998   match(Set dst (CastP2X src));
 5999
 6000   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6001   ins_encode %{
          // Nothing is emitted when source and destination are the same register.
 6002     if ($dst$$reg != $src$$reg) {
 6003       __ movptr($dst$$Register, $src$$Register);
 6004     }
 6005   %}
 6006   ins_pipe(ialu_reg_reg); // XXX
 6007 %}
 6008 
 6009 // Convert oop into int for vectors alignment masking
 6010 instruct convP2I(rRegI dst, rRegP src)
 6011 %{
 6012   match(Set dst (ConvL2I (CastP2X src)));
 6013 
 6014   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6015   ins_encode %{

12186   effect(DEF dst, USE src);
12187   ins_cost(100);
12188   format %{ "movd    $dst,$src\t# MoveI2F" %}
12189   ins_encode %{
12190     __ movdl($dst$$XMMRegister, $src$$Register);
12191   %}
12192   ins_pipe( pipe_slow );
12193 %}
12194 
12195 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // Moves a long from a general-purpose register into an XMM register
        // (MoveL2D).
        // NOTE(review): the listing prints "movd" while the encoding calls
        // movdq — confirm which mnemonic the listing should show.
12196   match(Set dst (MoveL2D src));
12197   effect(DEF dst, USE src);
12198   ins_cost(100);
12199   format %{ "movd    $dst,$src\t# MoveL2D" %}
12200   ins_encode %{
12201      __ movdq($dst$$XMMRegister, $src$$Register);
12202   %}
12203   ins_pipe( pipe_slow );
12204 %}
12205 
12206 
12207 // Fast clearing of an array
12208 // Small non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is neither large nor word-copy-only and
      // UseAVX <= 2. Count, base and fill value are pinned to rcx, rdi and rax by
      // their operand classes, and all three are consumed (USE_KILL).
12209 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12210                   Universe dummy, rFlagsReg cr)
12211 %{
12212   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12213   match(Set dummy (ClearArray (Binary cnt base) val));
12214   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12215
        // The template below is debug-listing text only; the code is produced by
        // clear_mem in ins_encode.
12216   format %{ $$template
12217     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12218     $$emit$$"jg      LARGE\n\t"
12219     $$emit$$"dec     rcx\n\t"
12220     $$emit$$"js      DONE\t# Zero length\n\t"
12221     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12222     $$emit$$"dec     rcx\n\t"
12223     $$emit$$"jge     LOOP\n\t"
12224     $$emit$$"jmp     DONE\n\t"
12225     $$emit$$"# LARGE:\n\t"
12226     if (UseFastStosb) {
12227        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12228        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12229     } else if (UseXMMForObjInit) {
12230        $$emit$$"movdq   $tmp, $val\n\t"
12231        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12232        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12233        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12234        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12235        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12236        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12237        $$emit$$"add     0x40,rax\n\t"
12238        $$emit$$"# L_zero_64_bytes:\n\t"
12239        $$emit$$"sub     0x8,rcx\n\t"
12240        $$emit$$"jge     L_loop\n\t"
12241        $$emit$$"add     0x4,rcx\n\t"
12242        $$emit$$"jl      L_tail\n\t"
12243        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12244        $$emit$$"add     0x20,rax\n\t"
12245        $$emit$$"sub     0x4,rcx\n\t"
12246        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12247        $$emit$$"add     0x4,rcx\n\t"
12248        $$emit$$"jle     L_end\n\t"
12249        $$emit$$"dec     rcx\n\t"
12250        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12251        $$emit$$"vmovq   xmm0,(rax)\n\t"
12252        $$emit$$"add     0x8,rax\n\t"
12253        $$emit$$"dec     rcx\n\t"
12254        $$emit$$"jge     L_sloop\n\t"
12255        $$emit$$"# L_end:\n\t"
12256     } else {
12257        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12258     }
12259     $$emit$$"# DONE"
12260   %}
12261   ins_encode %{
          // NOTE(review): the two trailing booleans presumably mirror the predicate
          // (is_large = false, word_copy_only = false) — confirm against clear_mem.
12262     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12263                  $tmp$$XMMRegister, false, false);
12264   %}
12265   ins_pipe(pipe_slow);
12266 %}
12267 
12268 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12269                             Universe dummy, rFlagsReg cr)
12270 %{
        // Small non-constant length ClearArray for UseAVX <= 2 when the node is
        // word-copy-only. Unlike rep_stos, the listing has no UseFastStosb
        // (byte-store) branch.
12271   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12272   match(Set dummy (ClearArray (Binary cnt base) val));
12273   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12274
        // The template below is debug-listing text only; the code is produced by
        // clear_mem in ins_encode.
12275   format %{ $$template
12276     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12277     $$emit$$"jg      LARGE\n\t"
12278     $$emit$$"dec     rcx\n\t"
12279     $$emit$$"js      DONE\t# Zero length\n\t"
12280     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12281     $$emit$$"dec     rcx\n\t"
12282     $$emit$$"jge     LOOP\n\t"
12283     $$emit$$"jmp     DONE\n\t"
12284     $$emit$$"# LARGE:\n\t"
12285     if (UseXMMForObjInit) {
12286        $$emit$$"movdq   $tmp, $val\n\t"
12287        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12288        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12289        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12290        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12291        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12292        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12293        $$emit$$"add     0x40,rax\n\t"
12294        $$emit$$"# L_zero_64_bytes:\n\t"
12295        $$emit$$"sub     0x8,rcx\n\t"
12296        $$emit$$"jge     L_loop\n\t"
12297        $$emit$$"add     0x4,rcx\n\t"
12298        $$emit$$"jl      L_tail\n\t"
12299        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12300        $$emit$$"add     0x20,rax\n\t"
12301        $$emit$$"sub     0x4,rcx\n\t"
12302        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12303        $$emit$$"add     0x4,rcx\n\t"
12304        $$emit$$"jle     L_end\n\t"
12305        $$emit$$"dec     rcx\n\t"
12306        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12307        $$emit$$"vmovq   xmm0,(rax)\n\t"
12308        $$emit$$"add     0x8,rax\n\t"
12309        $$emit$$"dec     rcx\n\t"
12310        $$emit$$"jge     L_sloop\n\t"
12311        $$emit$$"# L_end:\n\t"
12312     } else {
12313        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12314     }
12315     $$emit$$"# DONE"
12316   %}
12317   ins_encode %{
          // NOTE(review): the two trailing booleans presumably mirror the predicate
          // (is_large = false, word_copy_only = true) — confirm against clear_mem.
12318     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12319                  $tmp$$XMMRegister, false, true);
12320   %}
12321   ins_pipe(pipe_slow);
12322 %}
12323 
12324 // Small non-constant length ClearArray for AVX512 targets.
12325 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12326                        Universe dummy, rFlagsReg cr)
12327 %{
12328   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12329   match(Set dummy (ClearArray (Binary cnt base) val));
12330   ins_cost(125);
12331   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12332 
12333   format %{ $$template
12334     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12335     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12336     $$emit$$"jg      LARGE\n\t"
12337     $$emit$$"dec     rcx\n\t"
12338     $$emit$$"js      DONE\t# Zero length\n\t"
12339     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12340     $$emit$$"dec     rcx\n\t"
12341     $$emit$$"jge     LOOP\n\t"
12342     $$emit$$"jmp     DONE\n\t"
12343     $$emit$$"# LARGE:\n\t"
12344     if (UseFastStosb) {
12345        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12346        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12347     } else if (UseXMMForObjInit) {
12348        $$emit$$"mov     rdi,rax\n\t"
12349        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12350        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12351        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12359        $$emit$$"jl      L_tail\n\t"
12360        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12361        $$emit$$"add     0x20,rax\n\t"
12362        $$emit$$"sub     0x4,rcx\n\t"
12363        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12364        $$emit$$"add     0x4,rcx\n\t"
12365        $$emit$$"jle     L_end\n\t"
12366        $$emit$$"dec     rcx\n\t"
12367        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12368        $$emit$$"vmovq   xmm0,(rax)\n\t"
12369        $$emit$$"add     0x8,rax\n\t"
12370        $$emit$$"dec     rcx\n\t"
12371        $$emit$$"jge     L_sloop\n\t"
12372        $$emit$$"# L_end:\n\t"
12373     } else {
12374        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12375     }
12376     $$emit$$"# DONE"
12377   %}
12378   ins_encode %{
12379     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12380                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
12381   %}
12382   ins_pipe(pipe_slow);
12383 %}
12384 
12385 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12386                                  Universe dummy, rFlagsReg cr)

12387 %{
12388   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12389   match(Set dummy (ClearArray (Binary cnt base) val));
12390   ins_cost(125);
12391   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12392 
12393   format %{ $$template
12394     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12395     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
12396     $$emit$$"jg      LARGE\n\t"
12397     $$emit$$"dec     rcx\n\t"
12398     $$emit$$"js      DONE\t# Zero length\n\t"
12399     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
12400     $$emit$$"dec     rcx\n\t"
12401     $$emit$$"jge     LOOP\n\t"
12402     $$emit$$"jmp     DONE\n\t"
12403     $$emit$$"# LARGE:\n\t"
12404     if (UseFastStosb) {
12405        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12406        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
12407     } else if (UseXMMForObjInit) {
12408        $$emit$$"mov     rdi,rax\n\t"
12409        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12410        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12411        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"

12419        $$emit$$"jl      L_tail\n\t"
12420        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12421        $$emit$$"add     0x20,rax\n\t"
12422        $$emit$$"sub     0x4,rcx\n\t"
12423        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12424        $$emit$$"add     0x4,rcx\n\t"
12425        $$emit$$"jle     L_end\n\t"
12426        $$emit$$"dec     rcx\n\t"
12427        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12428        $$emit$$"vmovq   xmm0,(rax)\n\t"
12429        $$emit$$"add     0x8,rax\n\t"
12430        $$emit$$"dec     rcx\n\t"
12431        $$emit$$"jge     L_sloop\n\t"
12432        $$emit$$"# L_end:\n\t"
12433     } else {
12434        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
12435     }
12436     $$emit$$"# DONE"
12437   %}
12438   ins_encode %{
12439     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12440                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
12441   %}
12442   ins_pipe(pipe_slow);
12443 %}
12444 
12445 // Large non-constant length ClearArray for non-AVX512 targets.
      // Selected when the ClearArray node is large, not word-copy-only, and
      // UseAVX <= 2. The listing has no short-length loop, unlike the small variants.
12446 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12447                         Universe dummy, rFlagsReg cr)
12448 %{
12449   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12450   match(Set dummy (ClearArray (Binary cnt base) val));
12451   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12452
        // The template below is debug-listing text only; the code is produced by
        // clear_mem in ins_encode.
12453   format %{ $$template
12454     if (UseFastStosb) {
12455        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12456        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12457     } else if (UseXMMForObjInit) {
12458        $$emit$$"movdq   $tmp, $val\n\t"
12459        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12460        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12461        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12462        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12463        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12464        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12465        $$emit$$"add     0x40,rax\n\t"
12466        $$emit$$"# L_zero_64_bytes:\n\t"
12467        $$emit$$"sub     0x8,rcx\n\t"
12468        $$emit$$"jge     L_loop\n\t"
12469        $$emit$$"add     0x4,rcx\n\t"
12470        $$emit$$"jl      L_tail\n\t"
12471        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12472        $$emit$$"add     0x20,rax\n\t"
12473        $$emit$$"sub     0x4,rcx\n\t"
12474        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12475        $$emit$$"add     0x4,rcx\n\t"
12476        $$emit$$"jle     L_end\n\t"
12477        $$emit$$"dec     rcx\n\t"
12478        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12479        $$emit$$"vmovq   xmm0,(rax)\n\t"
12480        $$emit$$"add     0x8,rax\n\t"
12481        $$emit$$"dec     rcx\n\t"
12482        $$emit$$"jge     L_sloop\n\t"
12483        $$emit$$"# L_end:\n\t"
12484     } else {
12485        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12486     }
12487   %}
12488   ins_encode %{
          // NOTE(review): the two trailing booleans presumably mirror the predicate
          // (is_large = true, word_copy_only = false) — confirm against clear_mem.
12489     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12490                  $tmp$$XMMRegister, true, false);
12491   %}
12492   ins_pipe(pipe_slow);
12493 %}
12494 
12495 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
12496                                   Universe dummy, rFlagsReg cr)
12497 %{
        // Large non-constant length ClearArray for UseAVX <= 2 when the node is
        // word-copy-only. Unlike rep_stos_large, the listing has no UseFastStosb
        // (byte-store) branch.
12498   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
12499   match(Set dummy (ClearArray (Binary cnt base) val));
12500   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
12501
        // The template below is debug-listing text only; the code is produced by
        // clear_mem in ins_encode.
12502   format %{ $$template
12503     if (UseXMMForObjInit) {
12504        $$emit$$"movdq   $tmp, $val\n\t"
12505        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
12506        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
12507        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12508        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12509        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12510        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
12511        $$emit$$"add     0x40,rax\n\t"
12512        $$emit$$"# L_zero_64_bytes:\n\t"
12513        $$emit$$"sub     0x8,rcx\n\t"
12514        $$emit$$"jge     L_loop\n\t"
12515        $$emit$$"add     0x4,rcx\n\t"
12516        $$emit$$"jl      L_tail\n\t"
12517        $$emit$$"vmovdqu $tmp,(rax)\n\t"
12518        $$emit$$"add     0x20,rax\n\t"
12519        $$emit$$"sub     0x4,rcx\n\t"
12520        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12521        $$emit$$"add     0x4,rcx\n\t"
12522        $$emit$$"jle     L_end\n\t"
12523        $$emit$$"dec     rcx\n\t"
12524        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12525        $$emit$$"vmovq   xmm0,(rax)\n\t"
12526        $$emit$$"add     0x8,rax\n\t"
12527        $$emit$$"dec     rcx\n\t"
12528        $$emit$$"jge     L_sloop\n\t"
12529        $$emit$$"# L_end:\n\t"
12530     } else {
12531        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12532     }
12533   %}
12534   ins_encode %{
          // NOTE(review): the two trailing booleans presumably mirror the predicate
          // (is_large = true, word_copy_only = true) — confirm against clear_mem.
12535     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12536                  $tmp$$XMMRegister, true, true);
12537   %}
12538   ins_pipe(pipe_slow);
12539 %}
12540 
12541 // Large non-constant length ClearArray for AVX512 targets.
      // Selected when the ClearArray node is large, not word-copy-only, and
      // UseAVX > 2. Uses an opmask temporary (ktmp) in addition to the vector
      // temporary.
12542 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12543                              Universe dummy, rFlagsReg cr)
12544 %{
12545   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12546   match(Set dummy (ClearArray (Binary cnt base) val));
12547   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12548
        // Debug-listing text only; the code is produced by clear_mem in ins_encode.
        // NOTE(review): the listing still prints "xorq rax, rax" and vpxor/ymm0
        // zeroing although the fill value arrives in $val (rax) and is passed to
        // clear_mem — the text may be stale; confirm.
12549   format %{ $$template
12550     if (UseFastStosb) {
12551        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12552        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12553        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12554     } else if (UseXMMForObjInit) {
12555        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12556        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12557        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12558        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12559        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12560        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12561        $$emit$$"add     0x40,rax\n\t"
12562        $$emit$$"# L_zero_64_bytes:\n\t"
12563        $$emit$$"sub     0x8,rcx\n\t"
12564        $$emit$$"jge     L_loop\n\t"
12565        $$emit$$"add     0x4,rcx\n\t"
12566        $$emit$$"jl      L_tail\n\t"
12567        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12568        $$emit$$"add     0x20,rax\n\t"
12569        $$emit$$"sub     0x4,rcx\n\t"
12570        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12571        $$emit$$"add     0x4,rcx\n\t"
12572        $$emit$$"jle     L_end\n\t"
12573        $$emit$$"dec     rcx\n\t"
12574        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12575        $$emit$$"vmovq   xmm0,(rax)\n\t"
12576        $$emit$$"add     0x8,rax\n\t"
12577        $$emit$$"dec     rcx\n\t"
12578        $$emit$$"jge     L_sloop\n\t"
12579        $$emit$$"# L_end:\n\t"
12580     } else {
12581        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12582        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12583     }
12584   %}
12585   ins_encode %{
          // NOTE(review): the two booleans presumably mirror the predicate
          // (is_large = true, word_copy_only = false) — confirm against clear_mem.
12586     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12587                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
12588   %}
12589   ins_pipe(pipe_slow);
12590 %}
12591 
12592 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
12593                                        Universe dummy, rFlagsReg cr)

12594 %{
        // Large non-constant length ClearArray for UseAVX > 2 when the node is
        // word-copy-only.
12595   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
12596   match(Set dummy (ClearArray (Binary cnt base) val));
12597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
12598
        // Debug-listing text only; the code is produced by clear_mem in ins_encode.
        // NOTE(review): the listing still shows a UseFastStosb (byte-store) branch
        // and "xorq rax, rax" zeroing although this variant is word-copy-only and
        // takes its fill value in $val — the text may be stale; confirm.
12599   format %{ $$template
12600     if (UseFastStosb) {
12601        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12602        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
12603        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
12604     } else if (UseXMMForObjInit) {
12605        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
12606        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
12607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
12608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12609        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12610        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
12611        $$emit$$"add     0x40,rax\n\t"
12612        $$emit$$"# L_zero_64_bytes:\n\t"
12613        $$emit$$"sub     0x8,rcx\n\t"
12614        $$emit$$"jge     L_loop\n\t"
12615        $$emit$$"add     0x4,rcx\n\t"
12616        $$emit$$"jl      L_tail\n\t"
12617        $$emit$$"vmovdqu ymm0,(rax)\n\t"
12618        $$emit$$"add     0x20,rax\n\t"
12619        $$emit$$"sub     0x4,rcx\n\t"
12620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12621        $$emit$$"add     0x4,rcx\n\t"
12622        $$emit$$"jle     L_end\n\t"
12623        $$emit$$"dec     rcx\n\t"
12624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12625        $$emit$$"vmovq   xmm0,(rax)\n\t"
12626        $$emit$$"add     0x8,rax\n\t"
12627        $$emit$$"dec     rcx\n\t"
12628        $$emit$$"jge     L_sloop\n\t"
12629        $$emit$$"# L_end:\n\t"
12630     } else {
12631        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
12632        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
12633     }
12634   %}
12635   ins_encode %{
          // NOTE(review): the two booleans presumably mirror the predicate
          // (is_large = true, word_copy_only = true) — confirm against clear_mem.
12636     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
12637                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
12638   %}
12639   ins_pipe(pipe_slow);
12640 %}
12641 
12642 // Small constant length ClearArray for AVX512 targets.
      // The count is an immediate (immL), so it is passed to clear_mem as a
      // constant and only the fill value (rax) is consumed. Requires a node that is
      // neither large nor word-copy-only, MaxVectorSize >= 32 and AVX512VL support.
12643 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
12644 %{
12645   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
12646             ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
12647   match(Set dummy (ClearArray (Binary cnt base) val));
12648   ins_cost(100);
12649   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
12650   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
12651   ins_encode %{
12652     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
12653   %}
12654   ins_pipe(pipe_slow);
12655 %}
12656 
12657 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12658                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
12659 %{
        // String compare for the LL encoding on targets without AVX512VL+BW.
        // Both strings and both counts are consumed (USE_KILL); the result is
        // produced in rax. No opmask register is used: knoreg is passed instead.
12660   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12661   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12662   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12663
12664   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12665   ins_encode %{
12666     __ string_compare($str1$$Register, $str2$$Register,
12667                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12668                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
12669   %}
12670   ins_pipe( pipe_slow );
12671 %}
12672 

14505 
14506   ins_cost(300);
14507   format %{ "call_leaf,runtime " %}
14508   ins_encode(clear_avx, Java_To_Runtime(meth));
14509   ins_pipe(pipe_slow);
14510 %}
14511 
14512 // Call runtime without safepoint and with vector arguments
      // Unlike the call_leaf and call_leaf_nofp direct encodings, this one has no
      // clear_avx step before the call; MachCallRuntimeNode::ret_addr_offset()
      // likewise skips clear_avx_size() for Op_CallLeafVector.
14513 instruct CallLeafDirectVector(method meth)
14514 %{
14515   match(CallLeafVector);
14516   effect(USE meth);
14517
14518   ins_cost(300);
14519   format %{ "call_leaf,vector " %}
14520   ins_encode(Java_To_Runtime(meth));
14521   ins_pipe(pipe_slow);
14522 %}
14523 
14524 // Call runtime without safepoint
14525 // entry point is null, target holds the address to call
      // Indirect variant: selected when the call node has no static entry point,
      // so the call goes through the register operand. No clear_avx step is
      // encoded here, unlike CallLeafNoFPDirect.
14526 instruct CallLeafNoFPInDirect(rRegP target)
14527 %{
14528   predicate(n->as_Call()->entry_point() == nullptr);
14529   match(CallLeafNoFP target);
14530
14531   ins_cost(300);
14532   format %{ "call_leaf_nofp,runtime indirect " %}
14533   ins_encode %{
14534      __ call($target$$Register);
14535   %}
14536
14537   ins_pipe(pipe_slow);
14538 %}
14539 
14540 instruct CallLeafNoFPDirect(method meth)
14541 %{
        // Direct variant of CallLeafNoFP: selected when the call node has a
        // non-null entry point. Encodes clear_avx followed by the runtime call.
14542   predicate(n->as_Call()->entry_point() != nullptr);
14543   match(CallLeafNoFP);
14544   effect(USE meth);
14545
14546   ins_cost(300);
14547   format %{ "call_leaf_nofp,runtime " %}
14548   ins_encode(clear_avx, Java_To_Runtime(meth));
14549   ins_pipe(pipe_slow);
14550 %}
14551 
14552 // Return Instruction
14553 // Remove the return address & jump to it.
14554 // Notice: We always emit a nop after a ret to make sure there is room
14555 // for safepoint patching
14556 instruct Ret()
14557 %{
14558   match(Return);
14559 
14560   format %{ "ret" %}
14561   ins_encode %{
14562     __ ret(0);
< prev index next >