586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
605 int MachCallRuntimeNode::ret_addr_offset() {
606 int offset = 13; // movq r10,#addr; callq (r10)
607 if (this->ideal_Opcode() != Op_CallLeafVector) {
608 offset += clear_avx_size();
609 }
610 return offset;
611 }
612 //
613 // Compute padding required for nodes which need alignment
614 //
615
616 // The address of the call instruction needs to be 4-byte aligned to
617 // ensure that it does not span a cache line so that it can be patched.
618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
619 {
620 current_offset += clear_avx_size(); // skip vzeroupper
621 current_offset += 1; // skip call opcode byte
622 return align_up(current_offset, alignment_required()) - current_offset;
623 }
624
625 // The address of the call instruction needs to be 4-byte aligned to
626 // ensure that it does not span a cache line so that it can be patched.
627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
628 {
629 current_offset += clear_avx_size(); // skip vzeroupper
630 current_offset += 11; // skip movq instruction + call opcode byte
631 return align_up(current_offset, alignment_required()) - current_offset;
817 st->print("\n\t");
818 st->print("# stack alignment check");
819 #endif
820 }
821 if (C->stub_function() != nullptr) {
822 st->print("\n\t");
823 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
824 st->print("\n\t");
825 st->print("je fast_entry\t");
826 st->print("\n\t");
827 st->print("call #nmethod_entry_barrier_stub\t");
828 st->print("\n\tfast_entry:");
829 }
830 st->cr();
831 }
832 #endif
833
// Emit the method prologue: optional class-initialization barrier for the
// holder class, then stack-bang and frame setup via verified_entry().
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    // Fast path falls through when the holder class is initialized;
    // otherwise re-dispatch through the wrong-method stub.
    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  // The frame is fully built at this point.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
866
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  // Prologue size varies (clinit barrier, stack bang, vzeroupper), so let
  // the generic implementation measure the emitted code.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
872
int MachPrologNode::reloc() const
{
  // Conservative upper bound on the relocation entries the prologue emits.
  return 0; // a large enough number
}
877
878 //=============================================================================
879 #ifndef PRODUCT
880 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
881 {
882 Compile* C = ra_->C;
883 if (generate_vzeroupper(C)) {
884 st->print("vzeroupper");
885 st->cr(); st->print("\t");
886 }
887
888 int framesize = C->output()->frame_size_in_bytes();
889 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
890 // Remove word for return adr already pushed
891 // and RBP
892 framesize -= 2*wordSize;
899 st->print_cr("popq rbp");
900 if (do_polling() && C->is_method_compilation()) {
901 st->print("\t");
902 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
903 "ja #safepoint_stub\t"
904 "# Safepoint: poll for GC");
905 }
906 }
907 #endif
908
// Emit the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and a return-site safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-site safepoint poll; the out-of-line stub is only created when
    // actually emitting code, not while merely measuring code size.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
949
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  // Epilogue size varies (vzeroupper, poll, reserved-stack check); measure it.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
955
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (the return poll emits one).
  return 2; // a large enough number
}
960
// Use the default pipeline class for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
965
966 //=============================================================================
967
// Register classes used to classify the endpoints of spill copies.
enum RC {
  rc_bad,   // not a register operand
  rc_int,   // general purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
975
1537 #endif
1538
1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1540 {
1541 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1542 int reg = ra_->get_encode(this);
1543
1544 __ lea(as_Register(reg), Address(rsp, offset));
1545 }
1546
1547 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1548 {
1549 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1550 if (ra_->get_encode(this) > 15) {
1551 return (offset < 0x80) ? 6 : 9; // REX2
1552 } else {
1553 return (offset < 0x80) ? 5 : 8; // REX
1554 }
1555 }
1556
1557 //=============================================================================
1558 #ifndef PRODUCT
1559 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1560 {
1561 if (UseCompressedClassPointers) {
1562 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1563 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1564 } else {
1565 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1566 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1567 }
1568 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1569 }
1570 #endif
1571
// Unverified entry point: emit the inline cache check (see format above
// for the instruction shape).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1576
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  // Size varies with the compressed-class-pointer mode; measure the code.
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1582
1583
1584 //=============================================================================
1585
1586 bool Matcher::supports_vector_calling_convention(void) {
1587 if (EnableVectorSupport && UseVectorStubs) {
1588 return true;
1589 }
1590 return false;
1591 }
1592
1593 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1594 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1595 int lo = XMM0_num;
1596 int hi = XMM0b_num;
1597 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1598 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1599 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1600 return OptoRegPair(hi, lo);
1601 }
1602
1603 // Is this branch offset short enough that a short branch can be used?
3038 %}
3039 %}
3040
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Require the int index to be provably non-negative (its long type has
  // _lo >= 0) before folding the ConvI2L into the addressing mode.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3057
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use shift 3, so the address decodes as
  // [R12 + narrow_oop << 3 + off].
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3075
3076 // Indirect Memory Operand
3077 operand indirectNarrow(rRegN reg)
3384 equal(0x4, "e");
3385 not_equal(0x5, "ne");
3386 less(0x2, "b");
3387 greater_equal(0x3, "ae");
3388 less_equal(0x6, "be");
3389 greater(0x7, "a");
3390 overflow(0x0, "o");
3391 no_overflow(0x1, "no");
3392 %}
3393 %}
3394
3395 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3397 // instruction definitions by not requiring the AD writer to specify separate
3398 // instructions for every form of operand when the instruction accepts
3399 // multiple operand types with the same basic encoding and format. The classic
3400 // case of this is memory operands.
3401
// Every addressing-mode operand accepted by instructions that take a
// generic "memory" operand, including the narrow-oop (compressed) variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3408
3409 //----------PIPELINE-----------------------------------------------------------
3410 // Rules which define the behavior of the target architectures pipeline.
3411 pipeline %{
3412
3413 //----------ATTRIBUTES---------------------------------------------------------
3414 attributes %{
  variable_size_instructions;        // Instructions are of variable size
3416 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
3417 instruction_unit_size = 1; // An instruction is 1 bytes long
3418 instruction_fetch_unit_size = 16; // The processor fetches one line
3419 instruction_fetch_units = 1; // of 16 bytes
3420
3421 // List of nop instructions
3422 nops( MachNop );
3423 %}
3424
5925 format %{ "MEMBAR-storestore (empty encoding)" %}
5926 ins_encode( );
5927 ins_pipe(empty);
5928 %}
5929
//----------Move Instructions--------------------------------------------------

// Reinterpret a long as a pointer (no bits change).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when the allocator put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5944
// Reinterpret a pointer as a long (no bits change).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when the allocator put src and dst in the same register.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5957
5958 // Convert oop into int for vectors alignment masking
5959 instruct convP2I(rRegI dst, rRegP src)
5960 %{
5961 match(Set dst (ConvL2I (CastP2X src)));
5962
5963 format %{ "movl $dst, $src\t# ptr -> int" %}
5964 ins_encode %{
12135 effect(DEF dst, USE src);
12136 ins_cost(100);
12137 format %{ "movd $dst,$src\t# MoveI2F" %}
12138 ins_encode %{
12139 __ movdl($dst$$XMMRegister, $src$$Register);
12140 %}
12141 ins_pipe( pipe_slow );
12142 %}
12143
// Bitwise move of a 64-bit GPR into an XMM register (no value conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12154
12155 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
12157 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
12158 Universe dummy, rFlagsReg cr)
12159 %{
12160 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
12161 match(Set dummy (ClearArray cnt base));
12162 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
12163
12164 format %{ $$template
12165 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12166 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12167 $$emit$$"jg LARGE\n\t"
12168 $$emit$$"dec rcx\n\t"
12169 $$emit$$"js DONE\t# Zero length\n\t"
12170 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12171 $$emit$$"dec rcx\n\t"
12172 $$emit$$"jge LOOP\n\t"
12173 $$emit$$"jmp DONE\n\t"
12174 $$emit$$"# LARGE:\n\t"
12175 if (UseFastStosb) {
12176 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12177 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12178 } else if (UseXMMForObjInit) {
12179 $$emit$$"mov rdi,rax\n\t"
12180 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12181 $$emit$$"jmpq L_zero_64_bytes\n\t"
12182 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12190 $$emit$$"jl L_tail\n\t"
12191 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12192 $$emit$$"add 0x20,rax\n\t"
12193 $$emit$$"sub 0x4,rcx\n\t"
12194 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12195 $$emit$$"add 0x4,rcx\n\t"
12196 $$emit$$"jle L_end\n\t"
12197 $$emit$$"dec rcx\n\t"
12198 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12199 $$emit$$"vmovq xmm0,(rax)\n\t"
12200 $$emit$$"add 0x8,rax\n\t"
12201 $$emit$$"dec rcx\n\t"
12202 $$emit$$"jge L_sloop\n\t"
12203 $$emit$$"# L_end:\n\t"
12204 } else {
12205 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12206 }
12207 $$emit$$"# DONE"
12208 %}
12209 ins_encode %{
12210 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12211 $tmp$$XMMRegister, false, knoreg);
12212 %}
12213 ins_pipe(pipe_slow);
12214 %}
12215
12216 // Small non-constant length ClearArray for AVX512 targets.
12217 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
12218 Universe dummy, rFlagsReg cr)
12219 %{
12220 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
12221 match(Set dummy (ClearArray cnt base));
12222 ins_cost(125);
12223 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
12224
12225 format %{ $$template
12226 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
12227 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
12228 $$emit$$"jg LARGE\n\t"
12229 $$emit$$"dec rcx\n\t"
12230 $$emit$$"js DONE\t# Zero length\n\t"
12231 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
12232 $$emit$$"dec rcx\n\t"
12233 $$emit$$"jge LOOP\n\t"
12234 $$emit$$"jmp DONE\n\t"
12235 $$emit$$"# LARGE:\n\t"
12236 if (UseFastStosb) {
12237 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
12238 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
12239 } else if (UseXMMForObjInit) {
12240 $$emit$$"mov rdi,rax\n\t"
12241 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
12242 $$emit$$"jmpq L_zero_64_bytes\n\t"
12243 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
12251 $$emit$$"jl L_tail\n\t"
12252 $$emit$$"vmovdqu ymm0,(rax)\n\t"
12253 $$emit$$"add 0x20,rax\n\t"
12254 $$emit$$"sub 0x4,rcx\n\t"
12255 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
12256 $$emit$$"add 0x4,rcx\n\t"
12257 $$emit$$"jle L_end\n\t"
12258 $$emit$$"dec rcx\n\t"
12259 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
12260 $$emit$$"vmovq xmm0,(rax)\n\t"
12261 $$emit$$"add 0x8,rax\n\t"
12262 $$emit$$"dec rcx\n\t"
12263 $$emit$$"jge L_sloop\n\t"
12264 $$emit$$"# L_end:\n\t"
12265 } else {
12266 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
12267 }
12268 $$emit$$"# DONE"
12269 %}
12270 ins_encode %{
12271 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
12272 $tmp$$XMMRegister, false, $ktmp$$KRegister);
12273 %}
12274 ins_pipe(pipe_slow);
12275 %}
12276
// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  // Selected only for large arrays when AVX-512 is not available; the
  // small-array case is matched by rep_stos above.
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true; knoreg because no opmask register is available.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12327
// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  // Selected only for large arrays on AVX-512 capable targets; needs a
  // temporary opmask register (ktmp) in addition to the XMM temp.
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large == true; pass the opmask temp for the AVX-512 code paths.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12378
// Small constant length ClearArray for AVX512 targets.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  // Compile-time-constant, small length; requires AVX512VL and vectors of
  // at least 32 bytes.
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12392
// Latin1 x Latin1 string comparison for targets without AVX512VLBW.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register in the non-AVX512VLBW path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12408
14241
14242 ins_cost(300);
14243 format %{ "call_leaf,runtime " %}
14244 ins_encode(clear_avx, Java_To_Runtime(meth));
14245 ins_pipe(pipe_slow);
14246 %}
14247
// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  // Note: unlike the other leaf calls, no clear_avx is emitted here; this
  // matches the Op_CallLeafVector special case in
  // MachCallRuntimeNode::ret_addr_offset.
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14259
// Call runtime without safepoint
// (no FP arguments/results; AVX state is cleared before the call).
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14271
14272 // Return Instruction
14273 // Remove the return address & jump to it.
14274 // Notice: We always emit a nop after a ret to make sure there is room
14275 // for safepoint patching
14276 instruct Ret()
14277 %{
14278 match(Return);
14279
14280 format %{ "ret" %}
14281 ins_encode %{
14282 __ ret(0);
|
586 }
587
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  // Account for an optional vzeroupper emitted before the call.
  offset += clear_avx_size();
  return offset;
}
597
int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  // Account for an optional vzeroupper emitted before the call.
  offset += clear_avx_size();
  return offset;
}
604
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector does not emit a vzeroupper before the call.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
616
617 //
618 // Compute padding required for nodes which need alignment
619 //
620
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the displacement that follows the opcode is aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
629
630 // The address of the call instruction needs to be 4-byte aligned to
631 // ensure that it does not span a cache line so that it can be patched.
632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
633 {
634 current_offset += clear_avx_size(); // skip vzeroupper
635 current_offset += 11; // skip movq instruction + call opcode byte
636 return align_up(current_offset, alignment_required()) - current_offset;
822 st->print("\n\t");
823 st->print("# stack alignment check");
824 #endif
825 }
826 if (C->stub_function() != nullptr) {
827 st->print("\n\t");
828 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
829 st->print("\n\t");
830 st->print("je fast_entry\t");
831 st->print("\n\t");
832 st->print("call #nmethod_entry_barrier_stub\t");
833 st->print("\n\tfast_entry:");
834 }
835 st->cr();
836 }
837 #endif
838
// Emit the method prologue: frame setup, nmethod entry barrier (for normal
// methods), then bind the verified-entry label that MachVEPNode jumps to.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Stubs do not get an nmethod entry barrier.
    __ entry_barrier();
  }

  if (!Compile::current()->output()->in_scratch_emit_size()) {
    // Bind the label targeted by MachVEPNode::emit (skipped while only
    // measuring code size, where the label has no real position).
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
861
int MachPrologNode::reloc() const
{
  // Conservative upper bound on the relocation entries the prologue emits.
  return 0; // a large enough number
}
866
867 //=============================================================================
868 #ifndef PRODUCT
869 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
870 {
871 Compile* C = ra_->C;
872 if (generate_vzeroupper(C)) {
873 st->print("vzeroupper");
874 st->cr(); st->print("\t");
875 }
876
877 int framesize = C->output()->frame_size_in_bytes();
878 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
879 // Remove word for return adr already pushed
880 // and RBP
881 framesize -= 2*wordSize;
888 st->print_cr("popq rbp");
889 if (do_polling() && C->is_method_compilation()) {
890 st->print("\t");
891 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
892 "ja #safepoint_stub\t"
893 "# Safepoint: poll for GC");
894 }
895 }
896 #endif
897
// Emit the method epilogue: optional vzeroupper, frame removal (with stack
// repair if needed), reserved stack check, and a return-site safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-site safepoint poll; the out-of-line stub is only created when
    // actually emitting code, not while merely measuring code size.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
928
int MachEpilogNode::reloc() const
{
  // Upper bound on relocation entries (the return poll emits one).
  return 2; // a large enough number
}
933
// Use the default pipeline class for the epilogue.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
938
939 //=============================================================================
940
// Register classes used to classify the endpoints of spill copies.
enum RC {
  rc_bad,   // not a register operand
  rc_int,   // general purpose register
  rc_kreg,  // opmask (k) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
948
1510 #endif
1511
// Materialize the address of this node's stack slot into its register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
1519
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  // Size of the lea above: REX2 prefix (encodings above 15) adds one byte
  // over REX; a displacement not fitting in 8 bits adds three more.
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if (ra_->get_encode(this) > 15) {
    return (offset < 0x80) ? 6 : 9; // REX2
  } else {
    return (offset < 0x80) ? 5 : 8; // REX
  }
}
1529
1530 //=============================================================================
1531 #ifndef PRODUCT
// Debug-only placeholder printout for the inline-type entry point.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1536 #endif
1537
// Emit an inline-type-aware entry point. The unverified variant is just an
// inline cache check; the verified variant unpacks inline-type arguments
// passed as oops and then jumps to the verified entry bound by MachPrologNode.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build (and immediately tear down) a temporary frame around the
      // argument unpacking below.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // While measuring size, the real label is unbound; use a dummy.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1572
1573 //=============================================================================
1574 #ifndef PRODUCT
1575 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1576 {
1577 if (UseCompressedClassPointers) {
1578 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1579 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1580 } else {
1581 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1582 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1583 }
1584 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1585 }
1586 #endif
1587
// Emit the unverified entry point: an inline cache check, aligned to
// InteriorEntryAlignment (see the PRODUCT-only format() above for the
// instruction sequence this expands to).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1592
1593 //=============================================================================
1594
1595 bool Matcher::supports_vector_calling_convention(void) {
1596 if (EnableVectorSupport && UseVectorStubs) {
1597 return true;
1598 }
1599 return false;
1600 }
1601
1602 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1603 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1604 int lo = XMM0_num;
1605 int hi = XMM0b_num;
1606 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1607 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1608 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1609 return OptoRegPair(hi, lo);
1610 }
1611
1612 // Is this branch offset short enough that a short branch can be used?
3047 %}
3048 %}
3049
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Addressing mode [reg + off + idx << scale] with a 32-bit int index.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the ConvI2L'd index is provably non-negative (type lower
  // bound >= 0), so sign-extension cannot change the computed address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3066
// Indirect Narrow Oop Operand
// Matches a DecodeN used directly as an address: the oop is materialized by
// the addressing mode itself as R12 + (narrow_oop << 3).
operand indCompressedOop(rRegN reg) %{
  // Only valid when compressed oops use an 8-byte (times_8) shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
3082
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only valid when compressed oops use an 8-byte (times_8) shift.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3100
3101 // Indirect Memory Operand
3102 operand indirectNarrow(rRegN reg)
3409 equal(0x4, "e");
3410 not_equal(0x5, "ne");
3411 less(0x2, "b");
3412 greater_equal(0x3, "ae");
3413 less_equal(0x6, "be");
3414 greater(0x7, "a");
3415 overflow(0x0, "o");
3416 no_overflow(0x1, "no");
3417 %}
3418 %}
3419
3420 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
3422 // instruction definitions by not requiring the AD writer to specify separate
3423 // instructions for every form of operand when the instruction accepts
3424 // multiple operand types with the same basic encoding and format. The classic
3425 // case of this is memory operands.
3426
// All addressing modes (regular and narrow-oop forms) accepted by
// instructions that take a generic "memory" operand.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3433
3434 //----------PIPELINE-----------------------------------------------------------
3435 // Rules which define the behavior of the target architectures pipeline.
3436 pipeline %{
3437
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
    variable_size_instructions;        // Instructions are variable-length (x86)
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 bytes long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes

    // List of nop instructions
    nops( MachNop );
%}
3449
5950 format %{ "MEMBAR-storestore (empty encoding)" %}
5951 ins_encode( );
5952 ins_pipe(empty);
5953 %}
5954
5955 //----------Move Instructions--------------------------------------------------
5956
// Cast a long to a pointer. A plain register-to-register move, elided
// entirely when register allocation assigned src and dst the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5969
// Cast an int to a narrow pointer. A 32-bit register-to-register move,
// elided when src and dst share a register.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit movl, so describe it as movl (was: "movq",
  // which mismatched the emitted instruction in disassembly output).
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5982
// Cast a narrow pointer to a long. Register-to-register move, elided when
// src and dst share a register.
// NOTE(review): this matches the CastP2X ideal node (same as castP2X below);
// presumably the rRegN operand type is what restricts this match to narrow
// sources — confirm against the matcher rules.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5995
// Cast a pointer to a long. Register-to-register move, elided when src and
// dst share a register.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
6008
6009 // Convert oop into int for vectors alignment masking
6010 instruct convP2I(rRegI dst, rRegP src)
6011 %{
6012 match(Set dst (ConvL2I (CastP2X src)));
6013
6014 format %{ "movl $dst, $src\t# ptr -> int" %}
6015 ins_encode %{
12186 effect(DEF dst, USE src);
12187 ins_cost(100);
12188 format %{ "movd $dst,$src\t# MoveI2F" %}
12189 ins_encode %{
12190 __ movdl($dst$$XMMRegister, $src$$Register);
12191 %}
12192 ins_pipe( pipe_slow );
12193 %}
12194
// Raw bit move of a long into a double register (MoveL2D): movdq copies the
// 64-bit GPR pattern into the XMM register with no numeric conversion.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12205
12206
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Fills cnt words starting at base with val; cnt, base and val are clobbered.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  // Small (non-large) clears, filling with a value (not word-copy-only),
  // on pre-AVX512 hardware.
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
12267
// Small non-constant length ClearArray, word-copy-only variant, for
// non-AVX512 targets. Same shape as rep_stos but never uses rep stosb
// (no byte-granularity stores).
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
12323
// Small non-constant length ClearArray for AVX512 targets.
// Uses an opmask register (ktmp) in the clear_mem expansion.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12384
// Small non-constant length ClearArray, word-copy-only variant, for AVX512
// targets (opmask register ktmp used by the clear_mem expansion).
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = false, word_copy_only = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12444
// Large non-constant length ClearArray for non-AVX512 targets.
// No small-size fast path: goes straight to the bulk-clear strategies.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = false)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
12494
// Large non-constant length ClearArray, word-copy-only variant, for
// non-AVX512 targets (never uses byte-granularity rep stosb).
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = true)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
12540
// Large non-constant length ClearArray for AVX512 targets.
// Uses an opmask register (ktmp) in the clear_mem expansion.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = false, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12591
// Large non-constant length ClearArray, word-copy-only variant, for AVX512
// targets (opmask register ktmp used by the clear_mem expansion).
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // clear_mem(base, cnt, val, tmp, is_large = true, word_copy_only = true, ktmp)
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12641
// Small constant length ClearArray for AVX512 targets.
// The element count is a compile-time immediate, so clear_mem can emit a
// straight-line (loop-free) masked-store sequence; hence the low ins_cost
// relative to the non-constant variants.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
12656
// Compare two Latin-1 (byte[]) strings; the signed comparison result lands
// in rax. Non-AVX512VLBW variant (passes knoreg for the opmask).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12672
14505
14506 ins_cost(300);
14507 format %{ "call_leaf,runtime " %}
14508 ins_encode(clear_avx, Java_To_Runtime(meth));
14509 ins_pipe(pipe_slow);
14510 %}
14511
// Call runtime without safepoint and with vector arguments
// NOTE: unlike the other runtime-call rules this does not emit clear_avx
// (presumably because vector argument registers must stay live across the
// call boundary — confirm); MachCallRuntimeNode::ret_addr_offset likewise
// skips clear_avx_size() for Op_CallLeafVector.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14523
// Call runtime without safepoint
// entry point is null, target holds the address to call
instruct CallLeafNoFPInDirect(rRegP target)
%{
  // Only used when the call target is not known statically.
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    // Indirect call through the register holding the target address.
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
14539
// Direct CallLeafNoFP: the entry point is known statically. Emits clear_avx
// before the call (cf. CallLeafNoFPInDirect above, which does not).
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
14551
14552 // Return Instruction
14553 // Remove the return address & jump to it.
14554 // Notice: We always emit a nop after a ret to make sure there is room
14555 // for safepoint patching
14556 instruct Ret()
14557 %{
14558 match(Return);
14559
14560 format %{ "ret" %}
14561 ins_encode %{
14562 __ ret(0);
|