586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
605 int MachCallRuntimeNode::ret_addr_offset() {
606 int offset = 13; // movq r10,#addr; callq (r10)
607 if (this->ideal_Opcode() != Op_CallLeafVector) {
608 offset += clear_avx_size();
609 }
610 return offset;
611 }
612 //
613 // Compute padding required for nodes which need alignment
614 //
615
616 // The address of the call instruction needs to be 4-byte aligned to
617 // ensure that it does not span a cache line so that it can be patched.
618 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
619 {
620 current_offset += clear_avx_size(); // skip vzeroupper
621 current_offset += 1; // skip call opcode byte
622 return align_up(current_offset, alignment_required()) - current_offset;
623 }
624
625 // The address of the call instruction needs to be 4-byte aligned to
626 // ensure that it does not span a cache line so that it can be patched.
627 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
628 {
629 current_offset += clear_avx_size(); // skip vzeroupper
630 current_offset += 11; // skip movq instruction + call opcode byte
631 return align_up(current_offset, alignment_required()) - current_offset;
817 st->print("\n\t");
818 st->print("# stack alignment check");
819 #endif
820 }
821 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
822 st->print("\n\t");
823 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
824 st->print("\n\t");
825 st->print("je fast_entry\t");
826 st->print("\n\t");
827 st->print("call #nmethod_entry_barrier_stub\t");
828 st->print("\n\tfast_entry:");
829 }
830 st->cr();
831 }
832 #endif
833
// Emit the method prologue: optional class-initialization barrier, frame
// setup (stack bang + push rbp + frame allocation), and constant-table
// base-offset bookkeeping.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Guard entry of a static method of a not-yet-initialized class:
    // fall through once the holder is initialized, otherwise divert to
    // the handle_wrong_method stub.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Build the frame; only bang the stack when the computed bang size requires it.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
866
// Size in bytes of the emitted prologue; the prologue shape depends on too
// many runtime conditions to compute statically, so emit-and-measure.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
872
// Upper bound on relocation entries this node contributes.
int MachPrologNode::reloc() const
{
  return 0; // no relocations are required for the prologue itself
}
877
878 //=============================================================================
879 #ifndef PRODUCT
// Pretty-print the epilogue for -XX:+PrintOptoAssembly style output;
// mirrors what emit() below generates (non-PRODUCT only).
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;
  st->print_cr("popq rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
907 #endif
908
// Emit the method epilogue: AVX state cleanup, frame teardown, reserved
// stack check, and the return-time safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-site safepoint poll. The out-of-line slow path lives in a
    // C2SafepointPollStub; during the scratch (size-measuring) pass no
    // stub is allocated, so a dummy label stands in for its entry.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
949
// Size in bytes of the emitted epilogue; shape varies with flags and frame
// size, so emit-and-measure rather than computing statically.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
955
// Upper bound on relocation entries (e.g. the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
960
// Epilogue has no special scheduling needs; use the generic pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
965
966 //=============================================================================
967
// Register class of a value for spill/copy decisions.
enum RC {
  rc_bad,    // not a register / unallocated
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
975
1533 int reg = ra_->get_reg_first(this);
1534 st->print("leaq %s, [rsp + #%d]\t# box lock",
1535 Matcher::regName[reg], offset);
1536 }
1537 #endif
1538
1539 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1540 {
1541 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1542 int reg = ra_->get_encode(this);
1543
1544 __ lea(as_Register(reg), Address(rsp, offset));
1545 }
1546
// Encoded size of the lea above: 5 bytes with an 8-bit displacement
// (offset fits in a signed byte), 8 bytes with a 32-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}
1552
1553 //=============================================================================
1554 #ifndef PRODUCT
1555 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1556 {
1557 if (UseCompressedClassPointers) {
1558 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1559 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1560 } else {
1561 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1562 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1563 }
1564 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1565 }
1566 #endif
1567
// Emit the inline-cache check at the unverified entry point; falls through
// to the verified entry on a hit, jumps to the ic-miss stub otherwise.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
1572
// Size of the IC check varies with alignment and compressed-pointer mode,
// so emit-and-measure.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1578
1579
1580 //=============================================================================
1581
1582 bool Matcher::supports_vector_calling_convention(void) {
1583 if (EnableVectorSupport && UseVectorStubs) {
1584 return true;
1585 }
1586 return false;
1587 }
1588
1589 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1590 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1591 int lo = XMM0_num;
1592 int hi = XMM0b_num;
1593 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1594 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1595 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1596 return OptoRegPair(hi, lo);
1597 }
1598
1599 // Is this branch offset short enough that a short branch can be used?
3024 %}
3025 %}
3026
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate digs into the matched (AddP (AddP reg (LShiftL (ConvI2L idx)
// scale)) off) tree to require a provably non-negative index, so the
// sign-extending ConvI2L can be folded into the addressing mode.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3043
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
// Decodes a narrow oop inline: address = R12 (heap base) + (narrow << 3) + off.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
3061
3062 // Indirect Memory Operand
3063 operand indirectNarrow(rRegN reg)
3370 equal(0x4, "e");
3371 not_equal(0x5, "ne");
3372 less(0x2, "b");
3373 greater_equal(0x3, "ae");
3374 less_equal(0x6, "be");
3375 greater(0x7, "a");
3376 overflow(0x0, "o");
3377 no_overflow(0x1, "no");
3378 %}
3379 %}
3380
3381 //----------OPERAND CLASSES----------------------------------------------------
3382 // Operand Classes are groups of operands that are used as to simplify
3383 // instruction definitions by not requiring the AD writer to specify separate
3384 // instructions for every form of operand when the instruction accepts
3385 // multiple operand types with the same basic encoding and format. The classic
3386 // case of this is memory operands.
3387
// Every addressing-mode operand accepted by instructions declared with a
// "memory" operand, including the narrow-oop (compressed) variants.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3394
3395 //----------PIPELINE-----------------------------------------------------------
3396 // Rules which define the behavior of the target architectures pipeline.
3397 pipeline %{
3398
3399 //----------ATTRIBUTES---------------------------------------------------------
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions have variable length (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
3410
5878 format %{ "MEMBAR-storestore (empty encoding)" %}
5879 ins_encode( );
5880 ins_pipe(empty);
5881 %}
5882
5883 //----------Move Instructions--------------------------------------------------
5884
// Reinterpret a raw long as a pointer; a plain register move (elided when
// source and destination registers already coincide).
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5897
// Reinterpret a pointer as a raw long; mirror of castX2P above.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5910
5911 // Convert oop into int for vectors alignment masking
5912 instruct convP2I(rRegI dst, rRegP src)
5913 %{
5914 match(Set dst (ConvL2I (CastP2X src)));
5915
5916 format %{ "movl $dst, $src\t# ptr -> int" %}
5917 ins_encode %{
10426 effect(DEF dst, USE src);
10427 ins_cost(100);
10428 format %{ "movd $dst,$src\t# MoveI2F" %}
10429 ins_encode %{
10430 __ movdl($dst$$XMMRegister, $src$$Register);
10431 %}
10432 ins_pipe( pipe_slow );
10433 %}
10434
// Bit-preserving move of a long GPR into a double XMM register (MoveL2D).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10445
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Delegates to MacroAssembler::clear_mem (large=false, no opmask register).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
10506
// Small non-constant length ClearArray for AVX512 targets.
// Same as rep_stos above, but supplies an opmask temp for masked stores.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10567
// Large non-constant length ClearArray for non-AVX512 targets.
// Delegates to MacroAssembler::clear_mem with large=true (skips the
// short-length fast path of rep_stos above).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
10618
// Large non-constant length ClearArray for AVX512 targets.
// Same as rep_stos_large above, but supplies an opmask temp register.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10669
// Small constant length ClearArray for AVX512 targets.
// The element count is an immediate, so clear_mem can emit a fully
// unrolled, possibly masked, store sequence.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10683
// Latin1 x Latin1 string comparison (StrComp LL encoding) for targets
// without AVX-512 BW/VL; fixed-register intrinsic around
// MacroAssembler::string_compare.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
10699
12477
12478 ins_cost(300);
12479 format %{ "call_leaf,runtime " %}
12480 ins_encode(clear_avx, Java_To_Runtime(meth));
12481 ins_pipe(pipe_slow);
12482 %}
12483
// Call runtime without safepoint and with vector arguments
// Note: no clear_avx here — vector arguments/results must survive the call.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12495
// Call runtime without safepoint
// Leaf call with no float/vector state to preserve; AVX upper state is
// cleared (clear_avx) before the call.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12507
12508 // Return Instruction
12509 // Remove the return address & jump to it.
12510 // Notice: We always emit a nop after a ret to make sure there is room
12511 // for safepoint patching
12512 instruct Ret()
12513 %{
12514 match(Return);
12515
12516 format %{ "ret" %}
12517 ins_encode %{
12518 __ ret(0);
|
586 }
587
588 // !!!!! Special hack to get all types of calls to specify the byte offset
589 // from the start of the call to the point where the return address
590 // will point.
591 int MachCallStaticJavaNode::ret_addr_offset()
592 {
593 int offset = 5; // 5 bytes from start of call to where return address points
594 offset += clear_avx_size();
595 return offset;
596 }
597
598 int MachCallDynamicJavaNode::ret_addr_offset()
599 {
600 int offset = 15; // 15 bytes from start of call to where return address points
601 offset += clear_avx_size();
602 return offset;
603 }
604
// Byte distance from the start of the runtime-call sequence to the return
// address left on the stack.
int MachCallRuntimeNode::ret_addr_offset() {
  if (_entry_point == nullptr) {
    // CallLeafNoFPInDirect
    return 3; // callq (register)
  }
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector calls preserve vector state, so no vzeroupper precedes them.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
616
617 //
618 // Compute padding required for nodes which need alignment
619 //
620
621 // The address of the call instruction needs to be 4-byte aligned to
622 // ensure that it does not span a cache line so that it can be patched.
623 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
624 {
625 current_offset += clear_avx_size(); // skip vzeroupper
626 current_offset += 1; // skip call opcode byte
627 return align_up(current_offset, alignment_required()) - current_offset;
628 }
629
630 // The address of the call instruction needs to be 4-byte aligned to
631 // ensure that it does not span a cache line so that it can be patched.
632 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
633 {
634 current_offset += clear_avx_size(); // skip vzeroupper
635 current_offset += 11; // skip movq instruction + call opcode byte
636 return align_up(current_offset, alignment_required()) - current_offset;
822 st->print("\n\t");
823 st->print("# stack alignment check");
824 #endif
825 }
826 if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
827 st->print("\n\t");
828 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
829 st->print("\n\t");
830 st->print("je fast_entry\t");
831 st->print("\n\t");
832 st->print("call #nmethod_entry_barrier_stub\t");
833 st->print("\n\tfast_entry:");
834 }
835 st->cr();
836 }
837 #endif
838
839 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
840 Compile* C = ra_->C;
841
842 __ verified_entry(C);
843
844 if (ra_->C->stub_function() == nullptr) {
845 __ entry_barrier();
846 }
847
848 if (!Compile::current()->output()->in_scratch_emit_size()) {
849 __ bind(*_verified_entry);
850 }
851
852 C->output()->set_frame_complete(__ offset());
853
854 if (C->has_mach_constant_base_node()) {
855 // NOTE: We set the table base offset here because users might be
856 // emitted before MachConstantBaseNode.
857 ConstantTable& constant_table = C->output()->constant_table();
858 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
859 }
860 }
861
// Upper bound on relocation entries this node contributes.
int MachPrologNode::reloc() const
{
  return 0; // no relocations are required for the prologue itself
}
866
867 //=============================================================================
868 #ifndef PRODUCT
// Pretty-print the epilogue for -XX:+PrintOptoAssembly style output;
// mirrors what emit() below generates (non-PRODUCT only).
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;
  st->print_cr("popq rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
896 #endif
897
// Emit the method epilogue: AVX state cleanup, frame removal (with stack
// repair when needed), reserved stack check, and return-time safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-site safepoint poll. The out-of-line slow path lives in a
    // C2SafepointPollStub; during the scratch (size-measuring) pass no
    // stub is allocated, so a dummy label stands in for its entry.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}
928
// Upper bound on relocation entries (e.g. the poll-return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
933
// Epilogue has no special scheduling needs; use the generic pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
938
939 //=============================================================================
940
// Register class of a value for spill/copy decisions.
enum RC {
  rc_bad,    // not a register / unallocated
  rc_int,    // general-purpose register
  rc_kreg,   // opmask (k) register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
948
1506 int reg = ra_->get_reg_first(this);
1507 st->print("leaq %s, [rsp + #%d]\t# box lock",
1508 Matcher::regName[reg], offset);
1509 }
1510 #endif
1511
1512 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1513 {
1514 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1515 int reg = ra_->get_encode(this);
1516
1517 __ lea(as_Register(reg), Address(rsp, offset));
1518 }
1519
// Encoded size of the lea above: 5 bytes with an 8-bit displacement
// (offset fits in a signed byte), 8 bytes with a 32-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}
1525
1526 //=============================================================================
1527 #ifndef PRODUCT
// Placeholder pretty-printer for the inline-type verified entry point
// (non-PRODUCT only); the emitted code is described in emit() below.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
1532 #endif
1533
// Emit an entry point for methods taking inline-type arguments: either the
// unverified entry (inline-cache check) or the verified entry that unpacks
// inline-type args from their oops before jumping to _verified_entry.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();  // remember start for alignment fix-up below
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Temporary frame so the entry barrier can run before unpacking.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Scratch pass: a real label must not be bound/used here.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
1568
1569 //=============================================================================
1570 #ifndef PRODUCT
1571 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1572 {
1573 if (UseCompressedClassPointers) {
1574 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1575 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1576 } else {
1577 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1578 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
1579 }
1580 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1581 }
1582 #endif
1583
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  // Emit the unverified entry point's inline-cache check, aligned to
  // InteriorEntryAlignment.
  __ ic_check(InteriorEntryAlignment);
}
1588
1589 //=============================================================================
1590
1591 bool Matcher::supports_vector_calling_convention(void) {
1592 if (EnableVectorSupport && UseVectorStubs) {
1593 return true;
1594 }
1595 return false;
1596 }
1597
1598 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1599 assert(EnableVectorSupport && UseVectorStubs, "sanity");
1600 int lo = XMM0_num;
1601 int hi = XMM0b_num;
1602 if (ideal_reg == Op_VecX) hi = XMM0d_num;
1603 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
1604 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
1605 return OptoRegPair(hi, lo);
1606 }
1607
1608 // Is this branch offset short enough that a short branch can be used?
3033 %}
3034 %}
3035
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index feeding the ConvI2L is provably
  // non-negative (type low bound >= 0) — presumably so sign extension of the
  // index can be ignored; verify against the matcher's narrow-index rules.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
3052
3053 // Indirect Narrow Oop Operand
// Compressed oop decoded via [R12 + reg << 3]; only valid when the heap-base
// register (R12) plus an 8x scale implements DecodeN directly.
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3); // times_8, matching CompressedOops::shift() from the predicate
    disp(0x0);
  %}
%}
3068
3069 // Indirect Narrow Oop Plus Offset Operand
3070 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3071 // we can't free r12 even with CompressedOops::base() == nullptr.
// Same as indCompressedOop but with an additional 32-bit displacement.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3); // times_8, matching CompressedOops::shift() from the predicate
    disp($off);
  %}
%}
3086
3087 // Indirect Memory Operand
3088 operand indirectNarrow(rRegN reg)
3395 equal(0x4, "e");
3396 not_equal(0x5, "ne");
3397 less(0x2, "b");
3398 greater_equal(0x3, "ae");
3399 less_equal(0x6, "be");
3400 greater(0x7, "a");
3401 overflow(0x0, "o");
3402 no_overflow(0x1, "no");
3403 %}
3404 %}
3405
3406 //----------OPERAND CLASSES----------------------------------------------------
3407 // Operand Classes are groups of operands that are used as to simplify
3408 // instruction definitions by not requiring the AD writer to specify separate
3409 // instructions for every form of operand when the instruction accepts
3410 // multiple operand types with the same basic encoding and format. The classic
3411 // case of this is memory operands.
3412
// All addressing modes accepted by generic memory-operand instructions,
// including the narrow-oop (compressed) forms.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
3419
3420 //----------PIPELINE-----------------------------------------------------------
3421 // Rules which define the behavior of the target architectures pipeline.
3422 pipeline %{
3423
3424 //----------ATTRIBUTES---------------------------------------------------------
  attributes %{
    variable_size_instructions;        // x86 instructions are variable-sized
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 bytes long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes

    // List of nop instructions
    nops( MachNop );
  %}
3435
5903 format %{ "MEMBAR-storestore (empty encoding)" %}
5904 ins_encode( );
5905 ins_pipe(empty);
5906 %}
5907
5908 //----------Move Instructions--------------------------------------------------
5909
// Cast long to pointer: a plain register move, elided entirely when the
// allocator assigned src and dst to the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5922
// CastP2X applied to a narrow (compressed) oop register: plain move of the
// raw bits into a long register, elided when src == dst.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5935
// Cast pointer to long: plain move of the raw bits, elided when the
// allocator assigned src and dst to the same register.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
5948
5949 // Convert oop into int for vectors alignment masking
5950 instruct convP2I(rRegI dst, rRegP src)
5951 %{
5952 match(Set dst (ConvL2I (CastP2X src)));
5953
5954 format %{ "movl $dst, $src\t# ptr -> int" %}
5955 ins_encode %{
10464 effect(DEF dst, USE src);
10465 ins_cost(100);
10466 format %{ "movd $dst,$src\t# MoveI2F" %}
10467 ins_encode %{
10468 __ movdl($dst$$XMMRegister, $src$$Register);
10469 %}
10470 ins_pipe( pipe_slow );
10471 %}
10472
// Reinterpret a long's raw bit pattern as a double (MoveL2D) by moving the
// GPR into an XMM register.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10483
10484
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Small runtime-length ClearArray for non-AVX512 targets: fills cnt 8-byte
// words at base with the pattern in val. cnt/base/val are consumed, tmp is a
// scratch XMM register, and the condition flags are clobbered.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = false (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
10545
// Small runtime-length ClearArray (word-copy-only variant, so no byte-wise
// "rep stosb" fast path) for non-AVX512 targets.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = true (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
10601
10602 // Small non-constant length ClearArray for AVX512 targets.
// Small runtime-length ClearArray for AVX512 targets; ktmp is a scratch
// opmask register for the masked-store path inside clear_mem.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = false (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10662
// Small runtime-length ClearArray (word-copy-only variant) for AVX512
// targets; ktmp is a scratch opmask register for clear_mem.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = true (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10722
10723 // Large non-constant length ClearArray for non-AVX512 targets.
// Large runtime-length ClearArray for non-AVX512 targets: skips the
// short-length prologue and goes straight to the bulk fill.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = false (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
10772
// Large runtime-length ClearArray (word-copy-only variant, so no byte-wise
// "rep stosb" fast path) for non-AVX512 targets.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = true (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
10818
10819 // Large non-constant length ClearArray for AVX512 targets.
// Large runtime-length ClearArray for AVX512 targets; ktmp is a scratch
// opmask register for the masked-store path inside clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = false (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10869
// Large runtime-length ClearArray (word-copy-only variant) for AVX512
// targets; ktmp is a scratch opmask register for clear_mem.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Listing only; the actual code is produced by MacroAssembler::clear_mem.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = true (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10919
10920 // Small constant length ClearArray for AVX512 targets.
// Constant-length ClearArray using AVX-512VL (cnt is an immediate); note
// base is not killed here, and ktmp is a scratch opmask register.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
10934
// Latin-1 vs Latin-1 (LL) string comparison for targets without AVX-512
// VL/BW; result lands in rax, tmp1 is a scratch XMM register.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // knoreg: no opmask register needed on the non-AVX512 path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
10950
12728
12729 ins_cost(300);
12730 format %{ "call_leaf,runtime " %}
12731 ins_encode(clear_avx, Java_To_Runtime(meth));
12732 ins_pipe(pipe_slow);
12733 %}
12734
12735 // Call runtime without safepoint and with vector arguments
// Call runtime without safepoint and with vector arguments.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  // Unlike the scalar leaf call, no clear_avx here: vector arguments must
  // stay live across the call (see MachCallRuntimeNode::ret_addr_offset,
  // which likewise skips clear_avx_size() for Op_CallLeafVector).
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12746
12747 // Call runtime without safepoint
12748 // entry point is null, target holds the address to call
// Call runtime without safepoint.
// Indirect form: the call's entry point is null, so the target address is
// supplied in a register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
12762
// Call runtime without safepoint, direct form: the entry point is statically
// known; vzeroupper (clear_avx) is emitted before the call.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12774
12775 // Return Instruction
12776 // Remove the return address & jump to it.
12777 // Notice: We always emit a nop after a ret to make sure there is room
12778 // for safepoint patching
12779 instruct Ret()
12780 %{
12781 match(Return);
12782
12783 format %{ "ret" %}
12784 ins_encode %{
12785 __ ret(0);
|