// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "peephole_x86_64.hpp"

%}

// Register masks
source_hpp %{

extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
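// Note: these masks are expected to be populated once during startup
// (conventionally by a reg_mask_init() helper in this file's source block)
// and are exposed to the matcher through inline accessors like
// STACK_OR_PTR_REG_mask() above.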
// ...

    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif

void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
// ...

    // idivq (note: must be emitted by the user of this rule)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    // The runtime target may be outside the +/-2GB reach of a direct call
    // from the code cache, so materialize its 64-bit address in r10 and
    // call through the register.
    if (AOTCodeCache::is_on_for_write()) {
      // Create a runtime_call_type relocation so the target can be fixed
      // up when the cached code is loaded.
      __ lea(r10, RuntimeAddress((address)$meth$$method));
    } else {
      // No relocation needed.
      __ mov64(r10, (int64_t) $meth$$method);
    }
    __ call(r10);
    __ post_call_nop();
  %}

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
// ...

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
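// These mask operands let match rules strength-reduce an AND with 0xFF or
// 0xFFFF into a zero-extending move. A minimal sketch of such a rule (the
// rule name and exact shape are illustrative, not a definitive copy of the
// rules elsewhere in this file):
//
//   instruct andL_rReg_imm255(rRegL dst, immL_255 mask)
//   %{
//     match(Set dst (AndL dst mask));
//     format %{ "movzbl  $dst, $dst\t# long & 0xFF" %}
//     ins_encode %{
//       // Writing the 32-bit register implicitly zeroes bits 63:32,
//       // so a single movzbl implements the 64-bit mask.
//       __ movzbl($dst$$Register, $dst$$Register);
//     %}
//     ins_pipe(ialu_reg);
//   %}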

// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
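// This operand is matched by the loadAOTRCAddress rule further down, which
// materializes the constant through load_aotrc_address() (a leaq, per its
// format string) so the address stays relocatable in AOT-cached code.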

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{ %}
  interface(REG_INTER);
%}
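// kReg allocates from the AVX-512 opmask (k) register class; vector rules use
// it for compare masks and predicated (masked) operations.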

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);
// ...

  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
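// Both rules call movdbl(), which expands to movsd when
// UseXmmLoadAndClearUpper is set and to movlpd otherwise. movsd from memory
// clears the upper half of the XMM register, breaking any dependency on its
// previous contents; movlpd merges into the low half and can stall on a
// partial-register dependency.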

instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
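// A bare (v)maxss would not implement Java semantics: the x86 instruction
// returns its second operand when either input is NaN or when the inputs are
// +0.0f and -0.0f, whereas Math.max must propagate NaN and treat +0.0f as
// greater than -0.0f. The vminmax_fp helper blends the operands to honor
// those rules, which is what the extra TEMP registers are for.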

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,