Skip to content

Commit e91a6ec

Browse files
committed
8347489: RISC-V: Misaligned memory access with COH
Reviewed-by: mli, vkempik
1 parent d699aba commit e91a6ec

File tree

3 files changed

+130
-26
lines changed

3 files changed

+130
-26
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 93 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1409,6 +1409,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14091409
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
14101410
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
14111411

1412+
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1413+
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
1414+
1415+
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
1416+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1417+
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
1418+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1419+
14121420
BLOCK_COMMENT("string_compare {");
14131421

14141422
// Bizarrely, the counts are passed in bytes, regardless of whether they
@@ -1426,6 +1434,24 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14261434
mv(cnt2, cnt1);
14271435
bind(L);
14281436

1437+
// Load 4 bytes once to compare for alignment before main loop. Note that this
1438+
// is only possible for LL/UU case. We need to resort to load_long_misaligned
1439+
// for both LU and UL cases.
1440+
if (str1_isL == str2_isL) { // LL or UU
1441+
beq(str1, str2, DONE);
1442+
int base_offset = isLL ? base_offset1 : base_offset2;
1443+
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
1444+
mv(t0, minCharsInWord / 2);
1445+
ble(cnt2, t0, SHORT_STRING);
1446+
lwu(tmp1, Address(str1));
1447+
lwu(tmp2, Address(str2));
1448+
bne(tmp1, tmp2, DIFFERENCE);
1449+
addi(str1, str1, 4);
1450+
addi(str2, str2, 4);
1451+
subi(cnt2, cnt2, minCharsInWord / 2);
1452+
}
1453+
}
1454+
14291455
// A very short string
14301456
mv(t0, minCharsInWord);
14311457
ble(cnt2, t0, SHORT_STRING);
@@ -1434,8 +1460,14 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14341460
// load first parts of strings and finish initialization while loading
14351461
{
14361462
if (str1_isL == str2_isL) { // LL or UU
1437-
// check if str1 and str2 is same pointer
1438-
beq(str1, str2, DONE);
1463+
#ifdef ASSERT
1464+
Label align_ok;
1465+
orr(t0, str1, str2);
1466+
andi(t0, t0, 0x7);
1467+
beqz(t0, align_ok);
1468+
stop("bad alignment");
1469+
bind(align_ok);
1470+
#endif
14391471
// load 8 bytes once to compare
14401472
ld(tmp1, Address(str1));
14411473
ld(tmp2, Address(str2));
@@ -1452,7 +1484,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14521484
sub(cnt2, zr, cnt2);
14531485
} else if (isLU) { // LU case
14541486
lwu(tmp1, Address(str1));
1455-
ld(tmp2, Address(str2));
1487+
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
14561488
mv(t0, STUB_THRESHOLD);
14571489
bge(cnt2, t0, STUB);
14581490
subi(cnt2, cnt2, 4);
@@ -1465,11 +1497,11 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14651497
sub(cnt2, zr, cnt2);
14661498
addi(cnt1, cnt1, 4);
14671499
} else { // UL case
1468-
ld(tmp1, Address(str1));
1500+
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
14691501
lwu(tmp2, Address(str2));
14701502
mv(t0, STUB_THRESHOLD);
14711503
bge(cnt2, t0, STUB);
1472-
addi(cnt2, cnt2, -4);
1504+
subi(cnt2, cnt2, 4);
14731505
slli(t0, cnt2, 1);
14741506
sub(cnt1, zr, t0);
14751507
add(str1, str1, t0);
@@ -1486,6 +1518,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14861518
// main loop
14871519
bind(NEXT_WORD);
14881520
if (str1_isL == str2_isL) { // LL or UU
1521+
// both of the two loads are 8-byte aligned
14891522
add(t0, str1, cnt2);
14901523
ld(tmp1, Address(t0));
14911524
add(t0, str2, cnt2);
@@ -1495,7 +1528,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14951528
add(t0, str1, cnt1);
14961529
lwu(tmp1, Address(t0));
14971530
add(t0, str2, cnt2);
1498-
ld(tmp2, Address(t0));
1531+
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
14991532
addi(cnt1, cnt1, 4);
15001533
inflate_lo32(tmp3, tmp1);
15011534
mv(tmp1, tmp3);
@@ -1504,7 +1537,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
15041537
add(t0, str2, cnt2);
15051538
lwu(tmp2, Address(t0));
15061539
add(t0, str1, cnt1);
1507-
ld(tmp1, Address(t0));
1540+
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
15081541
inflate_lo32(tmp3, tmp2);
15091542
mv(tmp2, tmp3);
15101543
addi(cnt1, cnt1, 8);
@@ -1637,6 +1670,9 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
16371670
int length_offset = arrayOopDesc::length_offset_in_bytes();
16381671
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
16391672

1673+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
1674+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1675+
16401676
Register cnt1 = tmp3;
16411677
Register cnt2 = tmp1; // cnt2 only used in array length compare
16421678
Label DONE, SAME, NEXT_WORD, SHORT, TAIL03, TAIL01;
@@ -1660,10 +1696,31 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
16601696

16611697
la(a1, Address(a1, base_offset));
16621698
la(a2, Address(a2, base_offset));
1699+
1700+
// Load 4 bytes once to compare for alignment before main loop.
1701+
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
1702+
subi(cnt1, cnt1, elem_per_word / 2);
1703+
bltz(cnt1, TAIL03);
1704+
lwu(tmp1, Address(a1));
1705+
lwu(tmp2, Address(a2));
1706+
addi(a1, a1, 4);
1707+
addi(a2, a2, 4);
1708+
bne(tmp1, tmp2, DONE);
1709+
}
1710+
16631711
// Check for short strings, i.e. smaller than wordSize.
16641712
subi(cnt1, cnt1, elem_per_word);
16651713
bltz(cnt1, SHORT);
16661714

1715+
#ifdef ASSERT
1716+
Label align_ok;
1717+
orr(t0, a1, a2);
1718+
andi(t0, t0, 0x7);
1719+
beqz(t0, align_ok);
1720+
stop("bad alignment");
1721+
bind(align_ok);
1722+
#endif
1723+
16671724
// Main 8 byte comparison loop.
16681725
bind(NEXT_WORD); {
16691726
ld(tmp1, Address(a1));
@@ -1729,20 +1786,45 @@ void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
17291786
void C2_MacroAssembler::string_equals(Register a1, Register a2,
17301787
Register result, Register cnt1)
17311788
{
1732-
Label SAME, DONE, SHORT, NEXT_WORD;
1789+
Label SAME, DONE, SHORT, NEXT_WORD, TAIL03, TAIL01;
17331790
Register tmp1 = t0;
17341791
Register tmp2 = t1;
17351792

17361793
assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2);
17371794

1795+
int base_offset = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1796+
1797+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
1798+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1799+
17381800
BLOCK_COMMENT("string_equals {");
17391801

17401802
mv(result, false);
17411803

1804+
// Load 4 bytes once to compare for alignment before main loop.
1805+
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
1806+
subi(cnt1, cnt1, 4);
1807+
bltz(cnt1, TAIL03);
1808+
lwu(tmp1, Address(a1));
1809+
lwu(tmp2, Address(a2));
1810+
addi(a1, a1, 4);
1811+
addi(a2, a2, 4);
1812+
bne(tmp1, tmp2, DONE);
1813+
}
1814+
17421815
// Check for short strings, i.e. smaller than wordSize.
17431816
subi(cnt1, cnt1, wordSize);
17441817
bltz(cnt1, SHORT);
17451818

1819+
#ifdef ASSERT
1820+
Label align_ok;
1821+
orr(t0, a1, a2);
1822+
andi(t0, t0, 0x7);
1823+
beqz(t0, align_ok);
1824+
stop("bad alignment");
1825+
bind(align_ok);
1826+
#endif
1827+
17461828
// Main 8 byte comparison loop.
17471829
bind(NEXT_WORD); {
17481830
ld(tmp1, Address(a1));
@@ -1757,8 +1839,6 @@ void C2_MacroAssembler::string_equals(Register a1, Register a2,
17571839
beqz(tmp1, SAME);
17581840

17591841
bind(SHORT);
1760-
Label TAIL03, TAIL01;
1761-
17621842
// 0-7 bytes left.
17631843
test_bit(tmp1, cnt1, 2);
17641844
beqz(tmp1, TAIL03);
@@ -2512,6 +2592,9 @@ void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register resul
25122592
int length_offset = arrayOopDesc::length_offset_in_bytes();
25132593
int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
25142594

2595+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
2596+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
2597+
25152598
BLOCK_COMMENT("arrays_equals_v {");
25162599

25172600
// if (a1 == a2), return true

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4307,7 +4307,7 @@ void MacroAssembler::population_count(Register dst, Register src,
43074307
{
43084308
bind(loop);
43094309
addi(dst, dst, 1);
4310-
addi(tmp2, tmp1, -1);
4310+
subi(tmp2, tmp1, 1);
43114311
andr(tmp1, tmp1, tmp2);
43124312
bnez(tmp1, loop);
43134313
}

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 36 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2449,17 +2449,28 @@ class StubGenerator: public StubCodeGenerator {
24492449
}
24502450

24512451
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
2452-
void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) {
2452+
void compare_string_8_x_LU(Register tmpL, Register tmpU,
2453+
Register strL, Register strU, Label& DIFF) {
24532454
const Register tmp = x30, tmpLval = x12;
2455+
2456+
int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
2457+
2458+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
2459+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
2460+
2461+
// strL is 8-byte aligned
24542462
__ ld(tmpLval, Address(strL));
24552463
__ addi(strL, strL, wordSize);
2456-
__ ld(tmpU, Address(strU));
2464+
2465+
// compare first 4 characters
2466+
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
24572467
__ addi(strU, strU, wordSize);
24582468
__ inflate_lo32(tmpL, tmpLval);
24592469
__ xorr(tmp, tmpU, tmpL);
24602470
__ bnez(tmp, DIFF);
24612471

2462-
__ ld(tmpU, Address(strU));
2472+
// compare second 4 characters
2473+
__ load_long_misaligned(tmpU, Address(strU), tmp, (base_offset % 8) != 0 ? 4 : 8);
24632474
__ addi(strU, strU, wordSize);
24642475
__ inflate_hi32(tmpL, tmpLval);
24652476
__ xorr(tmp, tmpU, tmpL);
@@ -2493,6 +2504,14 @@ class StubGenerator: public StubCodeGenerator {
24932504
const Register result = x10, str1 = x11, str2 = x13, cnt2 = x14,
24942505
tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x12;
24952506

2507+
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
2508+
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
2509+
2510+
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
2511+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
2512+
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
2513+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
2514+
24962515
// cnt2 == amount of characters left to compare
24972516
// Check already loaded first 4 symbols
24982517
__ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
@@ -2509,17 +2528,19 @@ class StubGenerator: public StubCodeGenerator {
25092528
tmpU = isLU ? tmp2 : tmp1, // where to keep U for comparison
25102529
tmpL = isLU ? tmp1 : tmp2; // where to keep L for comparison
25112530

2512-
// make sure main loop is 8 byte-aligned, we should load another 4 bytes from strL
2513-
// cnt2 is >= 68 here, no need to check it for >= 0
2514-
__ lwu(tmpL, Address(strL));
2515-
__ addi(strL, strL, wordSize / 2);
2516-
__ ld(tmpU, Address(strU));
2517-
__ addi(strU, strU, wordSize);
2518-
__ inflate_lo32(tmp3, tmpL);
2519-
__ mv(tmpL, tmp3);
2520-
__ xorr(tmp3, tmpU, tmpL);
2521-
__ bnez(tmp3, CALCULATE_DIFFERENCE);
2522-
__ addi(cnt2, cnt2, -wordSize / 2);
2531+
if (AvoidUnalignedAccesses && (base_offset1 % 8) == 0) {
2532+
// Load another 4 bytes from strL to make sure main loop is 8-byte aligned
2533+
// cnt2 is >= 68 here, no need to check it for >= 0
2534+
__ lwu(tmpL, Address(strL));
2535+
__ addi(strL, strL, wordSize / 2);
2536+
__ load_long_misaligned(tmpU, Address(strU), tmp4, (base_offset2 % 8) != 0 ? 4 : 8);
2537+
__ addi(strU, strU, wordSize);
2538+
__ inflate_lo32(tmp3, tmpL);
2539+
__ mv(tmpL, tmp3);
2540+
__ xorr(tmp3, tmpU, tmpL);
2541+
__ bnez(tmp3, CALCULATE_DIFFERENCE);
2542+
__ subi(cnt2, cnt2, wordSize / 2);
2543+
}
25232544

25242545
// we are now 8-bytes aligned on strL
25252546
__ subi(cnt2, cnt2, wordSize * 2);
@@ -4493,7 +4514,7 @@ class StubGenerator: public StubCodeGenerator {
44934514

44944515
if (multi_block) {
44954516
int total_adds = vset_sew == Assembler::e32 ? 240 : 608;
4496-
__ addi(consts, consts, -total_adds);
4517+
__ subi(consts, consts, total_adds);
44974518
__ addi(ofs, ofs, vset_sew == Assembler::e32 ? 64 : 128);
44984519
__ ble(ofs, limit, multi_block_loop);
44994520
__ mv(c_rarg0, ofs); // return ofs

0 commit comments

Comments
 (0)