Skip to content

Commit c3e8b93

Browse files
committed
2009-10-21 Satoshi Nakagawa <[email protected]>
Reviewed by Darin Adler. Fixed Japanese text search problems. Treat small kana letters and kana letters as different characters in search. Do not ignore diacritic marks in search for Japanese texts. https://bugs.webkit.org/show_bug.cgi?id=30437 * fast/text/international/japanese-kana-letters-expected.txt: Added. * fast/text/international/japanese-kana-letters.html: Added. 2009-10-21 Satoshi Nakagawa <[email protected]> Reviewed by Darin Adler. Fixed Japanese text search problems. Treat small kana letters and kana letters as different characters in search. Do not ignore diacritic marks in search for Japanese texts. https://bugs.webkit.org/show_bug.cgi?id=30437 Test: fast/text/international/japanese-kana-letters.html * editing/TextIterator.cpp: (WebCore::createCollator): (WebCore::collator): (WebCore::createSearcher): Canonical link: https://commits.webkit.org/41353@main git-svn-id: https://svn.webkit.org/repository/webkit/trunk@49899 268f45cc-cd09-0410-ab3c-d52691b4dbfc
1 parent 544e3e2 commit c3e8b93

File tree

5 files changed

+239
-0
lines changed

5 files changed

+239
-0
lines changed

LayoutTests/ChangeLog

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
2009-10-21 Satoshi Nakagawa <[email protected]>
2+
3+
Reviewed by Darin Adler.
4+
5+
Fixed Japanese text search problems.
6+
Treat small kana letters and kana letters as different characters in search.
7+
Do not ignore diacritic marks in search for Japanese texts.
8+
9+
https://bugs.webkit.org/show_bug.cgi?id=30437
10+
11+
* fast/text/international/japanese-kana-letters-expected.txt: Added.
12+
* fast/text/international/japanese-kana-letters.html: Added.
13+
114
2009-10-20 Pavel Feldman <[email protected]>
215

316
Reviewed by Timothy Hatcher.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SUCCESS: Found hiragana and katakana correctly.
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
<html>
2+
<head>
3+
<script>
4+
function canFind(target, specimen)
5+
{
6+
getSelection().empty();
7+
document.body.innerHTML = specimen;
8+
document.execCommand("FindString", false, target);
9+
var result = getSelection().rangeCount != 0;
10+
getSelection().empty();
11+
return result;
12+
}
13+
14+
function runTests()
15+
{
16+
if (window.layoutTestController)
17+
layoutTestController.dumpAsText();
18+
19+
var smallHiraganaA = String.fromCharCode(0x3041);
20+
var hiraganaA = String.fromCharCode(0x3042);
21+
var smallKatakanaA = String.fromCharCode(0x30a1);
22+
var katakanaA = String.fromCharCode(0x30a2);
23+
var halfwidthSmallKatakanaA = String.fromCharCode(0xff67);
24+
var halfwidthKatakanaA = String.fromCharCode(0xff71);
25+
var hiraganaKa = String.fromCharCode(0x304b);
26+
var hiraganaGa = String.fromCharCode(0x304c);
27+
28+
var success = true;
29+
30+
var message = "FAILURE:";
31+
32+
if (!canFind(smallHiraganaA, smallHiraganaA)) {
33+
success = false;
34+
message += " Cannot find small hiragana A when searching for small hiragana A.";
35+
}
36+
37+
if (!canFind(hiraganaA, hiraganaA)) {
38+
success = false;
39+
message += " Cannot find hiragana A when searching for hiragana A.";
40+
}
41+
42+
if (!canFind(smallKatakanaA, smallKatakanaA)) {
43+
success = false;
44+
message += " Cannot find small katakana A when searching for small katakana A.";
45+
}
46+
47+
if (!canFind(katakanaA, katakanaA)) {
48+
success = false;
49+
message += " Cannot find katakana A when searching for katakana A.";
50+
}
51+
52+
if (!canFind(halfwidthSmallKatakanaA, halfwidthSmallKatakanaA)) {
53+
success = false;
54+
message += " Cannot find halfwidth small katakana A when searching for halfwidth small katakana A.";
55+
}
56+
57+
if (!canFind(halfwidthKatakanaA, halfwidthKatakanaA)) {
58+
success = false;
59+
message += " Cannot find halfwidth katakana A when searching for halfwidth katakana A.";
60+
}
61+
62+
if (!canFind(smallHiraganaA, smallKatakanaA)) {
63+
success = false;
64+
message += " Cannot find small katakana A when searching for small hiragana A.";
65+
}
66+
67+
if (!canFind(hiraganaA, halfwidthKatakanaA)) {
68+
success = false;
69+
message += " Cannot find halfwidth katakana A when searching for hiragana A.";
70+
}
71+
72+
if (canFind(smallHiraganaA, hiraganaA)) {
73+
success = false;
74+
message += " Found small hiragana A when searching for hiragana A.";
75+
}
76+
77+
if (canFind(smallKatakanaA, katakanaA)) {
78+
success = false;
79+
message += " Found small katakana A when searching for katakana A.";
80+
}
81+
82+
if (canFind(halfwidthSmallKatakanaA, halfwidthKatakanaA)) {
83+
success = false;
84+
message += " Found halfwidth small katakana A when searching for halfwidth katakana A.";
85+
}
86+
87+
if (canFind(smallHiraganaA, katakanaA)) {
88+
success = false;
89+
message += " Found small hiragana A when searching for katakana A.";
90+
}
91+
92+
if (canFind(katakanaA, halfwidthSmallKatakanaA)) {
93+
success = false;
94+
message += " Found katakana A when searching for halfwidth small katakana A.";
95+
}
96+
97+
if (canFind(halfwidthKatakanaA, smallHiraganaA)) {
98+
success = false;
99+
message += " Found halfwidth katakana A when searching for small hiragana A.";
100+
}
101+
102+
if (canFind(hiraganaKa, hiraganaGa)) {
103+
success = false;
104+
message += " Found hiragana Ka when searching for hiragana Ga.";
105+
}
106+
107+
if (success)
108+
message = "SUCCESS: Found hiragana and katakana correctly.";
109+
110+
document.body.innerHTML = message;
111+
}
112+
</script>
113+
</head>
114+
<body onload="runTests()"></body>
115+
</html>

WebCore/ChangeLog

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
2009-10-21 Satoshi Nakagawa <[email protected]>
2+
3+
Reviewed by Darin Adler.
4+
5+
Fixed Japanese text search problems.
6+
Treat small kana letters and kana letters as different characters in search.
7+
Do not ignore diacritic marks in search for Japanese texts.
8+
9+
https://bugs.webkit.org/show_bug.cgi?id=30437
10+
11+
Test: fast/text/international/japanese-kana-letters.html
12+
13+
* editing/TextIterator.cpp:
14+
(WebCore::createCollator):
15+
(WebCore::collator):
16+
(WebCore::createSearcher):
17+
118
2009-10-20 Eric Z. Ayers <[email protected]>
219

320
Reviewed by Timothy Hatcher.

WebCore/editing/TextIterator.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,6 +1432,95 @@ static const size_t minimumSearchBufferSize = 8192;
14321432
static bool searcherInUse;
14331433
#endif
14341434

1435+
// Tailored collation rules for Japanese text search.
1436+
// The default Unicode Collation Algorithm is unnatural in Japanese.
1437+
// These rules intend to treat the following characters as different characters.
1438+
//
1439+
// - Small kana letters and normal kana letters
1440+
// - Voiceless letters, voiced letters and semi-voiced letters
1441+
//
1442+
// This is original work built in reference to the following Unicode standard documents.
1443+
//
1444+
// - http://unicode.org/reports/tr10/
1445+
// - http://unicode.org/Public/UCA/latest/allkeys.txt
1446+
//
1447+
static const UChar japaneseKanaCollationRules[] = {
1448+
'&', 0x3041, '=', 0x30a1, '=', 0xff67, '<', 0x3042,
1449+
'=', 0x30a2, '=', 0xff71, '<', 0x3043, '=', 0x30a3,
1450+
'=', 0xff68, '<', 0x3044, '=', 0x30a4, '=', 0xff72,
1451+
'<', 0x3045, '=', 0x30a5, '=', 0xff69, '<', 0x3046,
1452+
'=', 0x30a6, '=', 0xff73, '<', 0x3094, '=', 0x30f4,
1453+
'<', 0x3047, '=', 0x30a7, '=', 0xff6a, '<', 0x3048,
1454+
'=', 0x30a8, '=', 0xff74, '<', 0x3049, '=', 0x30a9,
1455+
'=', 0xff6b, '<', 0x304a, '=', 0x30aa, '=', 0xff75,
1456+
'<', 0x3095, '=', 0x30f5, '<', 0x304b, '=', 0x30ab,
1457+
'=', 0xff76, '<', 0x304c, '=', 0x30ac, '<', 0x304d,
1458+
'=', 0x30ad, '=', 0xff77, '<', 0x304e, '=', 0x30ae,
1459+
'<', 0x304f, '=', 0x30af, '=', 0xff78, '<', 0x3050,
1460+
'=', 0x30b0, '<', 0x3096, '=', 0x30f6, '<', 0x3051,
1461+
'=', 0x30b1, '=', 0xff79, '<', 0x3052, '=', 0x30b2,
1462+
'<', 0x3053, '=', 0x30b3, '=', 0xff7a, '<', 0x3054,
1463+
'=', 0x30b4, '<', 0x3055, '=', 0x30b5, '=', 0xff7b,
1464+
'<', 0x3056, '=', 0x30b6, '<', 0x3057, '=', 0x30b7,
1465+
'=', 0xff7c, '<', 0x3058, '=', 0x30b8, '<', 0x3059,
1466+
'=', 0x30b9, '=', 0xff7d, '<', 0x305a, '=', 0x30ba,
1467+
'<', 0x305b, '=', 0x30bb, '=', 0xff7e, '<', 0x305c,
1468+
'=', 0x30bc, '<', 0x305d, '=', 0x30bd, '=', 0xff7f,
1469+
'<', 0x305e, '=', 0x30be, '<', 0x305f, '=', 0x30bf,
1470+
'=', 0xff80, '<', 0x3060, '=', 0x30c0, '<', 0x3061,
1471+
'=', 0x30c1, '=', 0xff81, '<', 0x3062, '=', 0x30c2,
1472+
'<', 0x3063, '=', 0x30c3, '=', 0xff6f, '<', 0x3064,
1473+
'=', 0x30c4, '=', 0xff82, '<', 0x3065, '=', 0x30c5,
1474+
'<', 0x3066, '=', 0x30c6, '=', 0xff83, '<', 0x3067,
1475+
'=', 0x30c7, '<', 0x3068, '=', 0x30c8, '=', 0xff84,
1476+
'<', 0x3069, '=', 0x30c9, '<', 0x306a, '=', 0x30ca,
1477+
'=', 0xff85, '<', 0x306b, '=', 0x30cb, '=', 0xff86,
1478+
'<', 0x306c, '=', 0x30cc, '=', 0xff87, '<', 0x306d,
1479+
'=', 0x30cd, '=', 0xff88, '<', 0x306e, '=', 0x30ce,
1480+
'=', 0xff89, '<', 0x306f, '=', 0x30cf, '=', 0xff8a,
1481+
'<', 0x3070, '=', 0x30d0, '<', 0x3071, '=', 0x30d1,
1482+
'<', 0x3072, '=', 0x30d2, '=', 0xff8b, '<', 0x3073,
1483+
'=', 0x30d3, '<', 0x3074, '=', 0x30d4, '<', 0x3075,
1484+
'=', 0x30d5, '=', 0xff8c, '<', 0x3076, '=', 0x30d6,
1485+
'<', 0x3077, '=', 0x30d7, '<', 0x3078, '=', 0x30d8,
1486+
'=', 0xff8d, '<', 0x3079, '=', 0x30d9, '<', 0x307a,
1487+
'=', 0x30da, '<', 0x307b, '=', 0x30db, '=', 0xff8e,
1488+
'<', 0x307c, '=', 0x30dc, '<', 0x307d, '=', 0x30dd,
1489+
'<', 0x307e, '=', 0x30de, '=', 0xff8f, '<', 0x307f,
1490+
'=', 0x30df, '=', 0xff90, '<', 0x3080, '=', 0x30e0,
1491+
'=', 0xff91, '<', 0x3081, '=', 0x30e1, '=', 0xff92,
1492+
'<', 0x3082, '=', 0x30e2, '=', 0xff93, '<', 0x3083,
1493+
'=', 0x30e3, '=', 0xff6c, '<', 0x3084, '=', 0x30e4,
1494+
'=', 0xff94, '<', 0x3085, '=', 0x30e5, '=', 0xff6d,
1495+
'<', 0x3086, '=', 0x30e6, '=', 0xff95, '<', 0x3087,
1496+
'=', 0x30e7, '=', 0xff6e, '<', 0x3088, '=', 0x30e8,
1497+
'=', 0xff96, '<', 0x3089, '=', 0x30e9, '=', 0xff97,
1498+
'<', 0x308a, '=', 0x30ea, '=', 0xff98, '<', 0x308b,
1499+
'=', 0x30eb, '=', 0xff99, '<', 0x308c, '=', 0x30ec,
1500+
'=', 0xff9a, '<', 0x308d, '=', 0x30ed, '=', 0xff9b,
1501+
'<', 0x308e, '=', 0x30ee, '<', 0x308f, '=', 0x30ef,
1502+
'=', 0xff9c, '<', 0x30f7, '<', 0x3090, '=', 0x30f0,
1503+
'<', 0x30f8, '<', 0x3091, '=', 0x30f1, '<', 0x3092,
1504+
'=', 0x30f2, '=', 0xff66, '<', 0x3093, '=', 0x30f3,
1505+
'=', 0xff9d, 0
1506+
};
1507+
1508+
static UCollator* createCollator()
1509+
{
1510+
// Set tailored collation rules to fix Japanese text search.
1511+
// See the comments before japaneseKanaCollationRules for details.
1512+
UErrorCode status = U_ZERO_ERROR;
1513+
UCollator* collator = ucol_openRules(japaneseKanaCollationRules, -1, UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, 0, &status);
1514+
ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1515+
return collator;
1516+
}
1517+
1518+
static UCollator* collator()
1519+
{
1520+
static UCollator* collator = createCollator();
1521+
return collator;
1522+
}
1523+
14351524
static UStringSearch* createSearcher()
14361525
{
14371526
// Provide a non-empty pattern and non-empty text so usearch_open will not fail,
@@ -1440,6 +1529,10 @@ static UStringSearch* createSearcher()
14401529
UErrorCode status = U_ZERO_ERROR;
14411530
UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
14421531
ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1532+
status = U_ZERO_ERROR;
1533+
usearch_setCollator(searcher, collator(), &status);
1534+
ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1535+
usearch_reset(searcher);
14431536
return searcher;
14441537
}
14451538

0 commit comments

Comments
 (0)