aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean Chalard <jchalard@google.com>2013-04-03 19:27:19 -0700
committerAndroid Git Automerger <android-git-automerger@android.com>2013-04-03 19:27:19 -0700
commit3b9d7fd1792da09c1d436eceb49ee4ea6bc47f07 (patch)
tree0af9ee3b616eddbe58c5498ef1266755d60bbd20
parentfdf3c49c8bd4d4b444d018e5ec8a94a5e1d3ba32 (diff)
parent151e4e182c86a2b7aea534bda777e9e9aafa549b (diff)
downloadlatinime-3b9d7fd1792da09c1d436eceb49ee4ea6bc47f07.tar.gz
latinime-3b9d7fd1792da09c1d436eceb49ee4ea6bc47f07.tar.xz
latinime-3b9d7fd1792da09c1d436eceb49ee4ea6bc47f07.zip
am 151e4e18: am 065f126f: Merge "Fix two nasty bugs with surrogate pairs."
* commit '151e4e182c86a2b7aea534bda777e9e9aafa549b': Fix two nasty bugs with surrogate pairs.
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java26
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java28
-rw-r--r--tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java114
3 files changed, 141 insertions, 27 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 5c805598a..e7c7e2b8a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -620,34 +620,34 @@ public final class FusionDictionary implements Iterable<Word> {
* Helper method to find a word in a given branch.
*/
@SuppressWarnings("unused")
- public static CharGroup findWordInTree(Node node, final String s) {
+ public static CharGroup findWordInTree(Node node, final String string) {
int index = 0;
final StringBuilder checker = DBG ? new StringBuilder() : null;
+ final int[] codePoints = getCodePoints(string);
CharGroup currentGroup;
- final int codePointCountInS = s.codePointCount(0, s.length());
do {
- int indexOfGroup = findIndexOfChar(node, s.codePointAt(index));
+ int indexOfGroup = findIndexOfChar(node, codePoints[index]);
if (CHARACTER_NOT_FOUND == indexOfGroup) return null;
currentGroup = node.mData.get(indexOfGroup);
- if (s.length() - index < currentGroup.mChars.length) return null;
+ if (codePoints.length - index < currentGroup.mChars.length) return null;
int newIndex = index;
- while (newIndex < s.length() && newIndex - index < currentGroup.mChars.length) {
- if (currentGroup.mChars[newIndex - index] != s.codePointAt(newIndex)) return null;
+ while (newIndex < codePoints.length && newIndex - index < currentGroup.mChars.length) {
+ if (currentGroup.mChars[newIndex - index] != codePoints[newIndex]) return null;
newIndex++;
}
index = newIndex;
if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
- if (index < codePointCountInS) {
+ if (index < codePoints.length) {
node = currentGroup.mChildren;
}
- } while (null != node && index < codePointCountInS);
+ } while (null != node && index < codePoints.length);
- if (index < codePointCountInS) return null;
+ if (index < codePoints.length) return null;
if (!currentGroup.isTerminal()) return null;
- if (DBG && !s.equals(checker.toString())) return null;
+ if (DBG && !codePoints.equals(checker.toString())) return null;
return currentGroup;
}
@@ -847,12 +847,12 @@ public final class FusionDictionary implements Iterable<Word> {
@Override
public Word next() {
Position currentPos = mPositions.getLast();
- mCurrentString.setLength(mCurrentString.length() - currentPos.length);
+ mCurrentString.setLength(currentPos.length);
do {
if (currentPos.pos.hasNext()) {
final CharGroup currentGroup = currentPos.pos.next();
- currentPos.length = currentGroup.mChars.length;
+ currentPos.length = mCurrentString.length();
for (int i : currentGroup.mChars)
mCurrentString.append(Character.toChars(i));
if (null != currentGroup.mChildren) {
@@ -866,7 +866,7 @@ public final class FusionDictionary implements Iterable<Word> {
} else {
mPositions.removeLast();
currentPos = mPositions.getLast();
- mCurrentString.setLength(mCurrentString.length() - mPositions.getLast().length);
+ mCurrentString.setLength(mPositions.getLast().length);
}
} while (true);
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
index ade010981..bd8729203 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
@@ -72,15 +72,12 @@ public class BinaryDictIOTests extends AndroidTestCase {
private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
- private static final String[] CHARACTERS = {
- "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
- "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
- };
-
public BinaryDictIOTests() {
super();
- final Random random = new Random(123456);
+ final long time = System.currentTimeMillis();
+ Log.e(TAG, "Testing dictionary: seed is " + time);
+ final Random random = new Random(time);
sWords.clear();
generateWords(MAX_UNIGRAMS, random);
@@ -132,13 +129,16 @@ public class BinaryDictIOTests extends AndroidTestCase {
/**
* Generates a random word.
*/
- private String generateWord(final int value) {
- final int lengthOfChars = CHARACTERS.length;
+ private String generateWord(final Random random) {
StringBuilder builder = new StringBuilder("a");
- long lvalue = Math.abs((long)value);
- while (lvalue > 0) {
- builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
- lvalue /= lengthOfChars;
+ int count = random.nextInt() % 30; // Arbitrarily 30 chars max
+ while (count > 0) {
+ final long r = Math.abs(random.nextInt());
+ if (r < 0) continue;
+ // Don't insert 0~20, but insert any other code point.
+ // Code points are in the range 0~0x10FFFF.
+ builder.appendCodePoint((int)(20 + r % (0x10FFFF - 20)));
+ --count;
}
return builder.toString();
}
@@ -146,7 +146,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
private void generateWords(final int number, final Random random) {
final Set<String> wordSet = CollectionUtils.newHashSet();
while (wordSet.size() < number) {
- wordSet.add(generateWord(random.nextInt()));
+ wordSet.add(generateWord(random));
}
sWords.addAll(wordSet);
}
@@ -555,7 +555,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
// Test a word that isn't contained within the dictionary.
final Random random = new Random((int)System.currentTimeMillis());
for (int i = 0; i < 1000; ++i) {
- final String word = generateWord(random.nextInt());
+ final String word = generateWord(random);
if (sWords.indexOf(word) != -1) continue;
runGetTerminalPosition(buffer, word, i, false);
}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
new file mode 100644
index 000000000..fe3781d80
--- /dev/null
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.Word;
+
+import junit.framework.TestCase;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Random;
+
+/**
+ * Unit tests for BinaryDictInputOutput.
+ */
+public class FusionDictionaryTest extends TestCase {
+ private static final ArrayList<String> sWords = new ArrayList<String>();
+ private static final int MAX_UNIGRAMS = 1000;
+
+ private void prepare(final long seed) {
+ System.out.println("Seed is " + seed);
+ final Random random = new Random(seed);
+ sWords.clear();
+ generateWords(MAX_UNIGRAMS, random);
+ }
+
+ /**
+ * Generates a random word.
+ */
+ private String generateWord(final Random random) {
+ StringBuilder builder = new StringBuilder("a");
+ int count = random.nextInt() % 30;
+ while (count > 0) {
+ final long r = Math.abs(random.nextInt());
+ if (r < 0) continue;
+ // Don't insert 0~20, but insert any other code point.
+ // Code points are in the range 0~0x10FFFF.
+ if (builder.length() < 7)
+ builder.appendCodePoint((int)(20 +r % (0x10FFFF - 20)));
+ --count;
+ }
+ if (builder.length() == 1) return generateWord(random);
+ return builder.toString();
+ }
+
+ private void generateWords(final int number, final Random random) {
+ while (sWords.size() < number) {
+ sWords.add(generateWord(random));
+ }
+ }
+
+ private void checkDictionary(final FusionDictionary dict, final ArrayList<String> words,
+ int limit) {
+ assertNotNull(dict);
+ for (final String word : words) {
+ if (--limit < 0) return;
+ final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
+ if (null == cg) {
+ System.out.println("word " + dumpWord(word));
+ dumpDict(dict);
+ }
+ assertNotNull(cg);
+ }
+ }
+
+ private String dumpWord(final String word) {
+ final StringBuilder sb = new StringBuilder("");
+ for (int i = 0; i < word.length(); i = word.offsetByCodePoints(i, 1)) {
+ sb.append(word.codePointAt(i));
+ sb.append(" ");
+ }
+ return sb.toString();
+ }
+
+ private void dumpDict(final FusionDictionary dict) {
+ for (Word w : dict) {
+ System.out.println("Word " + dumpWord(w.mWord));
+ }
+ }
+
+ // Test the flattened array contains the expected number of nodes, and
+ // that it does not contain any duplicates.
+ public void testFusion() {
+ final FusionDictionary dict = new FusionDictionary(new Node(),
+ new DictionaryOptions(new HashMap<String, String>(),
+ false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
+ final long time = System.currentTimeMillis();
+ prepare(time);
+ for (int i = 0; i < sWords.size(); ++i) {
+ System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i)));
+ dict.add(sWords.get(i), 180, null, false);
+ dumpDict(dict);
+ checkDictionary(dict, sWords, i);
+ }
+ }
+}