aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java 68
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java 5
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java 3
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java 91
4 files changed, 163 insertions, 4 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 1a85e71ce..7a1b9dcb7 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -16,10 +16,11 @@
package com.android.inputmethod.latin.makedict;
-import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import java.io.IOException;
import java.util.ArrayList;
@@ -124,4 +125,69 @@ public class BinaryDictIOUtils {
readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams,
header.mFormatOptions);
}
+
+    /**
+     * Gets the position of the last CharGroup of the exact matching word in the dictionary.
+     * If no match is found, returns NOT_VALID_WORD.
+     *
+     * The buffer is rewound to position 0 and the file header is read before the search
+     * starts, so the buffer position is changed by this call. The search then reads the
+     * char groups of one node at a time, and on a match moves the buffer to that group's
+     * children address, until the whole word has been consumed.
+     *
+     * The returned value is the buffer position at which the matching group starts, as
+     * reported by {@code buffer.position()} just before that group was read.
+     *
+     * @param buffer the buffer to read.
+     * @param word the word we search for. A null or empty word is never found.
+     * @return the position of the terminal group, or NOT_VALID_WORD if the word is absent.
+     * @throws IOException
+     * @throws UnsupportedFormatException
+     */
+    public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
+            final String word) throws IOException, UnsupportedFormatException {
+        if (word == null) return FormatSpec.NOT_VALID_WORD;
+        if (buffer.position() != 0) buffer.position(0);
+
+        final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
+        // wordPos and wordLen are counted in code points, not in chars.
+        int wordPos = 0;
+        final int wordLen = word.codePointCount(0, word.length());
+        for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) {
+            if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
+            final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
+
+            boolean foundNextCharGroup = false;
+            for (int i = 0; i < charGroupCount; ++i) {
+                final int charGroupPos = buffer.position();
+                final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
+                        buffer.position(), header.mFormatOptions);
+
+                // Compare this group's characters with the word, starting at wordPos.
+                // j is the char index in the word that corresponds to code point wordPos + p.
+                boolean same = true;
+                for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
+                        p < currentInfo.mCharacters.length;
+                        ++p, j = word.offsetByCodePoints(j, 1)) {
+                    if (wordPos + p >= wordLen
+                            || word.codePointAt(j) != currentInfo.mCharacters[p]) {
+                        same = false;
+                        break;
+                    }
+                }
+                if (!same) continue;
+
+                if (wordPos + currentInfo.mCharacters.length == wordLen) {
+                    // The whole word is consumed: it is valid only if this group is a terminal.
+                    if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) {
+                        return FormatSpec.NOT_VALID_WORD;
+                    }
+                    return charGroupPos;
+                }
+                wordPos += currentInfo.mCharacters.length;
+                if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
+                    return FormatSpec.NOT_VALID_WORD;
+                }
+                // Continue the search in the children of the matching group.
+                buffer.position(currentInfo.mChildrenAddress);
+                foundNextCharGroup = true;
+                break;
+            }
+
+            // No group of this node matched. This also covers a node with no groups at all,
+            // in which case the loop above does not run.
+            if (!foundNextCharGroup) return FormatSpec.NOT_VALID_WORD;
+        }
+        return FormatSpec.NOT_VALID_WORD;
+    }
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index c865702d6..1d3e94bb7 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -1242,8 +1242,9 @@ public class BinaryDictInputOutput {
* @param formatOptions file format options.
* @return the word, as a string.
*/
- private static String getWordAtAddress(final FusionDictionaryBufferInterface buffer,
- final int headerSize, final int address, final FormatOptions formatOptions) {
+ /* packages for tests */ static String getWordAtAddress(
+ final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
+ final FormatOptions formatOptions) {
final String cachedString = wordCache.get(address);
if (null != cachedString) return cachedString;
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index f8f13b197..adc6037bb 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -207,6 +207,9 @@ public final class FormatSpec {
static final int MAX_TERMINAL_FREQUENCY = 255;
static final int MAX_BIGRAM_FREQUENCY = 15;
+ // This option needs to be the same numeric value as the one in binary_format.h.
+ static final int NOT_VALID_WORD = -99;
+
/**
* Options about file format.
*/
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
index 4c2d3f6fe..24776d536 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.CollectionUtils;
import com.android.inputmethod.latin.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
-import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@@ -475,4 +475,93 @@ public class BinaryDictIOTests extends AndroidTestCase {
Log.d(TAG, result);
}
}
+
+ // Tests for getTerminalPosition
+ private String getWordFromBinary(final FusionDictionaryBufferInterface buffer,
+ final int address) {
+ if (buffer.position() != 0) buffer.position(0);
+
+ FileHeader header = null;
+ try {
+ header = BinaryDictInputOutput.readHeader(buffer);
+ } catch (IOException e) {
+ return null;
+ } catch (UnsupportedFormatException e) {
+ return null;
+ }
+ if (header == null) return null;
+ return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
+ address - header.mHeaderSize, header.mFormatOptions);
+ }
+
+    /**
+     * Looks up a word with getTerminalPosition, checks the result and times the lookup.
+     *
+     * Asserts that the word is found exactly when {@code contained} is true, and, when it is
+     * found, that the word read back from the returned position equals {@code word}.
+     * An exception thrown by the lookup is logged rather than rethrown.
+     *
+     * @param buffer the dictionary buffer to search.
+     * @param word the word to look up.
+     * @param index the index of the word in the test data; currently unused.
+     * @param contained whether the word is expected to be in the dictionary.
+     * @return the lookup time in nanoseconds, or -1 if the lookup threw an exception.
+     */
+    private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer,
+            final String word, int index, boolean contained) {
+        long diff = -1;
+        int position = -1;
+        try {
+            final long now = System.nanoTime();
+            position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
+            diff = System.nanoTime() - now;
+        } catch (IOException e) {
+            Log.e(TAG, "IOException while getTerminalPosition: " + e);
+        } catch (UnsupportedFormatException e) {
+            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition: " + e);
+        }
+
+        // The expected value goes first so that failure messages read correctly.
+        assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
+        if (contained) assertEquals(word, getWordFromBinary(buffer, position));
+        return diff;
+    }
+
+    /**
+     * Writes the test words to a temporary dictionary file and checks getTerminalPosition
+     * against it: invalid inputs (too long, null, empty) must not be found, every written
+     * word must be found, and generated words that were not written must not be found.
+     */
+    public void testGetTerminalPosition() {
+        File file = null;
+        try {
+            file = File.createTempFile("runReadUnigrams", ".dict");
+        } catch (IOException e) {
+            // do nothing: the assertion below fails if the file could not be created.
+        }
+        assertNotNull(file);
+
+        final FusionDictionary dict = new FusionDictionary(new Node(),
+                new FusionDictionary.DictionaryOptions(
+                        new HashMap<String, String>(), false, false));
+        addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
+        timeWritingDictToFile(file, dict, VERSION3_WITH_LINKEDLIST_NODE);
+
+        final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY);
+
+        try {
+            // too long word
+            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+            assertEquals(FormatSpec.NOT_VALID_WORD,
+                    BinaryDictIOUtils.getTerminalPosition(buffer, longWord));
+
+            // null
+            assertEquals(FormatSpec.NOT_VALID_WORD,
+                    BinaryDictIOUtils.getTerminalPosition(buffer, null));
+
+            // empty string
+            assertEquals(FormatSpec.NOT_VALID_WORD,
+                    BinaryDictIOUtils.getTerminalPosition(buffer, ""));
+        } catch (IOException e) {
+            // An exception here would skip the remaining assertions, so report it.
+            fail("IOException while getTerminalPosition: " + e);
+        } catch (UnsupportedFormatException e) {
+            fail("UnsupportedFormatException while getTerminalPosition: " + e);
+        }
+
+        // Test a word that is contained within the dictionary.
+        long sum = 0;
+        for (int i = 0; i < sWords.size(); ++i) {
+            final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true);
+            // -1 means the lookup was not timed.
+            sum += time == -1 ? 0 : time;
+        }
+        Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
+
+        // Test a word that isn't contained within the dictionary.
+        final Random random = new Random((int)System.currentTimeMillis());
+        for (int i = 0; i < 1000; ++i) {
+            final String word = generateWord(random.nextInt());
+            // Skip generated words that happen to be in the dictionary.
+            if (sWords.indexOf(word) != -1) continue;
+            runGetTerminalPosition(buffer, word, i, false);
+        }
+    }
}