diff options
author | 2012-09-19 14:38:17 +0900 | |
---|---|---|
committer | 2012-09-20 18:02:16 +0900 | |
commit | d36245fad292ea660ca49f38a3ec36e07727dda5 (patch) | |
tree | dc490be912aa746d32bc0e3c648eeb59e57bdbe5 /java | |
parent | 84c1bbd76d144c2d777952079b9e8d8fea98c9b2 (diff) | |
download | latinime-d36245fad292ea660ca49f38a3ec36e07727dda5.tar.gz latinime-d36245fad292ea660ca49f38a3ec36e07727dda5.tar.xz latinime-d36245fad292ea660ca49f38a3ec36e07727dda5.zip |
Add getTerminalPosition.
Change-Id: If04d779db23b1aea2cc12e5e9b8cecfcb35a5737
Diffstat (limited to 'java')
3 files changed, 73 insertions, 3 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 1a85e71ce..7a1b9dcb7 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -16,10 +16,11 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; +import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import java.io.IOException; import java.util.ArrayList; @@ -124,4 +125,69 @@ public class BinaryDictIOUtils { readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } + + /** + * Gets the address of the last CharGroup of the exact matching word in the dictionary. + * If no match is found, returns NOT_VALID_WORD. + * + * @param buffer the buffer to read. + * @param word the word we search for. + * @return the address of the terminal node. + * @throws IOException + * @throws UnsupportedFormatException + */ + public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, + final String word) throws IOException, UnsupportedFormatException { + if (word == null) return FormatSpec.NOT_VALID_WORD; + if (buffer.position() != 0) buffer.position(0); + + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); + int wordPos = 0; + final int wordLen = word.codePointCount(0, word.length()); + for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) { + if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; + int groupOffset = buffer.position() - header.mHeaderSize; + final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer); + groupOffset += BinaryDictInputOutput.getGroupCountSize(charGroupCount); + + for (int i = 0; i < charGroupCount; ++i) { + final int charGroupPos = buffer.position(); + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, + buffer.position(), header.mFormatOptions); + boolean same = true; + for (int p = 0, j = word.offsetByCodePoints(0, wordPos); + p < currentInfo.mCharacters.length; + ++p, j = word.offsetByCodePoints(j, 1)) { + if (wordPos + p >= wordLen + || word.codePointAt(j) != currentInfo.mCharacters[p]) { + same = false; + break; + } + } + + if (same) { + if (wordPos + currentInfo.mCharacters.length == wordLen) { + if (currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL) { + return FormatSpec.NOT_VALID_WORD; + } else { + return charGroupPos; + } + } + wordPos += currentInfo.mCharacters.length; + if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { + return FormatSpec.NOT_VALID_WORD; + } + buffer.position(currentInfo.mChildrenAddress); + break; + } + groupOffset = currentInfo.mEndAddress; + + // not found + if (i >= charGroupCount - 1) { + return FormatSpec.NOT_VALID_WORD; + } + } + } + return FormatSpec.NOT_VALID_WORD; + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index c865702d6..1d3e94bb7 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -1242,8 +1242,9 @@ public class BinaryDictInputOutput { * @param formatOptions file format options. * @return the word, as a string. */ - private static String getWordAtAddress(final FusionDictionaryBufferInterface buffer, - final int headerSize, final int address, final FormatOptions formatOptions) { + /* packages for tests */ static String getWordAtAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions formatOptions) { final String cachedString = wordCache.get(address); if (null != cachedString) return cachedString; diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index f8f13b197..adc6037bb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -207,6 +207,9 @@ public final class FormatSpec { static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_BIGRAM_FREQUENCY = 15; + // This option needs to be the same numeric value as the one in binary_format.h. + static final int NOT_VALID_WORD = -99; + /** * Options about file format. */ |