diff options
44 files changed, 803 insertions, 604 deletions
diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index 84898680e..b931c66d1 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -56,22 +57,23 @@ public class DictionaryWriter extends AbstractDictionaryWriter { // TODO: Create "cache dictionary" to cache fresh words for frequently updated dictionaries, // considering performance regression. @Override - public void addUnigramWord(final String word, final String shortcutTarget, final int frequency, - final int shortcutFreq, final boolean isNotAWord) { + public void addUnigramWord(final String word, final String shortcutTarget, + final int probability, final int shortcutProbability, final boolean isNotAWord) { if (shortcutTarget == null) { - mFusionDictionary.add(word, frequency, null, isNotAWord); + mFusionDictionary.add(word, new ProbabilityInfo(probability), null, isNotAWord); } else { // TODO: Do this in the subclass, with this class taking an arraylist. final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList(); - shortcutTargets.add(new WeightedString(shortcutTarget, shortcutFreq)); - mFusionDictionary.add(word, frequency, shortcutTargets, isNotAWord); + shortcutTargets.add(new WeightedString(shortcutTarget, shortcutProbability)); + mFusionDictionary.add(word, new ProbabilityInfo(probability), shortcutTargets, + isNotAWord); } } @Override - public void addBigramWords(final String word0, final String word1, final int frequency, + public void addBigramWords(final String word0, final String word1, final int probability, final boolean isValid, final long lastModifiedTime) { - mFusionDictionary.setBigram(word0, word1, frequency); + mFusionDictionary.setBigram(word0, word1, new ProbabilityInfo(probability)); } @Override diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 3b427720a..432b8a5a9 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1350,8 +1350,17 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen public void setSuggestedWords(final SuggestedWords suggestedWords, final boolean shouldShow) { mInputLogic.setSuggestedWords(suggestedWords); if (mSuggestionStripView != null) { - mSuggestionStripView.setSuggestions(suggestedWords, - SubtypeLocaleUtils.isRtlLanguage(mSubtypeSwitcher.getCurrentSubtype())); + final boolean showSuggestions; + if (SuggestedWords.EMPTY == suggestedWords + || suggestedWords.mIsPunctuationSuggestions) { + showSuggestions = !mSuggestionStripView.maybeShowImportantNoticeTitle(); + } else { + showSuggestions = true; + } + if (showSuggestions) { + mSuggestionStripView.setSuggestions(suggestedWords, + SubtypeLocaleUtils.isRtlLanguage(mSubtypeSwitcher.getCurrentSubtype())); + } mKeyboardSwitcher.onAutoCorrectionStateChanged(suggestedWords.mWillAutoCorrect); setSuggestionStripShownInternal(shouldShow, true /* needsInputViewShown */); } diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index e7d1c98a9..1bc322724 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -20,7 +20,6 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.FileNotFoundException; import java.io.IOException; @@ -117,94 +116,6 @@ public abstract class AbstractDictDecoder implements DictDecoder { } /** - * A utility class for reading a PtNode. - */ - protected static class PtNodeReader { - protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { - return dictBuffer.readUnsignedByte(); - } - - protected static int readParentAddress(final DictBuffer dictBuffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - return BinaryDictDecoderUtils.readSInt24(dictBuffer); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - - protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - return address; - } else { - switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: - return dictBuffer.readUnsignedByte(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: - return dictBuffer.readUnsignedShort(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: - return dictBuffer.readUnsignedInt24(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - } - - // Reads shortcuts and returns the read length. - protected static int readShortcut(final DictBuffer dictBuffer, - final ArrayList<WeightedString> shortcutTargets) { - final int pointerBefore = dictBuffer.position(); - dictBuffer.readUnsignedShort(); // skip the size - while (true) { - final int targetFlags = dictBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(dictBuffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return dictBuffer.position() - pointerBefore; - } - - protected static int readBigramAddresses(final DictBuffer dictBuffer, - final ArrayList<PendingAttribute> bigrams, final int baseAddress) { - int readLength = 0; - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - final int bigramFlags = dictBuffer.readUnsignedByte(); - ++readLength; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = baseAddress + readLength; - switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * dictBuffer.readUnsignedByte(); - readLength += 1; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * dictBuffer.readUnsignedShort(); - readLength += 2; - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: - bigramAddress += sign * dictBuffer.readUnsignedInt24(); - readLength += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return readLength; - } - } - - /** * Check whether the header contains the expected information. This is a no-error method, * that will return an error code and never throw a checked exception. * @return an error code, either ERROR_* or SUCCESS. @@ -264,9 +175,4 @@ public abstract class AbstractDictDecoder implements DictDecoder { public boolean hasNextPtNodeArray() { return false; } - - @Override - @UsedForTesting - public void skipPtNode(final FormatOptions formatOptions) { - } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 782ada3f4..fc5788de9 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -166,15 +166,6 @@ public final class BinaryDictDecoderUtils { return size; } - @UsedForTesting - static int getCharArraySize(final int[] chars, final int start, final int end) { - int size = 0; - for (int i = start; i < end; ++i) { - size += getCharSize(chars[i]); - } - return size; - } - /** * Writes a char array to a byte buffer. * @@ -253,41 +244,6 @@ public final class BinaryDictDecoderUtils { } /** - * Writes an array of code points with our character format to an OutputStream. - * - * This will also write the terminator byte. - * - * @param stream the OutputStream to write to. - * @param codePoints the array of code points - * @return the size written, in bytes. - */ - // TODO: Merge this method with writeCharArray and rename the various write* methods to - // make the difference clear. - @UsedForTesting - static int writeCodePoints(final OutputStream stream, final int[] codePoints, - final int startIndex, final int endIndex) - throws IOException { - int written = 0; - for (int i = startIndex; i < endIndex; ++i) { - final int codePoint = codePoints[i]; - final int charSize = getCharSize(codePoint); - if (1 == charSize) { - stream.write((byte) codePoint); - } else { - stream.write((byte) (0xFF & (codePoint >> 16))); - stream.write((byte) (0xFF & (codePoint >> 8))); - stream.write((byte) (0xFF & codePoint)); - } - written += charSize; - } - if (endIndex - startIndex > 1) { - stream.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR); - written += FormatSpec.PTNODE_TERMINATOR_SIZE; - } - return written; - } - - /** * Reads a string from a DictBuffer. This is the converse of the above method. */ static String readString(final DictBuffer dictBuffer) { @@ -321,50 +277,6 @@ public final class BinaryDictDecoderUtils { } } - // Input methods: Read a binary dictionary to memory. - // readDictionaryBinary is the public entry point for them. - - static int readSInt24(final DictBuffer dictBuffer) { - final int retval = dictBuffer.readUnsignedInt24(); - final int sign = ((retval & FormatSpec.MSB24) != 0) ? -1 : 1; - return sign * (retval & FormatSpec.SINT24_MAX); - } - - static int readChildrenAddress(final DictBuffer dictBuffer, - final int optionFlags, final FormatOptions options) { - if (options.supportsDynamicUpdate()) { - final int address = dictBuffer.readUnsignedInt24(); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - if ((address & FormatSpec.MSB24) != 0) { - return -(address & FormatSpec.SINT24_MAX); - } else { - return address; - } - } - switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: - return dictBuffer.readUnsignedByte(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: - return dictBuffer.readUnsignedShort(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: - return dictBuffer.readUnsignedInt24(); - case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - - static int readParentAddress(final DictBuffer dictBuffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int parentAddress = dictBuffer.readUnsignedInt24(); - final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1; - return sign * (parentAddress & FormatSpec.SINT24_MAX); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - /** * Reads and returns the PtNode count out of a buffer and forwards the pointer. */ @@ -408,7 +320,7 @@ public final class BinaryDictDecoderUtils { private static WeightedString getWordAtPositionWithParentAddress(final DictDecoder dictDecoder, final int pos, final FormatOptions options) { int currentPos = pos; - int frequency = Integer.MIN_VALUE; + ProbabilityInfo probabilityInfo = null; final StringBuilder builder = new StringBuilder(); // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { @@ -424,13 +336,15 @@ public final class BinaryDictDecoderUtils { MakedictLog.d("Too many jumps - probably a bug"); } } while (BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, options)); - if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; + if (probabilityInfo == null) { + probabilityInfo = currentInfo.mProbabilityInfo; + } builder.insert(0, new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length)); if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break; currentPos = currentInfo.mParentAddress + currentInfo.mOriginalAddress; } - return new WeightedString(builder.toString(), frequency); + return new WeightedString(builder.toString(), probabilityInfo); } private static WeightedString getWordAtPositionWithoutParentAddress( @@ -448,7 +362,7 @@ public final class BinaryDictDecoderUtils { groupPos = info.mEndAddress; if (info.mOriginalAddress == pos) { builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); - result = new WeightedString(builder.toString(), info.mFrequency); + result = new WeightedString(builder.toString(), info.mProbabilityInfo); break; // and return } if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { @@ -527,13 +441,13 @@ public final class BinaryDictDecoderUtils { } nodeArrayContents.add( new PtNode(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, + info.mProbabilityInfo, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); } else { nodeArrayContents.add( new PtNode(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, + info.mProbabilityInfo, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); } @@ -611,7 +525,7 @@ public final class BinaryDictDecoderUtils { newDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord); } else { - newDict.add(wordProperty.mWord, wordProperty.getProbability(), + newDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord); } } @@ -620,7 +534,7 @@ public final class BinaryDictDecoderUtils { // words that are not also registered as unigrams so we don't have to avoid // them explicitly here. for (final WeightedString bigram : wordProperty.mBigrams) { - newDict.setBigram(wordProperty.mWord, bigram.mWord, bigram.getProbability()); + newDict.setBigram(wordProperty.mWord, bigram.mWord, bigram.mProbabilityInfo); } } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index eff8fc375..9c5d1b9d7 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -673,7 +673,7 @@ public class BinaryDictEncoderUtils { /* package */ static byte makePtNodeFlags(final PtNode node, final int childrenOffset, final FormatOptions formatOptions) { - return (byte) makePtNodeFlags(node.mChars.length > 1, node.mFrequency >= 0, + return (byte) makePtNodeFlags(node.mChars.length > 1, node.isTerminal(), getByteSize(childrenOffset), node.mShortcutTargets != null && !node.mShortcutTargets.isEmpty(), node.mBigrams != null, node.mIsNotAWord, node.mIsBlacklistEntry, formatOptions); @@ -833,10 +833,10 @@ public class BinaryDictEncoderUtils { + ptNode.mCachedAddressAfterUpdate); } // Sanity checks. - if (DBG && ptNode.mFrequency > FormatSpec.MAX_TERMINAL_FREQUENCY) { + if (DBG && ptNode.getProbability() > FormatSpec.MAX_TERMINAL_FREQUENCY) { throw new RuntimeException("A node has a frequency > " + FormatSpec.MAX_TERMINAL_FREQUENCY - + " : " + ptNode.mFrequency); + + " : " + ptNode.mProbabilityInfo.toString()); } dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict); } @@ -871,7 +871,7 @@ public class BinaryDictEncoderUtils { for (final PtNode ptNode : ptNodeArray.mData) { ++ptNodes; if (ptNode.mChars.length > maxRuns) maxRuns = ptNode.mChars.length; - if (ptNode.mFrequency >= 0) { + if (ptNode.isTerminal()) { if (ptNodeArray.mCachedAddressAfterUpdate < firstTerminalAddress) firstTerminalAddress = ptNodeArray.mCachedAddressAfterUpdate; if (ptNodeArray.mCachedAddressAfterUpdate > lastTerminalAddress) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index dea9f2e28..54446df49 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -21,7 +21,6 @@ import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; @@ -91,21 +90,23 @@ public final class BinaryDictIOUtils { stack.pop(); continue; } - PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions); - for (int i = 0; i < info.mCharacters.length; ++i) { - pushedChars[index++] = info.mCharacters[i]; + final PtNodeInfo ptNodeInfo = dictDecoder.readPtNode(p.mAddress, formatOptions); + for (int i = 0; i < ptNodeInfo.mCharacters.length; ++i) { + pushedChars[index++] = ptNodeInfo.mCharacters[i]; } p.mPosition++; - final boolean isMovedPtNode = isMovedPtNode(info.mFlags, + final boolean isMovedPtNode = isMovedPtNode(ptNodeInfo.mFlags, formatOptions); - final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags, + final boolean isDeletedPtNode = isDeletedPtNode(ptNodeInfo.mFlags, formatOptions); - if (!isMovedPtNode && !isDeletedPtNode - && info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word - words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); - frequencies.put(info.mOriginalAddress, info.mFrequency); - if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); + if (!isMovedPtNode && !isDeletedPtNode && ptNodeInfo.isTerminal()) {// found word + words.put(ptNodeInfo.mOriginalAddress, new String(pushedChars, 0, index)); + frequencies.put( + ptNodeInfo.mOriginalAddress, ptNodeInfo.mProbabilityInfo.mProbability); + if (ptNodeInfo.mBigrams != null) { + bigrams.put(ptNodeInfo.mOriginalAddress, ptNodeInfo.mBigrams); + } } if (p.mPosition == p.mNumOfPtNode) { @@ -127,8 +128,8 @@ public final class BinaryDictIOUtils { p.mAddress = dictDecoder.getPosition(); } - if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) { - final Position childrenPos = new Position(info.mChildrenAddress, index); + if (!isMovedPtNode && hasChildrenAddress(ptNodeInfo.mChildrenAddress)) { + final Position childrenPos = new Position(ptNodeInfo.mChildrenAddress, index); stack.push(childrenPos); } } @@ -203,8 +204,7 @@ public final class BinaryDictIOUtils { if (same) { // found the PtNode matches the word. if (wordPos + currentInfo.mCharacters.length == wordLen) { - if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL - || isDeletedNode) { + if (!currentInfo.isTerminal() || isDeletedNode) { return FormatSpec.NOT_VALID_WORD; } else { return ptNodePos; diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index bba1d434f..a93a8bbad 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -228,8 +228,6 @@ public interface DictDecoder { } } - public void skipPtNode(final FormatOptions formatOptions); - /** * @return whether this decoder has a valid binary dictionary that it can decode. */ diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index ca4a2e9bb..8f73b27b5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -107,24 +107,26 @@ public final class FusionDictionary implements Iterable<WordProperty> { } /** - * PtNode is a group of characters, with a frequency, shortcut targets, bigrams, and children - * (Pt means Patricia Trie). + * PtNode is a group of characters, with probability information, shortcut targets, bigrams, + * and children (Pt means Patricia Trie). * * This is the central class of the in-memory representation. A PtNode is what can * be seen as a traditional "trie node", except it can hold several characters at the * same time. A PtNode essentially represents one or several characters in the middle * of the trie tree; as such, it can be a terminal, and it can have children. * In this in-memory representation, whether the PtNode is a terminal or not is represented - * in the frequency, where NOT_A_TERMINAL (= -1) means this is not a terminal and any other - * value is the frequency of this terminal. A terminal may have non-null shortcuts and/or - * bigrams, but a non-terminal may not. Moreover, children, if present, are null. + * by mProbabilityInfo. The PtNode is a terminal when the mProbabilityInfo is not null and the + * PtNode is not a terminal when the mProbabilityInfo is null. A terminal may have non-null + * shortcuts and/or bigrams, but a non-terminal may not. Moreover, children, if present, + * are non-null. */ public static final class PtNode { - public static final int NOT_A_TERMINAL = -1; + private static final int NOT_A_TERMINAL = -1; final int mChars[]; ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mBigrams; - int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. + // null == mProbabilityInfo indicates this is not a terminal. + ProbabilityInfo mProbabilityInfo; int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal. PtNodeArray mChildren; boolean mIsNotAWord; // Only a shortcut @@ -140,11 +142,11 @@ public final class FusionDictionary implements Iterable<WordProperty> { int mCachedAddressAfterUpdate; // The address of this PtNode (after update) public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency, + final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, final boolean isNotAWord, final boolean isBlacklistEntry) { mChars = chars; - mFrequency = frequency; - mTerminalId = frequency; + mProbabilityInfo = probabilityInfo; + mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; @@ -153,11 +155,11 @@ public final class FusionDictionary implements Iterable<WordProperty> { } public PtNode(final int[] chars, final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams, final int frequency, + final ArrayList<WeightedString> bigrams, final ProbabilityInfo probabilityInfo, final boolean isNotAWord, final boolean isBlacklistEntry, final PtNodeArray children) { mChars = chars; - mFrequency = frequency; + mProbabilityInfo = probabilityInfo; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = children; @@ -177,11 +179,15 @@ public final class FusionDictionary implements Iterable<WordProperty> { } public boolean isTerminal() { - return NOT_A_TERMINAL != mFrequency; + return mProbabilityInfo != null; } - public int getFrequency() { - return mFrequency; + public int getProbability() { + if (isTerminal()) { + return mProbabilityInfo.mProbability; + } else { + return NOT_A_TERMINAL; + } } public boolean getIsNotAWord() { @@ -213,18 +219,18 @@ public final class FusionDictionary implements Iterable<WordProperty> { } /** - * Adds a word to the bigram list. Updates the probability if the word already + * Adds a word to the bigram list. Updates the probability information if the word already * exists. */ - public void addBigram(final String word, final int probability) { + public void addBigram(final String word, final ProbabilityInfo probabilityInfo) { if (mBigrams == null) { mBigrams = new ArrayList<WeightedString>(); } WeightedString bigram = getBigram(word); if (bigram != null) { - bigram.setProbability(probability); + bigram.mProbabilityInfo = probabilityInfo; } else { - bigram = new WeightedString(word, probability); + bigram = new WeightedString(word, probabilityInfo); mBigrams.add(bigram); } } @@ -270,12 +276,11 @@ public final class FusionDictionary implements Iterable<WordProperty> { * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only * updated if they are higher than the existing ones. */ - public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets, + private void update(final ProbabilityInfo probabilityInfo, + final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams, final boolean isNotAWord, final boolean isBlacklistEntry) { - if (frequency > mFrequency) { - mFrequency = frequency; - } + mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo); if (shortcutTargets != null) { if (mShortcutTargets == null) { mShortcutTargets = shortcutTargets; @@ -286,8 +291,9 @@ public final class FusionDictionary implements Iterable<WordProperty> { final WeightedString existingShortcut = getShortcut(shortcut.mWord); if (existingShortcut == null) { mShortcutTargets.add(shortcut); - } else if (existingShortcut.getProbability() < shortcut.getProbability()) { - existingShortcut.setProbability(shortcut.getProbability()); + } else { + existingShortcut.mProbabilityInfo = ProbabilityInfo.max( + existingShortcut.mProbabilityInfo, shortcut.mProbabilityInfo); } } } @@ -302,8 +308,9 @@ public final class FusionDictionary implements Iterable<WordProperty> { final WeightedString existingBigram = getBigram(bigram.mWord); if (existingBigram == null) { mBigrams.add(bigram); - } else if (existingBigram.getProbability() < bigram.getProbability()) { - existingBigram.setProbability(bigram.getProbability()); + } else { + existingBigram.mProbabilityInfo = ProbabilityInfo.max( + existingBigram.mProbabilityInfo, bigram.mProbabilityInfo); } } } @@ -393,13 +400,13 @@ public final class FusionDictionary implements Iterable<WordProperty> { * they will be added to the dictionary as necessary. * * @param word the word to add. - * @param frequency the frequency of the word, in the range [0..255]. + * @param probabilityInfo probability information of the word. * @param shortcutTargets a list of shortcut targets for this word, or null. * @param isNotAWord true if this should not be considered a word (e.g. shortcut only) */ - public void add(final String word, final int frequency, + public void add(final String word, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { - add(getCodePoints(word), frequency, shortcutTargets, isNotAWord, + add(getCodePoints(word), probabilityInfo, shortcutTargets, isNotAWord, false /* isBlacklistEntry */); } @@ -412,7 +419,8 @@ public final class FusionDictionary implements Iterable<WordProperty> { */ public void addBlacklistEntry(final String word, final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) { - add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */); + add(getCodePoints(word), new ProbabilityInfo(0), shortcutTargets, isNotAWord, + true /* isBlacklistEntry */); } /** @@ -438,21 +446,22 @@ public final class FusionDictionary implements Iterable<WordProperty> { * * @param word0 the previous word of the context * @param word1 the next word of the context - * @param frequency the bigram frequency + * @param probabilityInfo the bigram probability info */ - public void setBigram(final String word0, final String word1, final int frequency) { + public void setBigram(final String word0, final String word1, + final ProbabilityInfo probabilityInfo) { PtNode ptNode0 = findWordInTree(mRootNodeArray, word0); if (ptNode0 != null) { final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1); if (ptNode1 == null) { - add(getCodePoints(word1), 0, null, false /* isNotAWord */, + add(getCodePoints(word1), new ProbabilityInfo(0), null, false /* isNotAWord */, false /* isBlacklistEntry */); // The PtNode for the first word may have moved by the above insertion, // if word1 and word2 share a common stem that happens not to have been // a cutting point until now. In this case, we need to refresh ptNode. ptNode0 = findWordInTree(mRootNodeArray, word0); } - ptNode0.addBigram(word1, frequency); + ptNode0.addBigram(word1, probabilityInfo); } else { throw new RuntimeException("First word of bigram not found " + word0); } @@ -465,15 +474,15 @@ public final class FusionDictionary implements Iterable<WordProperty> { * an exception is thrown. * * @param word the word, as an int array. - * @param frequency the frequency of the word, in the range [0..255]. + * @param probabilityInfo the probability information of the word. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param isNotAWord true if this is not a word for spellcheking purposes (shortcut only or so) * @param isBlacklistEntry true if this is a blacklisted word, false otherwise */ - private void add(final int[] word, final int frequency, + private void add(final int[] word, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord, final boolean isBlacklistEntry) { - assert(frequency >= 0 && frequency <= 255); + assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY); if (word.length >= Constants.DICTIONARY_MAX_WORD_LENGTH) { MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length); return; @@ -501,7 +510,8 @@ public final class FusionDictionary implements Iterable<WordProperty> { // No node at this point to accept the word. Create one. final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length), - shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry); + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, + isBlacklistEntry); currentNodeArray.mData.add(insertionIndex, newPtNode); if (DBG) checkStack(currentNodeArray); } else { @@ -511,15 +521,15 @@ public final class FusionDictionary implements Iterable<WordProperty> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old PtNode was not a terminal, // make it one by filling in its frequency and other attributes - currentPtNode.update(frequency, shortcutTargets, null, isNotAWord, + currentPtNode.update(probabilityInfo, shortcutTargets, null, isNotAWord, isBlacklistEntry); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final PtNode newNode = new PtNode( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency, isNotAWord, - isBlacklistEntry); + shortcutTargets, null /* bigrams */, probabilityInfo, + isNotAWord, isBlacklistEntry); currentPtNode.mChildren = new PtNodeArray(); currentPtNode.mChildren.mData.add(newNode); } @@ -527,7 +537,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new shortcuts to the existing shortcut list if it already exists. - currentPtNode.update(frequency, shortcutTargets, null, + currentPtNode.update(probabilityInfo, shortcutTargets, null, currentPtNode.mIsNotAWord && isNotAWord, currentPtNode.mIsBlacklistEntry || isBlacklistEntry); } else { @@ -537,7 +547,7 @@ public final class FusionDictionary implements Iterable<WordProperty> { final PtNode newOldWord = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex, currentPtNode.mChars.length), currentPtNode.mShortcutTargets, - currentPtNode.mBigrams, currentPtNode.mFrequency, + currentPtNode.mBigrams, currentPtNode.mProbabilityInfo, currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry, currentPtNode.mChildren); newChildren.mData.add(newOldWord); @@ -546,16 +556,17 @@ public final class FusionDictionary implements Iterable<WordProperty> { if (charIndex + differentCharIndex >= word.length) { newParent = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), - shortcutTargets, null /* bigrams */, frequency, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isBlacklistEntry, newChildren); } else { newParent = new PtNode( Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), - null /* shortcutTargets */, null /* bigrams */, -1, - false /* isNotAWord */, false /* isBlacklistEntry */, newChildren); + null /* shortcutTargets */, null /* bigrams */, + null /* probabilityInfo */, false /* isNotAWord */, + false /* isBlacklistEntry */, newChildren); final PtNode newWord = new PtNode(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, null /* bigrams */, frequency, + shortcutTargets, null /* bigrams */, probabilityInfo, isNotAWord, isBlacklistEntry); final int addIndex = word[charIndex + differentCharIndex] > currentPtNode.mChars[differentCharIndex] ? 1 : 0; @@ -617,8 +628,8 @@ public final class FusionDictionary implements Iterable<WordProperty> { private static int findInsertionIndex(final PtNodeArray nodeArray, int character) { final ArrayList<PtNode> data = nodeArray.mData; final PtNode reference = new PtNode(new int[] { character }, - null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */, - false /* isBlacklistEntry */); + null /* shortcutTargets */, null /* bigrams */, null /* probabilityInfo */, + false /* isNotAWord */, false /* isBlacklistEntry */); int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR); return result >= 0 ? result : -result - 1; } @@ -752,8 +763,9 @@ public final class FusionDictionary implements Iterable<WordProperty> { currentPos.length = mCurrentString.length(); mPositions.addLast(currentPos); } - if (currentPtNode.mFrequency >= 0) { - return new WordProperty(mCurrentString.toString(), currentPtNode.mFrequency, + if (currentPtNode.isTerminal()) { + return new WordProperty(mCurrentString.toString(), + currentPtNode.mProbabilityInfo, currentPtNode.mShortcutTargets, currentPtNode.mBigrams, currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry); } diff --git a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java index dafbc04b9..9dcd63f0c 100644 --- a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java +++ b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java @@ -30,6 +30,21 @@ public final class ProbabilityInfo { public final int mLevel; public final int mCount; + public static ProbabilityInfo max(final ProbabilityInfo probabilityInfo1, + final ProbabilityInfo probabilityInfo2) { + if (probabilityInfo1 == null) { + return probabilityInfo2; + } + if (probabilityInfo2 == null) { + return probabilityInfo1; + } + if (probabilityInfo1.mProbability > probabilityInfo2.mProbability) { + return probabilityInfo1; + } else { + return probabilityInfo2; + } + } + public ProbabilityInfo(final int probability) { this(probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP, 0, 0); } diff --git a/java/src/com/android/inputmethod/latin/makedict/PtNodeInfo.java b/java/src/com/android/inputmethod/latin/makedict/PtNodeInfo.java index 188de7a0f..4760aa8d5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/PtNodeInfo.java +++ b/java/src/com/android/inputmethod/latin/makedict/PtNodeInfo.java @@ -29,24 +29,28 @@ public final class PtNodeInfo { public final int mEndAddress; public final int mFlags; public final int[] mCharacters; - public final int mFrequency; + public final ProbabilityInfo mProbabilityInfo; public final int mChildrenAddress; public final int mParentAddress; public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<PendingAttribute> mBigrams; public PtNodeInfo(final int originalAddress, final int endAddress, final int flags, - final int[] characters, final int frequency, final int parentAddress, + final int[] characters, final ProbabilityInfo probabilityInfo, final int parentAddress, final int childrenAddress, final ArrayList<WeightedString> shortcutTargets, final ArrayList<PendingAttribute> bigrams) { mOriginalAddress = originalAddress; mEndAddress = endAddress; mFlags = flags; mCharacters = characters; - mFrequency = frequency; + mProbabilityInfo = probabilityInfo; mParentAddress = parentAddress; mChildrenAddress = childrenAddress; mShortcutTargets = shortcutTargets; mBigrams = bigrams; } + + public boolean isTerminal() { + return mProbabilityInfo != null; + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index d35f780cf..fd6138d41 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -20,7 +20,6 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import android.util.Log; @@ -38,10 +37,82 @@ import java.util.Arrays; public class Ver2DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver2DictDecoder.class.getSimpleName(); - protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { - private static int readFrequency(final DictBuffer dictBuffer) { + /** + * A utility class for reading a PtNode. + */ + protected static class PtNodeReader { + private static ProbabilityInfo readProbabilityInfo(final DictBuffer dictBuffer) { + // Ver2 dicts don't contain historical information. + return new ProbabilityInfo(dictBuffer.readUnsignedByte()); + } + + protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } + + protected static int readChildrenAddress(final DictBuffer dictBuffer, + final int ptNodeFlags) { + switch (ptNodeFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: + return dictBuffer.readUnsignedByte(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: + return dictBuffer.readUnsignedShort(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: + return dictBuffer.readUnsignedInt24(); + case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + + // Reads shortcuts and returns the read length. + protected static int readShortcut(final DictBuffer dictBuffer, + final ArrayList<WeightedString> shortcutTargets) { + final int pointerBefore = dictBuffer.position(); + dictBuffer.readUnsignedShort(); // skip the size + while (true) { + final int targetFlags = dictBuffer.readUnsignedByte(); + final String word = CharEncoding.readString(dictBuffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return dictBuffer.position() - pointerBefore; + } + + protected static int readBigramAddresses(final DictBuffer dictBuffer, + final ArrayList<PendingAttribute> bigrams, final int baseAddress) { + int readLength = 0; + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + final int bigramFlags = dictBuffer.readUnsignedByte(); + ++readLength; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) + ? 1 : -1; + int bigramAddress = baseAddress + readLength; + switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * dictBuffer.readUnsignedByte(); + readLength += 1; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * dictBuffer.readUnsignedShort(); + readLength += 2; + break; + case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: + bigramAddress += sign * dictBuffer.readUnsignedInt24(); + readLength += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute( + bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; + } + return readLength; + } } protected final File mDictionaryBinaryFile; @@ -95,8 +166,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder { } final DictionaryHeader header = super.readHeader(mDictBuffer); final int version = header.mFormatOptions.mVersion; - if (!(version >= 2 && version <= 3)) { - throw new UnsupportedFormatException("File header has a wrong version : " + version); + if (version != FormatSpec.VERSION2) { + throw new UnsupportedFormatException("File header has a wrong version : " + version); } return header; } @@ -108,12 +179,6 @@ public class Ver2DictDecoder extends AbstractDictDecoder { int addressPointer = ptNodePos; final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; - - final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); - if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; - } - final int characters[]; if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; @@ -133,14 +198,14 @@ public class Ver2DictDecoder extends AbstractDictDecoder { addressPointer += CharEncoding.getCharSize(character); characters = new int[] { character }; } - final int frequency; + final ProbabilityInfo probabilityInfo; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - frequency = PtNodeReader.readFrequency(mDictBuffer); + probabilityInfo = PtNodeReader.readProbabilityInfo(mDictBuffer); addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE; } else { - frequency = PtNode.NOT_A_TERMINAL; + probabilityInfo = null; } - int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); + int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags); if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { childrenAddress += addressPointer; } @@ -166,8 +231,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder { } else { bigrams = null; } - return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); + return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo, + FormatSpec.NO_PARENT_ADDRESS, childrenAddress, shortcutTargets, bigrams); } @Override @@ -223,40 +288,4 @@ public class Ver2DictDecoder extends AbstractDictDecoder { public boolean hasNextPtNodeArray() { return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS; } - - @Override - public void skipPtNode(final FormatOptions formatOptions) { - final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - PtNodeReader.readParentAddress(mDictBuffer, formatOptions); - BinaryDictIOUtils.skipString(mDictBuffer, - (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readFrequency(mDictBuffer); - if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { - final int shortcutsSize = mDictBuffer.readUnsignedShort(); - mDictBuffer.position(mDictBuffer.position() + shortcutsSize - - FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); - } - if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) { - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - final int bigramFlags = mDictBuffer.readUnsignedByte(); - switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: - mDictBuffer.readUnsignedByte(); - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: - mDictBuffer.readUnsignedShort(); - break; - case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: - mDictBuffer.readUnsignedInt24(); - break; - } - if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break; - } - if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - throw new RuntimeException("Too many bigrams in a PtNode."); - } - } - } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index 3de083ef3..634f63137 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -227,7 +227,7 @@ public class Ver2DictEncoder implements DictEncoder { final PtNode target = FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; - final int unigramFrequencyForThisWord = target.mFrequency; + final int unigramFrequencyForThisWord = target.getProbability(); final int offset = addressOfBigram - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), @@ -251,7 +251,7 @@ public class Ver2DictEncoder implements DictEncoder { writePtNodeFlags(ptNode, formatOptions); writeParentPosition(parentPosition, ptNode, formatOptions); writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); - writeFrequency(ptNode.mFrequency); + writeFrequency(ptNode.getProbability()); writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 83707480d..1c74852fe 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -81,14 +81,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder { // Insert unigrams to the fusion dictionary. for (final WordProperty wordProperty : wordProperties) { - // TODO: Support probability that is -1. - final int probability = wordProperty.getProbability() < 0 ? - 0 : wordProperty.getProbability(); if (wordProperty.mIsBlacklistEntry) { fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord); } else { - fusionDict.add(wordProperty.mWord, probability, + fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, wordProperty.mShortcutTargets, wordProperty.mIsNotAWord); } } @@ -99,7 +96,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } final String word0 = wordProperty.mWord; for (final WeightedString bigram : wordProperty.mBigrams) { - fusionDict.setBigram(word0, bigram.mWord, bigram.getProbability()); + fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); } } return fusionDict; diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index b93a0a525..1fc61e10a 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -43,12 +43,12 @@ public final class WordProperty implements Comparable<WordProperty> { private int mHashCode = 0; - public WordProperty(final String word, final int probability, + public WordProperty(final String word, final ProbabilityInfo probabilityInfo, final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams, final boolean isNotAWord, final boolean isBlacklistEntry) { mWord = word; - mProbabilityInfo = new ProbabilityInfo(probability); + mProbabilityInfo = probabilityInfo; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mIsNotAWord = isNotAWord; diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java index a89f70e1f..4063edccc 100644 --- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java +++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripLayoutHelper.java @@ -504,15 +504,9 @@ final class SuggestionStripLayoutHelper { titleView, infoIcon, null, moreIcon, null); final CharSequence importantNoticeTitle = titleView.getText(); titleView.setTextScaleX(1.0f); // Reset textScaleX. - // When the suggestions strip is displayed first time, stripWidth may be zero. - // Then importantNoticeTitle will be displayed as is without auto text scaleX. - // TODO: Fix the logic to always have a correct value of stripWidth. - if (width > 0) { - // Auto text scaleX to show entire important notice title should be on the strip. - final float titleScaleX = getTextScaleX( - importantNoticeTitle, width, titleView.getPaint()); - titleView.setTextScaleX(titleScaleX); - } + final float titleScaleX = getTextScaleX( + importantNoticeTitle, width, titleView.getPaint()); + titleView.setTextScaleX(titleScaleX); } static void setLayoutWeight(final View v, final float weight, final int height) { @@ -529,7 +523,7 @@ final class SuggestionStripLayoutHelper { final TextPaint paint) { paint.setTextScaleX(1.0f); final int width = getTextWidth(text, paint); - if (width <= maxWidth) { + if (width <= maxWidth || maxWidth <= 0) { return 1.0f; } return maxWidth / (float)width; diff --git a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripView.java b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripView.java index b04a2cb5b..68c891bf3 100644 --- a/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripView.java +++ b/java/src/com/android/inputmethod/latin/suggestions/SuggestionStripView.java @@ -46,6 +46,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; import com.android.inputmethod.latin.suggestions.MoreSuggestions.MoreSuggestionsListener; import com.android.inputmethod.latin.utils.CollectionUtils; +import com.android.inputmethod.latin.utils.ImportantNoticeUtils; import com.android.inputmethod.research.ResearchLogger; import java.util.ArrayList; @@ -220,11 +221,20 @@ public final class SuggestionStripView extends RelativeLayout implements OnClick return false; } - // TODO: This method should be called after this View has been attached and displayed. - public void showImportantNoticeTitle() { - mLayoutHelper.layoutImportantNotice(mImportantNoticeStrip, getWidth()); + // This method checks if we should show the important notice (checks on permanent storage if + // it has been shown once already or not, and if in the setup wizard). If applicable, it shows + // the notice. In all cases, it returns true if it was shown, false otherwise. + public boolean maybeShowImportantNoticeTitle() { + if (!ImportantNoticeUtils.hasNewImportantNotice(getContext()) + || ImportantNoticeUtils.isInSystemSetupWizard(getContext())) { + return false; + } + final int width = getWidth(); + if (width <= 0) return false; + mLayoutHelper.layoutImportantNotice(mImportantNoticeStrip, width); mStripVisibilityGroup.showImportantNoticeStrip(); mImportantNoticeStrip.setOnClickListener(this); + return true; } public void clear() { @@ -415,4 +425,11 @@ public final class SuggestionStripView extends RelativeLayout implements OnClick super.onDetachedFromWindow(); dismissMoreSuggestionsPanel(); } + + @Override + protected void onSizeChanged(int w, int h, int oldw, int oldh) { + // Called by the framework when the size is known. Show the important notice if applicable. + // This may be overriden by showing suggestions later, if applicable. + maybeShowImportantNoticeTitle(); + } } diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index c7061c8c3..eb24df601 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -57,7 +57,9 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_pt_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ - patricia_trie_reading_utils.cpp) \ + patricia_trie_reading_utils.cpp \ + ver2_patricia_trie_node_reader.cpp \ + ver2_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_buffers.cpp \ ver4_dict_constants.cpp \ @@ -65,7 +67,8 @@ LATIN_IME_CORE_SRC_FILES := \ ver4_patricia_trie_node_writer.cpp \ ver4_patricia_trie_policy.cpp \ ver4_patricia_trie_reading_utils.cpp \ - ver4_patricia_trie_writing_helper.cpp) \ + ver4_patricia_trie_writing_helper.cpp \ + ver4_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \ bigram_dict_content.cpp \ probability_dict_content.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp index 824d442e4..086d98b4a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp @@ -16,9 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" -#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" #include "utils/char_utils.h" namespace latinime { @@ -266,27 +264,17 @@ int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inW // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPtReadingHelper::nextPtNodeArray() { - if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { - // Reading invalid position because of a bug or a broken dictionary. - AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", - mReadingState.mPos, mBuffer->getTailPosition()); - ASSERT(false); + int ptNodeCountInArray = 0; + int firstPtNodePos = NOT_A_DICT_POS; + if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid( + mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) { mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - mReadingState.mPos -= mBuffer->getOriginalBufferSize(); - } - mReadingState.mRemainingPtNodeCountInThisArray = - PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, - &mReadingState.mPos); - if (usesAdditionalBuffer) { - mReadingState.mPos += mBuffer->getOriginalBufferSize(); - } + mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray; + mReadingState.mPos = firstPtNodePos; // Count up nodes and node arrays to avoid infinite loop. mReadingState.mTotalPtNodeIndexInThisArrayChain += mReadingState.mRemainingPtNodeCountInThisArray; @@ -317,29 +305,17 @@ void DynamicPtReadingHelper::nextPtNodeArray() { // Follow the forward link and read the next node array if exists. void DynamicPtReadingHelper::followForwardLink() { - if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) { - // Reading invalid position because of bug or broken dictionary. - AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", - mReadingState.mPos, mBuffer->getTailPosition()); - ASSERT(false); + int nextPtNodeArrayPos = NOT_A_DICT_POS; + if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid( + mReadingState.mPos, &nextPtNodeArrayPos)) { mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - mReadingState.mPos -= mBuffer->getOriginalBufferSize(); - } - const int forwardLinkPosition = - DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); - if (usesAdditionalBuffer) { - mReadingState.mPos += mBuffer->getOriginalBufferSize(); - } mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; - if (DynamicPtReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { + if (nextPtNodeArrayPos != NOT_A_DICT_POS) { // Follow the forward link. - mReadingState.mPos += forwardLinkPosition; + mReadingState.mPos = nextPtNodeArrayPos; nextPtNodeArray(); } else { // All node arrays have been read. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index bcc5c7857..cc7b5ff70 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -26,9 +26,9 @@ namespace latinime { -class BufferWithExtendableBuffer; class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; +class PtNodeArrayReader; /* * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and @@ -74,10 +74,10 @@ class DynamicPtReadingHelper { std::vector<int> *const mTerminalPositions; }; - DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, - const PtNodeReader *const ptNodeReader) - : mIsError(false), mReadingState(), mBuffer(buffer), - mPtNodeReader(ptNodeReader), mReadingStateStack() {} + DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader, + const PtNodeArrayReader *const ptNodeArrayReader) + : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader), + mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {} ~DynamicPtReadingHelper() {} @@ -252,8 +252,8 @@ class DynamicPtReadingHelper { // TODO: Introduce error code to track what caused the error. bool mIsError; PtNodeReadingState mReadingState; - const BufferWithExtendableBuffer *const mBuffer; const PtNodeReader *const mPtNodeReader; + const PtNodeArrayReader *const mPtNodeArrayReader; std::vector<PtNodeReadingState> mReadingStateStack; void nextPtNodeArray(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h new file mode 100644 index 000000000..6078d8285 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_ARRAY_READER_H +#define LATINIME_PT_NODE_ARRAY_READER_H + +#include "defines.h" + +namespace latinime { + +// Interface class used to read PtNode array information. +class PtNodeArrayReader { + public: + virtual ~PtNodeArrayReader() {} + + // Returns if the position is valid or not. + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const = 0; + + // Returns if the position is valid or not. NOT_A_DICT_POS is set to outNextPtNodeArrayPos when + // the next array doesn't exist. + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const = 0; + + protected: + PtNodeArrayReader() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeArrayReader); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 84731eb17..faaf44162 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -53,6 +53,21 @@ class PtNodeParams { memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); } + // PtNode read from version 2 dictionary. + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int codePointCount, const int *const codePoints, const int probability, + const int childrenPos, const int shortcutPos, const int bigramPos, + const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS), + mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos), + mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos), + mBigramPos(bigramPos), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + // PtNode with a terminal id. PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, @@ -205,9 +220,7 @@ class PtNodeParams { private: // This class have a public copy constructor to be used as a return value. - - // Disallowing the assignment operator. - PtNodeParams &operator=(PtNodeParams &ptNodeParams); + DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams); const int mHeadPos; const PatriciaTrieReadingUtils::NodeFlags mFlags; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 960c1b936..8172e70b6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - int pos = getRootPosition(); - int wordPos = 0; - - while (true) { - // If we already traversed the tree further than the word is long, there means - // there was no match (or we would have found it). - if (wordPos >= length) return NOT_A_DICT_POS; - int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, - &pos); - const int wChar = forceLowerCaseSearch - ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; - while (true) { - // If there are no more PtNodes in this array, it means we could not - // find a matching character for this depth, therefore there is no match. - if (0 >= ptNodeCount) return NOT_A_DICT_POS; - const int ptNodePos = pos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, - &pos); - if (character == wChar) { - // This is the correct PtNode. Only one PtNode may start with the same char within - // a PtNode array, so either we found our match in this array, or there is - // no match and we can return NOT_A_DICT_POS. So we will check all the - // characters in this PtNode indeed does match. - if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { - character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, - &pos); - while (NOT_A_CODE_POINT != character) { - ++wordPos; - // If we shoot the length of the word we search for, or if we find a single - // character that does not match, as explained above, it means the word is - // not in the dictionary (by virtue of this PtNode being the only one to - // match the word on the first character, but not matching the whole word). - if (wordPos >= length) return NOT_A_DICT_POS; - if (inWord[wordPos] != character) return NOT_A_DICT_POS; - character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( - mDictRoot, &pos); - } - } - // If we come here we know that so far, we do match. Either we are on a terminal - // and we match the length, in which case we found it, or we traverse children. - // If we don't match the length AND don't have children, then a word in the - // dictionary fully matches a prefix of the searched word but not the full word. - ++wordPos; - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - if (wordPos == length) { - return ptNodePos; - } - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - return NOT_A_DICT_POS; - } - // We have children and we are still shorter than the word we are searching for, so - // we need to traverse children. Put the pointer on the children position, and - // break - pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, - flags, &pos); - break; - } else { - // This PtNode does not match, so skip the remaining part and go to the next. - if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, - &pos); - } - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, - flags, &pos); - } - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - mShortcutListPolicy.skipAllShortcuts(&pos); - } - if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - mBigramListPolicy.skipAllBigrams(&pos); - } - } - --ptNodeCount; - } - } + DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int PatriciaTriePolicy::getProbability(const int unigramProbability, @@ -336,99 +257,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::isTerminal(flags)) { - return NOT_A_PROBABILITY; - } - if (PatriciaTrieReadingUtils::isNotAWord(flags) - || PatriciaTrieReadingUtils::isBlacklisted(flags)) { + const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) { // If this is not a word, or if it's a blacklisted entry, it should behave as // having no probability outside of the suggestion process (where it should be used // for shortcuts). return NOT_A_PROBABILITY; } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition( - mDictRoot, &pos), NOT_A_PROBABILITY); + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - return NOT_A_DICT_POS; - } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); - } - return pos; + return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos(); } int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); - if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { - return NOT_A_DICT_POS; - } - PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); - if (PatriciaTrieReadingUtils::isTerminal(flags)) { - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); - } - if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); - } - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - mShortcutListPolicy.skipAllShortcuts(&pos);; - } - return pos; + return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos(); } int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *childDicNodes) const { - int pos = ptNodePos; - const PatriciaTrieReadingUtils::NodeFlags flags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); + PatriciaTrieReadingUtils::NodeFlags flags; + int mergedNodeCodePointCount = 0; int mergedNodeCodePoints[MAX_WORD_LENGTH]; - const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); - const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? - PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos) - : NOT_A_PROBABILITY; - const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? - PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - mDictRoot, flags, &pos) : NOT_A_DICT_POS; - if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { - getShortcutsStructurePolicy()->skipAllShortcuts(&pos); - } - if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - getBigramsStructurePolicy()->skipAllBigrams(&pos); - } - if (mergedNodeCodePointCount <= 0) { - AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); - ASSERT(false); - return pos; - } + int probability = NOT_A_PROBABILITY; + int childrenPos = NOT_A_DICT_POS; + int shortcutPos = NOT_A_DICT_POS; + int bigramPos = NOT_A_DICT_POS; + int siblingPos = NOT_A_DICT_POS; + PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), + getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, + &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), - PatriciaTrieReadingUtils::isBlacklisted(flags) || - PatriciaTrieReadingUtils::isNotAWord(flags), + PatriciaTrieReadingUtils::isBlacklisted(flags) + || PatriciaTrieReadingUtils::isNotAWord(flags), mergedNodeCodePointCount, mergedNodeCodePoints); - return pos; + return siblingPos; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 319c81569..1ce7f85d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -24,6 +24,8 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -40,7 +42,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mMmappedBuffer.get()->getBufferSize() - mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), + mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), + mPtNodeArrayReader(mDictRoot, mDictBufferSize) {} AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -143,6 +147,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const int mDictBufferSize; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; + const Ver2ParticiaTrieNodeReader mPtNodeReader; + const Ver2PtNodeArrayReader mPtNodeArrayReader; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp index 82b3593c8..b4eee5572 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp @@ -17,6 +17,8 @@ #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { @@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; return base + offset; } +/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, + int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, + int *const outBigramPos, int *const outSiblingPos) { + int readingPos = ptNodePos; + const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos); + *outFlags = flags; + *outCodePointCount = getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos); + *outProbability = isTerminal(flags) ? + readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY; + *outChildrenPos = hasChildrenInFlags(flags) ? + readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS; + *outShortcutPos = NOT_A_DICT_POS; + if (hasShortcutTargets(flags)) { + *outShortcutPos = readingPos; + shortcutPolicy->skipAllShortcuts(&readingPos); + } + *outBigramPos = NOT_A_DICT_POS; + if (hasBigrams(flags)) { + *outBigramPos = readingPos; + bigramPolicy->skipAllBigrams(&readingPos); + } + *outSiblingPos = readingPos; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index b28f58336..fa1430ce6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h @@ -23,6 +23,9 @@ namespace latinime { +class DictionaryShortcutsStructurePolicy; +class DictionaryBigramsStructurePolicy; + // TODO: Move to pt_common class PatriciaTrieReadingUtils { public: @@ -101,6 +104,13 @@ class PatriciaTrieReadingUtils { return nodeFlags; } + static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint, + int *const outProbability, int *const outChildrenPos, int *const outShortcutPos, + int *const outBigramPos, int *const outSiblingPos); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..778d7a408 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" + +namespace latinime { + +const PtNodeParams Ver2ParticiaTrieNodeReader::fetchNodeInfoInBufferFromPtNodePos( + const int ptNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mDictSize) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mDictSize); + ASSERT(false); + return PtNodeParams(); + } + PatriciaTrieReadingUtils::NodeFlags flags; + int mergedNodeCodePointCount = 0; + int mergedNodeCodePoints[MAX_WORD_LENGTH]; + int probability = NOT_A_PROBABILITY; + int childrenPos = NOT_A_DICT_POS; + int shortcutPos = NOT_A_DICT_POS; + int bigramPos = NOT_A_DICT_POS; + int siblingPos = NOT_A_DICT_POS; + PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy, + mBigramPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, + &childrenPos, &shortcutPos, &bigramPos, &siblingPos); + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return PtNodeParams(); + } + return PtNodeParams(ptNodePos, flags, mergedNodeCodePointCount, mergedNodeCodePoints, + probability, childrenPos, shortcutPos, bigramPos, siblingPos); +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h new file mode 100644 index 000000000..dd1a0da51 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { + +class DictionaryBigramsStructurePolicy; +class DictionaryShortcutsStructurePolicy; + +class Ver2ParticiaTrieNodeReader : public PtNodeReader { + public: + Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize, + const DictionaryBigramsStructurePolicy *const bigramPolicy, + const DictionaryShortcutsStructurePolicy *const shortcutPolicy) + : mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy), + mShortuctPolicy(shortcutPolicy) {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader); + + const uint8_t *const mDictBuffer; + const int mDictSize; + const DictionaryBigramsStructurePolicy *const mBigramPolicy; + const DictionaryShortcutsStructurePolicy *const mShortuctPolicy; +}; +} // namespace latinime +#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp new file mode 100644 index 000000000..125ea31dc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" + +namespace latinime { + +bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const { + if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + ptNodeArrayPos, mDictSize); + ASSERT(false); + return false; + } + int readingPos = ptNodeArrayPos; + const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + mDictBuffer, &readingPos); + *outPtNodeCount = ptNodeCountInArray; + *outFirstPtNodePos = readingPos; + return true; +} + +bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const { + if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + forwordLinkPos, mDictSize); + ASSERT(false); + return false; + } + // Ver2 dicts don't have forward links. + *outNextPtNodeArrayPos = NOT_A_DICT_POS; + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h new file mode 100644 index 000000000..77404adf8 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H +#define LATINIME_VER2_PT_NODE_ARRAY_READER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" + +namespace latinime { + +class Ver2PtNodeArrayReader : public PtNodeArrayReader { + public: + Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize) + : mDictBuffer(dictBuffer), mDictSize(dictSize) {}; + + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const; + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader); + + const uint8_t *const mDictBuffer; + const int mDictSize; +}; +} // namespace latinime +#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h index 69576d8e5..66845bbd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -24,13 +24,14 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" -#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" namespace latinime { class BufferWithExtendableBuffer; class Ver4BigramListPolicy; class Ver4DictBuffers; +class Ver4PatriciaTrieNodeReader; +class Ver4PtNodeArrayReader; class Ver4ShortcutListPolicy; /* @@ -39,11 +40,12 @@ class Ver4ShortcutListPolicy; class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { public: Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, - Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, + Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader, + const PtNodeArrayReader *const ptNodeArrayReader, Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) - : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), - mReadingHelper(mTrieBuffer, mPtNodeReader), - mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + : mTrieBuffer(trieBuffer), mBuffers(buffers), + mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), + mShortcutPolicy(shortcutPolicy) {} virtual ~Ver4PatriciaTrieNodeWriter() {} @@ -114,7 +116,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { BufferWithExtendableBuffer *const mTrieBuffer; Ver4DictBuffers *const mBuffers; - const Ver4PatriciaTrieNodeReader *const mPtNodeReader; DynamicPtReadingHelper mReadingHelper; Ver4BigramListPolicy *const mBigramPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 75d85988c..efc29a0c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -43,7 +43,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); while (!readingHelper.isEnd()) { const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); @@ -70,7 +70,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodePos(ptNodePos); return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, outCodePoints, outUnigramProbability); @@ -78,7 +78,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } @@ -158,7 +158,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len shortcutLength); return false; } - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); bool addedNewUnigram = false; if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord, @@ -397,7 +397,7 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( &mTerminalPtNodePositionsForIteratingWords); - DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 9ba5be0c3..692163058 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -29,6 +29,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -47,8 +48,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), - mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, - &mShortcutPolicy), + mPtNodeArrayReader(mDictBuffer), + mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader, + &mBigramPolicy, &mShortcutPolicy), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), @@ -132,6 +134,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4BigramListPolicy mBigramPolicy; Ver4ShortcutListPolicy mShortcutPolicy; Ver4PatriciaTrieNodeReader mNodeReader; + Ver4PtNodeArrayReader mPtNodeArrayReader; Ver4PatriciaTrieNodeWriter mNodeWriter; DynamicPtUpdatingHelper mUpdatingHelper; Ver4PatriciaTrieWritingHelper mWritingHelper; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 672097455..acf099122 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -26,6 +26,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -74,14 +75,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int *const outUnigramCount, int *const outBigramCount) { Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), mBuffers->getProbabilityDictContent()); + Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), mBuffers->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), mBuffers->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), - mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); - DynamicPtReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); + DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted @@ -124,7 +126,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); @@ -136,12 +138,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instances for the GCed dictionary. Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), buffersToWrite->getProbabilityDictContent()); + Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), buffersToWrite->getTerminalPositionLookupTable()); Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), - buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); + buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, + &newShortcutPolicy); // Re-assign terminal IDs for valid terminal PtNodes. TerminalPositionLookupTable::TerminalIdMap terminalIdMap; if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( @@ -163,8 +167,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, mBuffers->getShortcutDictContent())) { return false; } - DynamicPtReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), - &newPtNodeReader); + DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp new file mode 100644 index 000000000..bbdf40cdd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const { + if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + ptNodeArrayPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = ptNodeArrayPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + dictBuf, &readingPos); + if (usesAdditionalBuffer) { + readingPos += mBuffer->getOriginalBufferSize(); + } + if (ptNodeCountInArray < 0) { + AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray); + return false; + } + *outPtNodeCount = ptNodeCountInArray; + *outFirstPtNodePos = readingPos; + return true; +} + +bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const { + if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + forwordLinkPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = forwordLinkPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int nextPtNodeArrayOffset = + DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos); + if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) { + *outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset; + } else { + *outNextPtNodeArrayPos = NOT_A_DICT_POS; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h new file mode 100644 index 000000000..d81808efc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PT_NODE_ARRAY_READER_H +#define LATINIME_VER4_PT_NODE_ARRAY_READER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class Ver4PtNodeArrayReader : public PtNodeArrayReader { + public: + Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {}; + + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const; + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader); + + const BufferWithExtendableBuffer *const mBuffer; +}; +} // namespace latinime +#endif /* LATINIME_VER4_PT_NODE_ARRAY_READER_H */ diff --git a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java index e211d94b8..cf528d010 100644 --- a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java @@ -20,6 +20,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.SmallTest; import com.android.inputmethod.latin.makedict.FusionDictionary; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import java.util.HashMap; @@ -33,16 +34,16 @@ public class FusionDictionaryTests extends AndroidTestCase { FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(new HashMap<String,String>())); - dict.add("abc", 10, null, false /* isNotAWord */); + dict.add("abc", new ProbabilityInfo(10), null, false /* isNotAWord */); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "abc")); - dict.add("aa", 10, null, false /* isNotAWord */); + dict.add("aa", new ProbabilityInfo(10), null, false /* isNotAWord */); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aaa")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "aa")); - dict.add("babcd", 10, null, false /* isNotAWord */); - dict.add("bacde", 10, null, false /* isNotAWord */); + dict.add("babcd", new ProbabilityInfo(10), null, false /* isNotAWord */); + dict.add("bacde", new ProbabilityInfo(10), null, false /* isNotAWord */); assertNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "ba")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "babcd")); assertNotNull(FusionDictionary.findWordInTree(dict.mRootNodeArray, "bacde")); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index bc4452fd6..75a1aa3ca 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -151,8 +151,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); } } - dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, - false /* isNotAWord */); + dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), + (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); } } @@ -162,7 +162,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (int w2 : bigrams.valueAt(i)) { - dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); + dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ)); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 8d2f5fbbf..b6795ea6d 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.WordProperty; import com.android.inputmethod.latin.utils.CombinedFormatUtils; @@ -112,7 +113,7 @@ public class CombinedInputOutput { String line; String word = null; - int freq = 0; + ProbabilityInfo probabilityInfo = new ProbabilityInfo(0); boolean isNotAWord = false; ArrayList<WeightedString> bigrams = new ArrayList<WeightedString>(); ArrayList<WeightedString> shortcuts = new ArrayList<WeightedString>(); @@ -121,9 +122,10 @@ public class CombinedInputOutput { final String args[] = line.trim().split(","); if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) { if (null != word) { - dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); + dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, + isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.getProbability()); + dict.setBigram(word, s.mWord, s.mProbabilityInfo); } } if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>(); @@ -135,14 +137,19 @@ public class CombinedInputOutput { if (CombinedFormatUtils.WORD_TAG.equals(params[0])) { word = params[1]; } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { - freq = Integer.parseInt(params[1]); + probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + probabilityInfo.mTimestamp, probabilityInfo.mLevel, + probabilityInfo.mCount); } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { final String[] historicalInfoParams = params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { throw new RuntimeException("Wrong format (historical info) : " + line); } - // TODO: Use parsed historical info. + probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) { isNotAWord = "true".equals(params[1]); } @@ -168,34 +175,40 @@ public class CombinedInputOutput { } } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) { String secondWordOfBigram = null; - int bigramFreq = 0; + ProbabilityInfo bigramProbabilityInfo = new ProbabilityInfo(0); for (String param : args) { final String params[] = param.split("=", 2); if (2 != params.length) throw new RuntimeException("Wrong format : " + line); if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) { secondWordOfBigram = params[1]; } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) { - bigramFreq = Integer.parseInt(params[1]); + bigramProbabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]), + bigramProbabilityInfo.mTimestamp, bigramProbabilityInfo.mLevel, + bigramProbabilityInfo.mCount); } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) { final String[] historicalInfoParams = params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR); if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) { throw new RuntimeException("Wrong format (historical info) : " + line); } - // TODO: Use parsed historical info. + bigramProbabilityInfo = new ProbabilityInfo( + bigramProbabilityInfo.mProbability, + Integer.parseInt(historicalInfoParams[0]), + Integer.parseInt(historicalInfoParams[1]), + Integer.parseInt(historicalInfoParams[2])); } } if (null != secondWordOfBigram) { - bigrams.add(new WeightedString(secondWordOfBigram, bigramFreq)); + bigrams.add(new WeightedString(secondWordOfBigram, bigramProbabilityInfo)); } else { throw new RuntimeException("Wrong format : " + line); } } } if (null != word) { - dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); + dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.getProbability()); + dict.setBigram(word, s.mWord, s.mProbabilityInfo); } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 9947608ea..ce9b9f306 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -118,9 +118,10 @@ public class Diff extends Dicttool.Command { hasDifferences = true; } else { // We found the word. Compare frequencies, shortcuts, bigrams - if (word0Property.getProbability() != word1PtNode.getFrequency()) { + if (word0Property.getProbability() != word1PtNode.getProbability()) { System.out.println("Probability changed: " + word0Property.mWord + " " - + word0Property.getProbability() + " -> " + word1PtNode.getFrequency()); + + word0Property.getProbability() + " -> " + + word1PtNode.getProbability()); hasDifferences = true; } if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index c1eb0f8e7..178df5cec 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -72,7 +72,7 @@ public class Info extends Dicttool.Command { return; } System.out.println("Word: " + word); - System.out.println(" Freq: " + ptNode.getFrequency()); + System.out.println(" Freq: " + ptNode.getProbability()); if (ptNode.getIsNotAWord()) { System.out.println(" Is not a word"); } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index c6c60b8e2..2ac842a80 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -20,6 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.WordProperty; import java.io.BufferedReader; @@ -66,6 +67,7 @@ public class XmlDictInputOutput { private static final int START = 1; private static final int WORD = 2; private static final int UNKNOWN = 3; + private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1; FusionDictionary mDictionary; int mState; // the state of the parser @@ -90,7 +92,8 @@ public class XmlDictInputOutput { final FusionDictionary dict = mDictionary; for (final String shortcutOnly : mShortcutsMap.keySet()) { if (dict.hasWord(shortcutOnly)) continue; - dict.add(shortcutOnly, 1, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); + dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY), + mShortcutsMap.get(shortcutOnly), true /* isNotAWord */); } mDictionary = null; mShortcutsMap.clear(); @@ -138,7 +141,8 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { - mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */); + mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord), + false /* isNotAWord */); mState = START; } } @@ -319,7 +323,7 @@ public class XmlDictInputOutput { final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); for (final WeightedString bigram : bigramList) { if (!dict.hasWord(bigram.mWord)) continue; - dict.setBigram(firstWord, bigram.mWord, bigram.getProbability()); + dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo); } } return dict; @@ -354,7 +358,6 @@ public class XmlDictInputOutput { } // TODO: use an XMLSerializer if this gets big destination.write("<wordlist format=\"2\""); - final HashMap<String, String> options = dict.mOptions.mAttributes; for (final String key : dict.mOptions.mAttributes.keySet()) { final String value = dict.mOptions.mAttributes.get(key); destination.write(" " + key + "=\"" + value + "\""); diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index 9ed4dd5a2..69b49f015 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -24,6 +24,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; +import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.Ver2DictEncoder; @@ -53,11 +54,11 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE); testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions); - dict.add("foo", TEST_FREQ, null, false /* isNotAWord */); - dict.add("fta", 1, null, false /* isNotAWord */); - dict.add("ftb", 1, null, false /* isNotAWord */); - dict.add("bar", 1, null, false /* isNotAWord */); - dict.add("fool", 1, null, false /* isNotAWord */); + dict.add("foo", new ProbabilityInfo(TEST_FREQ), null, false /* isNotAWord */); + dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); final File dst = File.createTempFile("testGetRawDict", ".tmp"); dst.deleteOnExit(); @@ -87,7 +88,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { assertEquals("Wrong id attribute", ID, resultDict.mOptions.mAttributes.get( DictionaryHeader.DICTIONARY_ID_KEY)); assertEquals("Dictionary can't be read back correctly", - FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(), + FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getProbability(), TEST_FREQ); } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java index c6e497615..283abcdd1 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictEncoderFlattenTreeTests.java @@ -33,11 +33,11 @@ public class BinaryDictEncoderFlattenTreeTests extends TestCase { public void testFlattenNodes() { final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new DictionaryOptions(new HashMap<String, String>())); - dict.add("foo", 1, null, false /* isNotAWord */); - dict.add("fta", 1, null, false /* isNotAWord */); - dict.add("ftb", 1, null, false /* isNotAWord */); - dict.add("bar", 1, null, false /* isNotAWord */); - dict.add("fool", 1, null, false /* isNotAWord */); + dict.add("foo", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("fta", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("ftb", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("bar", new ProbabilityInfo(1), null, false /* isNotAWord */); + dict.add("fool", new ProbabilityInfo(1), null, false /* isNotAWord */); final ArrayList<PtNodeArray> result = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); assertEquals(4, result.size()); diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java index 191546433..d0d47da96 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java @@ -101,7 +101,7 @@ public class FusionDictionaryTest extends TestCase { prepare(time); for (int i = 0; i < sWords.size(); ++i) { System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i))); - dict.add(sWords.get(i), 180, null, false); + dict.add(sWords.get(i), new ProbabilityInfo(180), null, false); dumpDict(dict); checkDictionary(dict, sWords, i); } |