diff options
54 files changed, 1038 insertions, 604 deletions
diff --git a/java/res/xml-sw600dp/key_styles_enter.xml b/java/res/xml-sw600dp/key_styles_enter.xml new file mode 100644 index 000000000..1d8ccfae3 --- /dev/null +++ b/java/res/xml-sw600dp/key_styles_enter.xml @@ -0,0 +1,191 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2013, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> + +<merge + xmlns:latin="http://schemas.android.com/apk/res/com.android.inputmethod.latin" +> + <!-- TODO: Stop using many conditional cases for emoji_key_as_more_key. There are way too many to maintain. --> + <!-- Navigate more keys style --> + <switch> + <!-- latin:passwordInput="true" --> + <case + latin:imeAction="actionNext" + latin:navigatePrevious="true" + > + <key-style + latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" + latin:moreKeys="!text/action_previous_as_more_key" /> + </case> + <case + latin:imeAction="actionNext" + latin:navigatePrevious="false" + > + <key-style + latin:styleName="navigateMoreKeysStyle" /> + </case> + <case + latin:imeAction="actionPrevious" + latin:navigateNext="true" + > + <key-style + latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" + latin:moreKeys="!text/action_next_as_more_key" /> + </case> + <case + latin:imeAction="actionPrevious" + latin:navigateNext="false" + > + <key-style + latin:styleName="navigateMoreKeysStyle" /> + </case> + <case + latin:navigateNext="true" + latin:navigatePrevious="true" + > + <key-style + latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" + latin:moreKeys="!fixedColumnOrder!2,!needsDividers!,!text/action_previous_as_more_key,!text/action_next_as_more_key" /> + </case> + <case + latin:navigateNext="true" + latin:navigatePrevious="false" + > + <key-style + latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" + latin:moreKeys="!text/action_next_as_more_key" /> + </case> + <case + latin:navigateNext="false" + latin:navigatePrevious="true" + > + <key-style + latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" + latin:moreKeys="!text/action_previous_as_more_key" /> + </case> + <case + latin:navigateNext="false" + latin:navigatePrevious="false" + > + <key-style + latin:styleName="navigateMoreKeysStyle" /> + </case> + <default> + <key-style + latin:styleName="navigateMoreKeysStyle" /> + </default> + </switch> + <!-- Enter key style --> + <key-style + latin:styleName="defaultEnterKeyStyle" + latin:code="!code/key_enter" + latin:keyIcon="!icon/enter_key" + latin:keyLabelFlags="preserveCase|autoXScale|followKeyLabelRatio" + latin:keyActionFlags="noKeyPreview" + latin:backgroundType="functional" + latin:parentStyle="navigateMoreKeysStyle" /> + <key-style + latin:styleName="shiftEnterKeyStyle" + latin:code="!code/key_shift_enter" + latin:parentStyle="defaultEnterKeyStyle" /> + <key-style + latin:styleName="defaultActionEnterKeyStyle" + latin:code="!code/key_enter" + latin:keyIcon="!icon/undefined" + latin:backgroundType="action" + latin:parentStyle="defaultEnterKeyStyle" /> + <switch> + <!-- Shift + Enter in textMultiLine field. --> + <case + latin:isMultiLine="true" + latin:keyboardLayoutSetElement="alphabetManualShifted|alphabetShiftLockShifted" + > + <key-style + latin:styleName="enterKeyStyle" + latin:parentStyle="shiftEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionGo" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabel="!text/label_go_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionNext" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabel="!text/label_next_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionPrevious" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabel="!text/label_previous_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionDone" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabel="!text/label_done_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionSend" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabel="!text/label_send_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionSearch" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyIcon="!icon/search_key" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <case + latin:imeAction="actionCustomLabel" + > + <key-style + latin:styleName="enterKeyStyle" + latin:keyLabelFlags="fromCustomActionLabel" + latin:parentStyle="defaultActionEnterKeyStyle" /> + </case> + <!-- imeAction is either actionNone or actionUnspecified. --> + <default> + <key-style + latin:styleName="enterKeyStyle" + latin:parentStyle="defaultEnterKeyStyle" /> + </default> + </switch> +</merge> diff --git a/java/res/xml-sw600dp/rows_symbols_shift.xml b/java/res/xml-sw600dp/rows_symbols_shift.xml index aad047ff6..92299f65d 100644 --- a/java/res/xml-sw600dp/rows_symbols_shift.xml +++ b/java/res/xml-sw600dp/rows_symbols_shift.xml @@ -70,5 +70,7 @@ latin:keyWidth="10%p" /> <include latin:keyboardLayout="@xml/row_symbols_shift4" /> + <include + latin:keyboardLayout="@xml/key_f2" /> </Row> </merge> diff --git a/java/res/xml/key_styles_enter.xml b/java/res/xml/key_styles_enter.xml index 568c60270..083e6a67d 100644 --- a/java/res/xml/key_styles_enter.xml +++ b/java/res/xml/key_styles_enter.xml @@ -190,6 +190,7 @@ > <key-style latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" latin:moreKeys="!text/emoji_key_as_more_key" /> </case> <case @@ -207,6 +208,7 @@ > <key-style latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" latin:moreKeys="!text/emoji_key_as_more_key" /> </case> <case @@ -242,6 +244,7 @@ > <key-style latin:styleName="navigateMoreKeysStyle" + latin:keyLabelFlags="hasPopupHint|preserveCase" latin:moreKeys="!text/emoji_key_as_more_key" /> </case> <default> diff --git a/java/res/xml/row_symbols_shift4.xml b/java/res/xml/row_symbols_shift4.xml index 72d24a321..0909374f4 100644 --- a/java/res/xml/row_symbols_shift4.xml +++ b/java/res/xml/row_symbols_shift4.xml @@ -23,8 +23,4 @@ <include latin:keyboardLayout="@xml/key_space_symbols" /> <include latin:keyboardLayout="@xml/keys_comma_period" /> - - <include - latin:keyboardLayout="@xml/key_f2" /> - </merge> diff --git a/java/res/xml/rows_symbols_shift.xml b/java/res/xml/rows_symbols_shift.xml index 45ada2a61..c4bdb9f38 100644 --- a/java/res/xml/rows_symbols_shift.xml +++ b/java/res/xml/rows_symbols_shift.xml @@ -60,5 +60,8 @@ latin:keyWidth="15%p" /> <include latin:keyboardLayout="@xml/row_symbols_shift4" /> + <Key + latin:keyStyle="enterKeyStyle" + latin:keyWidth="fillRight" /> </Row> </merge> diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java index 9f9fdaa6f..dd98c1703 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java @@ -327,6 +327,9 @@ public final class KeyboardState { } mIsAlphabetMode = false; mIsEmojiMode = true; + // Remember caps lock mode and reset alphabet shift state. + mPrevMainKeyboardWasShiftLocked = mAlphabetShiftState.isShiftLocked(); + mAlphabetShiftState.setShiftLocked(false); mSwitchActions.setEmojiKeyboard(); } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index e744d8d76..65ff95458 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -196,9 +196,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private int mLastSelectionStart = NOT_A_CURSOR_POSITION; private int mLastSelectionEnd = NOT_A_CURSOR_POSITION; - // Whether we are expecting an onUpdateSelection event to fire. If it does when we don't - // "expect" it, it means the user actually moved the cursor. - private boolean mExpectingUpdateSelection; private int mDeleteCount; private long mLastKeyTime; private final TreeSet<Long> mCurrentlyPressedHardwareKeys = CollectionUtils.newTreeSet(); @@ -1083,7 +1080,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { ResearchLogger.latinIME_onUpdateSelection(mLastSelectionStart, mLastSelectionEnd, oldSelStart, oldSelEnd, newSelStart, newSelEnd, composingSpanStart, - composingSpanEnd, mExpectingUpdateSelection, mConnection); + composingSpanEnd, mConnection); } final boolean selectionChanged = mLastSelectionStart != newSelStart @@ -1102,14 +1099,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // TODO: revisit this when LatinIME supports hardware keyboards. // NOTE: the test harness subclasses LatinIME and overrides isInputViewShown(). // TODO: find a better way to simulate actual execution. - if (isInputViewShown() && !mExpectingUpdateSelection + if (isInputViewShown() && !mConnection.isBelatedExpectedUpdate(oldSelStart, newSelStart)) { - // TAKE CARE: there is a race condition when we enter this test even when the user - // did not explicitly move the cursor. This happens when typing fast, where two keys - // turn this flag on in succession and both onUpdateSelection() calls arrive after - // the second one - the first call successfully avoids this test, but the second one - // enters. For the moment we rely on noComposingSpan to further reduce the impact. - // TODO: the following is probably better done in resetEntireInputState(). // it should only happen when the cursor moved, and the very purpose of the // test below is to narrow down whether this happened or not. Likewise with @@ -1154,7 +1145,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mRecapitalizeStatus.deactivate(); mKeyboardSwitcher.updateShiftState(); } - mExpectingUpdateSelection = false; // Make a note of the cursor position mLastSelectionStart = newSelStart; @@ -1733,7 +1723,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } handleCharacter(primaryCode, keyX, keyY, spaceState); } - mExpectingUpdateSelection = true; return didAutoCorrect; } @@ -1799,7 +1788,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } else { commitTyped(LastComposedWord.NOT_A_SEPARATOR); } - mExpectingUpdateSelection = true; } final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (Character.isLetterOrDigit(codePointBeforeCursor) @@ -1989,7 +1977,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mWordComposer.setBatchInputWord(batchInputText); mConnection.setComposingText(batchInputText, 1); } - mExpectingUpdateSelection = true; mConnection.endBatchEdit(); if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0, suggestedWords); @@ -2043,9 +2030,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } private void handleBackspace(final int spaceState) { - // We revert these in this method if the deletion doesn't happen. mDeleteCount++; - mExpectingUpdateSelection = true; // In many cases, we may have to put the keyboard in auto-shift state again. However // we want to wait a few milliseconds before doing it to avoid the keyboard flashing @@ -2138,10 +2123,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (codePointBeforeCursor == Constants.NOT_A_CODE) { - // Nothing to delete before the cursor. We have to revert the deletion states - // that were updated at the beginning of this method. - mDeleteCount--; - mExpectingUpdateSelection = false; + // Nothing to delete before the cursor. return; } final int lengthToDelete = @@ -2650,7 +2632,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection, separator, mWordComposer.isBatchMode(), suggestedWords); } - mExpectingUpdateSelection = true; commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD, separator); if (!typedWord.equals(autoCorrection)) { @@ -2721,7 +2702,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // typed word. final String replacedWord = mWordComposer.getTypedWord(); LatinImeLogger.logOnManualSuggestion(replacedWord, suggestion, index, suggestedWords); - mExpectingUpdateSelection = true; commitChosenWord(suggestion, LastComposedWord.COMMIT_TYPE_MANUAL_PICK, LastComposedWord.NOT_A_SEPARATOR); if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index 3dbeee099..b8636eecd 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -43,7 +43,7 @@ public interface DictDecoder { public FileHeader readHeader() throws IOException, UnsupportedFormatException; /** - * Reads PtNode from nodeAddress. + * Reads PtNode from ptNodePos. * @param ptNodePos the position of PtNode. * @param formatOptions the format options. * @return PtNodeInfo. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 64538c102..add03c323 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -51,9 +51,30 @@ public class Ver4DictDecoder extends AbstractDictDecoder { protected DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; - private DictBuffer mShortcutBuffer; private BigramContentReader mBigramReader; - private SparseTable mShortcutAddressTable; + private ShortcutContentReader mShortcutReader; + + /** + * Raw PtNode info straight out of a trie file in version 4 dictionary. + */ + protected static final class Ver4PtNodeInfo { + public final int mFlags; + public final int[] mCharacters; + public final int mTerminalId; + public final int mChildrenPos; + public final int mParentPos; + public final int mNodeSize; + + public Ver4PtNodeInfo(final int flags, final int[] characters, final int terminalId, + final int childrenPos, final int parentPos, final int nodeSize) { + mFlags = flags; + mCharacters = characters; + mTerminalId = terminalId; + mChildrenPos = childrenPos; + mParentPos = parentPos; + mNodeSize = nodeSize; + } + } @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { @@ -110,8 +131,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder { mBigramReader = new BigramContentReader(mDictDirectory.getName(), mDictDirectory, mBufferFactory, false); mBigramReader.openBuffers(); - mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT)); - loadShortcutAddressSparseTable(); + mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory, + mBufferFactory); + mShortcutReader.openBuffers(); } @Override @@ -136,21 +158,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder { return header; } - // TODO: Let's have something like SparseTableContentsReader in this class. - private void loadShortcutAddressSparseTable() throws IOException { - final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - final File contentFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX - + FormatSpec.SHORTCUT_CONTENT_ID); - final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX - + FormatSpec.SHORTCUT_CONTENT_ID); - mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile, - new File[] { contentFile, timestampsFile }, - FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); - } - /** * An auxiliary class for reading bigrams. */ @@ -194,34 +201,71 @@ public class Ver4DictDecoder extends AbstractDictDecoder { final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, new SparseTableContentReaderInterface() { - @Override - public void read(final DictBuffer buffer) { - while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, - // remaining bigram entries are ignored. - final int bigramFlags = buffer.readUnsignedByte(); - final int targetTerminalId = buffer.readUnsignedInt24(); - terminalAddressTableBuffer.position( - targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24(); - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - targetAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { - break; + @Override + public void read(final DictBuffer buffer) { + while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, + // remaining bigram entries are ignored. + final int bigramFlags = buffer.readUnsignedByte(); + final int targetTerminalId = buffer.readUnsignedInt24(); + terminalAddressTableBuffer.position(targetTerminalId + * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + final int targetAddress = + terminalAddressTableBuffer.readUnsignedInt24(); + bigrams.add(new PendingAttribute(bigramFlags + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + targetAddress)); + if (0 == (bigramFlags + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { + break; + } + } + if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + throw new RuntimeException("Too many bigrams in a PtNode (" + + bigrams.size() + " but max is " + + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); + } } - } - if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() - + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); - } - } - }); + }); if (bigrams.isEmpty()) return null; return bigrams; } } + /** + * An auxiliary class for reading shortcuts. + */ + protected static class ShortcutContentReader extends SparseTableContentReader { + public ShortcutContentReader(final String name, final File baseDir, + final DictionaryBufferFactory factory) { + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, + new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, factory); + } + + public ArrayList<WeightedString> readShortcuts(final int terminalId) { + final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); + read(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, + new SparseTableContentReaderInterface() { + @Override + public void read(final DictBuffer buffer) { + while (true) { + final int flags = buffer.readUnsignedByte(); + final String word = CharEncoding.readString(buffer); + shortcuts.add(new WeightedString(word, + flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); + if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { + break; + } + } + } + }); + if (shortcuts.isEmpty()) return null; + return shortcuts; + } + } + protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); @@ -233,80 +277,82 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } } - private ArrayList<WeightedString> readShortcuts(final int terminalId) { - if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null; - - final ArrayList<WeightedString> ret = CollectionUtils.newArrayList(); - final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX, - terminalId); - mShortcutBuffer.position(posOfShortcuts); - while (true) { - final int flags = mShortcutBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(mShortcutBuffer); - ret.add(new WeightedString(word, - flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return ret; - } + private final int[] mCharacterBufferForReadingVer4PtNodeInfo + = new int[FormatSpec.MAX_WORD_LENGTH]; + /** + * Reads PtNode from ptNodePos in the trie file and returns Ver4PtNodeInfo. + * + * @param ptNodePos the position of PtNode. + * @param options the format options. + * @return Ver4PtNodeInfo. + */ // TODO: Make this buffer thread safe. // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH. - private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; - @Override - public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { - int addressPointer = ptNodePos; + protected Ver4PtNodeInfo readVer4PtNodeInfo(final int ptNodePos, final FormatOptions options) { + int readingPos = ptNodePos; final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; + readingPos += FormatSpec.PTNODE_FLAGS_SIZE; - final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); + final int parentPos = PtNodeReader.readParentAddress(mDictBuffer, options); if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; + readingPos += FormatSpec.PARENT_ADDRESS_SIZE; } final int characters[]; if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); while (FormatSpec.INVALID_CHARACTER != character && index < FormatSpec.MAX_WORD_LENGTH) { - mCharacterBuffer[index++] = character; + mCharacterBufferForReadingVer4PtNodeInfo[index++] = character; character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); } - characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); + characters = Arrays.copyOfRange(mCharacterBufferForReadingVer4PtNodeInfo, 0, index); } else { final int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); characters = new int[] { character }; } final int terminalId; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { terminalId = PtNodeReader.readTerminalId(mDictBuffer); - addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; + readingPos += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { terminalId = PtNode.NOT_A_TERMINAL; } + int childrenPos = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); + if (childrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenPos += readingPos; + } + readingPos += BinaryDictIOUtils.getChildrenAddressSize(flags, options); + + return new Ver4PtNodeInfo(flags, characters, terminalId, childrenPos, parentPos, + readingPos - ptNodePos); + } + + @Override + public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { + final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options); + final int frequency; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); + if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) { + frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId); } else { frequency = PtNode.NOT_A_TERMINAL; } - int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); - final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); - final ArrayList<PendingAttribute> bigrams = - mBigramReader.readTargetsAndFrequencies(terminalId, - mTerminalAddressTableBuffer); - - return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); + + final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts( + nodeInfo.mTerminalId); + final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies( + nodeInfo.mTerminalId, mTerminalAddressTableBuffer); + + return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags, + nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos, + shortcutTargets, bigrams); } private void deleteDictFiles() { diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java index 265d55d22..ad509923a 100644 --- a/java/src/com/android/inputmethod/research/ResearchLogger.java +++ b/java/src/com/android/inputmethod/research/ResearchLogger.java @@ -1161,8 +1161,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang public static void latinIME_onUpdateSelection(final int lastSelectionStart, final int lastSelectionEnd, final int oldSelStart, final int oldSelEnd, final int newSelStart, final int newSelEnd, final int composingSpanStart, - final int composingSpanEnd, final boolean expectingUpdateSelection, - final RichInputConnection connection) { + final int composingSpanEnd, final RichInputConnection connection) { String word = ""; if (connection != null) { TextRange range = connection.getWordRangeAtCursor(WHITESPACE_SEPARATORS, 1); @@ -1174,7 +1173,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang final String scrubbedWord = researchLogger.scrubWord(word); researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONUPDATESELECTION, lastSelectionStart, lastSelectionEnd, oldSelStart, oldSelEnd, newSelStart, newSelEnd, - composingSpanStart, composingSpanEnd, expectingUpdateSelection, + composingSpanStart, composingSpanEnd, false /* expectingUpdateSelection */, false /* expectingUpdateSelectionFromLogger */, scrubbedWord); } diff --git a/native/jni/Android.mk b/native/jni/Android.mk index ca6a77997..194cc8e6e 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -72,16 +72,19 @@ LATIN_IME_CORE_SRC_FILES := \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ - dictionary_structure_with_buffer_policy_factory.cpp \ + structure/dictionary_structure_with_buffer_policy_factory.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ + patricia_trie_policy.cpp \ + patricia_trie_reading_utils.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/v3/, \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_utils.cpp \ dynamic_patricia_trie_writing_helper.cpp \ - dynamic_patricia_trie_writing_utils.cpp \ - patricia_trie_policy.cpp \ - patricia_trie_reading_utils.cpp) \ + dynamic_patricia_trie_writing_utils.cpp) \ + suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 8f21c50ec..f623b8d9d 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -25,7 +25,7 @@ #include "jni_common.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 49cfdecac..c319a38d5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -99,7 +99,7 @@ class DicNode { virtual ~DicNode() {} // Init for copy - void initByCopy(const DicNode *dicNode) { + void initByCopy(const DicNode *const dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init(&dicNode->mDicNodeProperties); @@ -107,25 +107,25 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordNodePos which is used for bigram - void initAsRoot(const int rootGroupPos, const int prevWordNodePos) { + // Init for root with prevWordPtNodePos which is used for bigram + void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); - mDicNodeState.init(prevWordNodePos); + mDicNodeState.init(prevWordPtNodePos); PROF_NODE_RESET(mProfiler); } // Init for root with previous word - void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { + void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -138,7 +138,7 @@ class DicNode { mDicNodeState.mDicNodeStatePrevWord.init( dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1, dicNode->mDicNodeProperties.getProbability(), - dicNode->mDicNodeProperties.getPos(), + dicNode->mDicNodeProperties.getPtNodePos(), dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord, dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), @@ -148,26 +148,27 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentNode) { + void initAsPassingChild(DicNode *parentDicNode) { mIsUsed = true; - mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; - const int c = parentNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); - mDicNodeState.init(&parentNode->mDicNodeState); - PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); + mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; + const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); + mDicNodeState.init(&parentDicNode->mDicNodeState); + PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); } - void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void initAsChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth); + mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -234,7 +235,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -246,29 +247,30 @@ class DicNode { } // Used to get bigram probability in DicNodeUtils - int getPos() const { - return mDicNodeProperties.getPos(); + int getPtNodePos() const { + return mDicNodeProperties.getPtNodePos(); } // Used to get bigram probability in DicNodeUtils - int getPrevWordPos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordTerminalPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } // Used in DicNodeUtils - int getChildrenPos() const { - return mDicNodeProperties.getChildrenPos(); + int getChildrenPtNodeArrayPos() const { + return mDicNodeProperties.getChildrenPtNodeArrayPos(); } int getProbability() const { return mDicNodeProperties.getProbability(); } - AK_FORCE_INLINE bool isTerminalWordNode() const { - const bool isTerminalNodes = mDicNodeProperties.isTerminal(); - const int currentNodeDepth = getNodeCodePointCount(); - const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth(); - return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; + AK_FORCE_INLINE bool isTerminalDicNode() const { + const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); + const int currentDicNodeDepth = getNodeCodePointCount(); + const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth(); + return isTerminalPtNode && currentDicNodeDepth > 0 + && currentDicNodeDepth == terminalDicNodeDepth; } bool shouldBeFilteredBySafetyNetForBigram() const { @@ -374,8 +376,8 @@ class DicNode { } // Used to commit input partially - int getPrevWordNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -410,7 +412,7 @@ class DicNode { // TODO: Remove once touch path is merged into ProximityInfoState // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph. int getNodeCodePoint() const { - const int codePoint = mDicNodeProperties.getNodeCodePoint(); + const int codePoint = mDicNodeProperties.getDicNodeCodePoint(); const DigraphUtils::DigraphCodePointIndex digraphIndex = mDicNodeState.mDicNodeStateScoring.getDigraphIndex(); if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) { @@ -423,8 +425,8 @@ class DicNode { // Utils for cost calculation // //////////////////////////////// AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const { - return mDicNodeProperties.getNodeCodePoint() - == dicNode->mDicNodeProperties.getNodeCodePoint(); + return mDicNodeProperties.getDicNodeCodePoint() + == dicNode->mDicNodeProperties.getDicNodeCodePoint(); } // TODO: remove diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index ec65114c7..5540b6df5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -22,7 +22,6 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "utils/char_utils.h" namespace latinime { @@ -32,19 +31,20 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); + const int prevWordPtNodePos, DicNode *const newRootDicNode) { + newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *const prevWordLastNode, DicNode *const newRootNode) { - newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { + newRootDicNode->initAsRootWithPreviousWord( + prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); } -/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { - destNode->initByCopy(srcNode); +/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, + DicNode *const destDicNode) { + destDicNode->initByCopy(srcDicNode); } /////////////////////////////////// @@ -52,14 +52,14 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNodeVector *childDicNodes) { + DicNodeVector *const childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } if (!dicNode->isLeavingNode()) { childDicNodes->pushPassingChild(dicNode); } else { - dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); + dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); } } @@ -71,11 +71,11 @@ namespace latinime { */ /* static */ float DicNodeUtils::getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) @@ -85,19 +85,19 @@ namespace latinime { /* static */ int DicNodeUtils::getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - const int unigramProbability = node->getProbability(); - const int wordPos = node->getPos(); - const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + const int unigramProbability = dicNode->getProbability(); + const int ptNodePos = dicNode->getPtNodePos(); + const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); + if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, - wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, + prevWordTerminalPtNodePos, ptNodePos, unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); @@ -109,7 +109,7 @@ namespace latinime { // TODO: Move to char_utils? /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, int *dest) { + const int *const src1, const int16_t length1, int *const dest) { int actualLength0 = 0; for (int i = 0; i < length0; ++i) { if (src0[i] == 0) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 3fb351a61..3f1514a52 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -31,20 +31,20 @@ class MultiBigramMap; class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, - const int16_t length1, int *dest); + const int16_t length1, int *const dest); static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *newRootNode); + const int prevWordPtNodePos, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *prevWordLastNode, DicNode *newRootNode); - static void initByCopy(DicNode *srcNode, DicNode *destNode); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); + static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); static void getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *const multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); @@ -53,7 +53,7 @@ class DicNodeUtils { static int getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 42addae8d..9364e7751 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,14 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal, - hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 9e0f62ceb..c41a7243a 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -24,15 +24,14 @@ namespace latinime { /** - * Node for traversing the lexicon trie. + * PtNode information related to the DicNode from the lexicon trie. */ -// TODO: Introduce a dictionary node class which has attribute members required to understand the -// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false), - mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} + : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), + mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), + mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} @@ -40,57 +39,57 @@ class DicNodeProperties { void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth) { - mPos = pos; - mChildrenPos = childrenPos; - mNodeCodePoint = nodeCodePoint; + mPtNodePos = pos; + mChildrenPtNodeArrayPos = childrenPos; + mDicNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; - mHasChildren = hasChildren; + mHasChildrenPtNodes = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } // Init for copy - void init(const DicNodeProperties *const nodeProp) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = nodeProp->mNodeCodePoint; - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth; - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth; + mLeavingDepth = dicNodeProp->mLeavingDepth; } // Init as passing child - void init(const DicNodeProperties *const nodeProp, const int codePoint) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child + mLeavingDepth = dicNodeProp->mLeavingDepth; } - int getPos() const { - return mPos; + int getPtNodePos() const { + return mPtNodePos; } - int getChildrenPos() const { - return mChildrenPos; + int getChildrenPtNodeArrayPos() const { + return mChildrenPtNodeArrayPos; } int getProbability() const { return mProbability; } - int getNodeCodePoint() const { - return mNodeCodePoint; + int getDicNodeCodePoint() const { + return mDicNodeCodePoint; } uint16_t getDepth() const { @@ -107,7 +106,7 @@ class DicNodeProperties { } bool hasChildren() const { - return mHasChildren || mDepth != mLeavingDepth; + return mHasChildrenPtNodes || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { @@ -118,12 +117,12 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - int mPos; - int mChildrenPos; + int mPtNodePos; + int mChildrenPtNodeArrayPos; int mProbability; - int mNodeCodePoint; + int mDicNodeCodePoint; bool mIsTerminal; - bool mHasChildren; + bool mHasChildrenPtNodes; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index b8986203d..dba57056b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -30,7 +30,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { + mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -50,7 +50,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -60,7 +60,7 @@ class DicNodeStatePrevWord { mPrevWordCount = prevWord->mPrevWordCount; mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; - mPrevWordNodePos = prevWord->mPrevWordNodePos; + mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos; mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); } @@ -71,7 +71,7 @@ class DicNodeStatePrevWord { const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; int twoWordsLen = DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); if (twoWordsLen >= MAX_WORD_LENGTH) { @@ -116,8 +116,8 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int getPrevWordNodePos() const { - return mPrevWordNodePos; + int getPrevWordPtNodePos() const { + return mPrevWordPtNodePos; } int getPrevWordCodePointAt(const int id) const { @@ -147,7 +147,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordLength; int16_t mPrevWordStart; int16_t mPrevWordProbability; - int mPrevWordNodePos; + int mPrevWordPtNodePos; int mSecondWordFirstInputIndex; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 71f4ef6ea..c2a15a312 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); @@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; - int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 59ead1894..264b46056 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 41f82049f..610de48ab 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy { virtual int getRootPosition() const = 0; - virtual void createAndGetAllChildNodes(const DicNode *const dicNode, + virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord(const int *const inWord, + virtual int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; virtual int getProbability(const int unigramProbability, diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 50f2bbd8d..5070491f4 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_DICT_POS) { + if (mPrevWordPtNodePos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index e0b1c67d9..6e4dda44d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), + : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { @@ -86,11 +86,9 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPos() const { return mPrevWordPos; } + int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } // TODO: REMOVE - void setPrevWordPos(int pos) { mPrevWordPos = pos; } - // TODO: Use proper parameter when changed - int getDicRootPos() const { return 0; } + void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -119,26 +117,13 @@ class DicTraverseSession { return true; } - void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const { - for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { - if (!mProximityInfoStates[i].isUsed()) { - continue; - } - const int pointerId = node->getInputIndex(i); - const std::vector<int> *const searchKeyVector = - mProximityInfoStates[i].getSearchKeyVector(pointerId); - outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(), - searchKeyVector->end()); - } - } - - ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const { + ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const { ProximityType proximityType = UNRELATED_CHAR; for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { if (!mProximityInfoStates[i].isUsed()) { continue; } - const int pointerId = node->getInputIndex(i); + const int pointerId = dicNode->getInputIndex(i); proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint); ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR); // TODO: Make this more generic @@ -192,7 +177,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPos; + int mPrevWordPtNodePos; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 73ccebc88..2eda414f4 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Continue suggestion after partial commit. DicNode *topDicNode = traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); - traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos()); traverseSession->getDicTraverseCache()->continueSearch(); traverseSession->setPartiallyCommited(); } @@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getPrevWordPtNodePos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } } @@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen BinaryDictionaryShortcutIterator shortcutIt( traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy() - ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); + ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); @@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } break; case UNRELATED_CHAR: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; default: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; } } - // Push the node for look-ahead correction + // Push the dicNode for look-ahead correction if (allowsErrorCorrections && canDoLookAheadCorrection) { traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); } @@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode( if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) { return; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return; } if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { @@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode( /** * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word - * (by the space omission error correction) search path if input dicNode is on a terminal node. + * (by the space omission error correction) search path if input dicNode is on a terminal. */ void Suggest::processExpandedDicNode( DicTraverseSession *traverseSession, DicNode *dicNode) const { @@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, processExpandedDicNode(traverseSession, childDicNode); } -// Process the node codepoint as a digraph. This means that composite glyphs like the German +// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, @@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, /** * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider * matches for all possible next letters. Note that just skipping the current letter without any - * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check * the possible *next* letters after the omission to better limit search to plausible omissions. * Note that apostrophes are handled as omissions. */ @@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, } /** - * Weight child node by aligning it to the key + * Weight child dicNode by aligning it to the key */ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int inputSize = traverseSession->getInputSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index 1926b9831..de9fc9bbc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index b1170e251..d97501265 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -17,8 +17,8 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 0504b59d5..5de456656 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -22,7 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index a28535fda..baa9cf142 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h index 8cebc3b16..8cebc3b16 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 8a84bd261..960c1b936 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -15,22 +15,22 @@ */ -#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { -void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - int nextPos = dicNode->getChildrenPos(); + int nextPos = dicNode->getChildrenPtNodeArrayPos(); if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", nextPos, mDictBufferSize); @@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, -// it is possible to check for this with advantageous complexity. For each node, we search +// it is possible to check for this with advantageous complexity. For each PtNode array, we search // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller -// than the position we look for, and we have to descend the z node). +// than the position we look for, and we have to descend the z PtNode). /* Parameters : * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) @@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will - // only traverse nodes that are actually a part of the terminal we are searching, so each time - // we enter this loop we are one depth level further than last time. - // The only reason we count nodes is because we want to reduce the probability of infinite + // only traverse PtNodes that are actually a part of the terminal we are searching, so each + // time we enter this loop we are one depth level further than last time. + // The only reason we count PtNodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { @@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = true; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the - // position we should descend THIS node. So we trick the lastCandidatePtNodePos - // so that we will descend this PtNode, not the previous one. + // position we should descend THIS PtNode. So we trick the + // lastCandidatePtNodePos so that we will descend this PtNode, not the previous + // one. lastCandidatePtNodePos = startPos; found = true; } else { @@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = false; } } else { - // Even if we don't have children here, we could still be on the last PtNode of / + // Even if we don't have children here, we could still be on the last PtNode of // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); @@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( return 0; } -// This function gets the position of the terminal node of the exact matching word in the +// This function gets the position of the terminal PtNode of the exact matching word in the // dictionary. If no match is found, it returns NOT_A_DICT_POS. -int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 0f8662aea..4b4c39dfa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp index 7df55815f..82b3593c8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h index 8420ee95a..8420ee95a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index 5724c5d88..126b7681e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index 9755120b0..ab59cc645 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp index 2fa3111d3..4fd2484e1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h index 3b36d425f..fac078d0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -20,8 +20,8 @@ #include <stdint.h> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index a8ea69f3c..1ec4d3595 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include <cstdio> #include <cstring> @@ -23,11 +23,11 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" @@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024 const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), @@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return codePointCount; } -int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { @@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; @@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index be97ee1a5..2c722e8ed 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index f108c219f..f3410affc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -22,7 +22,7 @@ namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; -const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; +const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; // Visits all PtNodes in post-order depth first manner. @@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { mReadingState.mPos = NOT_A_DICT_POS; return; } - mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; + mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } - mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( - dictBuf, &mReadingState.mPos); + mReadingState.mRemainingPtNodeCountInThisArray = + PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, + &mReadingState.mPos); if (usesAdditionalBuffer) { mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. - mReadingState.mTotalNodeCount += mReadingState.mNodeCount; - mReadingState.mNodeArrayCount++; - if (mReadingState.mNodeCount < 0 - || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP - || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { + mReadingState.mTotalPtNodeIndexInThisArrayChain += + mReadingState.mRemainingPtNodeCountInThisArray; + mReadingState.mPtNodeArrayIndexInThisArrayChain++; + if (mReadingState.mRemainingPtNodeCountInThisArray < 0 + || mReadingState.mTotalPtNodeIndexInThisArrayChain + > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP + || mReadingState.mPtNodeArrayIndexInThisArrayChain + > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", - mReadingState.mNodeCount, mReadingState.mTotalNodeCount, - MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, - MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); + mReadingState.mRemainingPtNodeCountInThisArray, + mReadingState.mTotalPtNodeIndexInThisArrayChain, + MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, + mReadingState.mPtNodeArrayIndexInThisArrayChain, + MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - if (mReadingState.mNodeCount == 0) { + if (mReadingState.mRemainingPtNodeCountInThisArray == 0) { // Empty node array. Try following forward link. followForwardLink(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h index a71c06971..f8d32c9cd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h @@ -21,9 +21,9 @@ #include <vector> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { @@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodeArrayPos; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextPtNodeArray(); @@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodePos; - mReadingState.mNodeCount = 1; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); fetchPtNodeInfo(); } @@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper { // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount; + return mReadingState.mTotalCodePointCountSinceInitialization; } // Return code point count include the last read node's code points. AK_FORCE_INLINE int getTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); + return mReadingState.mTotalCodePointCountSinceInitialization + + mNodeReader.getCodePointCount(); } AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( @@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE void readNextSiblingNode() { - mReadingState.mNodeCount -= 1; + mReadingState.mRemainingPtNodeCountInThisArray -= 1; mReadingState.mPos = mNodeReader.getSiblingNodePos(); - if (mReadingState.mNodeCount <= 0) { + if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { // All nodes in the current node array have been read. followForwardLink(); if (!isEnd()) { @@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper { // Read the first child node of the current node. AK_FORCE_INLINE void readChildNode() { if (mNodeReader.hasChildren()) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; + mReadingState.mTotalCodePointCountSinceInitialization += + mNodeReader.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. @@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper { // Read the parent node of the current node. AK_FORCE_INLINE void readParentNode() { if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; - mReadingState.mNodeCount = 1; + mReadingState.mTotalCodePointCountSinceInitialization += + mNodeReader.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; fetchPtNodeInfo(); } else { mReadingState.mPos = NOT_A_DICT_POS; @@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { - return mReadingState.mPosOfLastPtNodeArrayHead; + return mReadingState.mPosOfThisPtNodeArrayHead; } AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { @@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); - class ReadingState { + // This class encapsulates the reading state of a position in the dictionary. It points at a + // specific PtNode in the dictionary. + class PtNodeReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. - ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), - mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), - mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} + PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0), + mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0), + mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), + mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {} int mPos; - // Node count of a node array. - int mNodeCount; - int mPrevTotalCodePointCount; - int mTotalNodeCount; - int mNodeArrayCount; + // Remaining node count in the current array. + int mRemainingPtNodeCountInThisArray; + int mTotalCodePointCountSinceInitialization; + // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. + int mTotalPtNodeIndexInThisArrayChain; + // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty + // PtNode arrays. + int mPtNodeArrayIndexInThisArrayChain; int mPosOfLastForwardLinkField; - int mPosOfLastPtNodeArrayHead; + int mPosOfThisPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; + static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; // TODO: Introduce error code to track what caused the error. bool mIsError; - ReadingState mReadingState; + PtNodeReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; DynamicPatriciaTrieNodeReader mNodeReader; int mMergedNodeCodePoints[MAX_WORD_LENGTH]; - std::vector<ReadingState> mReadingStateStack; + std::vector<PtNodeReadingState> mReadingStateStack; void nextPtNodeArray(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp index d68446db6..e94925365 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h index 67c3cc57e..67c3cc57e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index 052558bfc..e149d6371 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index ca8664729..ca8664729 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp index 30ff10cd6..67733660b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include <cstddef> #include <cstdlib> diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h index af76bc6b5..5654105ee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h @@ -20,7 +20,7 @@ #include <cstddef> #include "defines.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp new file mode 100644 index 000000000..b9ee4891c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" + +namespace latinime { + +void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const { + // TODO: Implement. +} + +int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const { + // TODO: Implement. + return 0; +} + +int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_PROBABILITY; +} + +int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { + // TODO: Implement. + return NOT_A_DICT_POS; +} + +bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, + const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1, const int probability) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::flush(const char *const filePath) { + // TODO: Implement. +} + +void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { + // TODO: Implement. +} + +bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { + // TODO: Implement. + return false; +} + +void Ver4PatriciaTriePolicy::getProperty(const char *const query, char *const outResult, + const int maxResultLength) { + // TODO: Implement. +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h new file mode 100644 index 000000000..86a4f8bce --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#define LATINIME_VER4_PATRICIA_TRIE_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" + +namespace latinime { + +class DicNode; +class DicNodeVector; + +// TODO: Implement. +class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { + public: + ~Ver4PatriciaTriePolicy() {} + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const; + + int getCodePointsAndProbabilityAndReturnCodePointCount( + const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const; + + int getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + + int getShortcutPositionOfPtNode(const int ptNodePos) const; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; + + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return 0; + } + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return 0; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return 0; + } + + bool addUnigramWord(const int *const word, const int length, const int probability); + + bool addBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1, const int probability); + + bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, char *const outResult, + const int maxResultLength); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); +}; +} // namespace latinime +#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 5f781d50b..f65583ee4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -20,7 +20,7 @@ #include <cstring> #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 007c19e0a..fd0ac9eb6 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -81,7 +81,7 @@ class TypingTraversal : public Traversal { return false; } const int point0Index = dicNode->getInputIndex(0); - return dicNode->isTerminalWordNode() + return dicNode->isTerminalDicNode() && traverseSession->getProximityInfoState(0)-> hasSpaceProximity(point0Index); } @@ -96,7 +96,7 @@ class TypingTraversal : public Traversal { if (dicNode->isCompletion(inputSize)) { return false; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return false; } const int16_t pointIndex = dicNode->getInputIndex(0); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 32c07e106..d670aad43 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -26,7 +26,6 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -60,9 +59,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; private static final int NUM_OF_SHORTCUTS = 5; - private static final int USE_BYTE_ARRAY = 1; - private static final int USE_BYTE_BUFFER = 2; - private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); private static final SparseArray<List<Integer>> sEmptyBigrams = CollectionUtils.newSparseArray(); @@ -71,21 +67,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { CollectionUtils.newSparseArray(); private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); - private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); - private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); - private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); - private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); - private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = - new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); - private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP = - new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */, - true /* hasTimestamp */); - - private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - public BinaryDictDecoderEncoderTests() { this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); } @@ -124,17 +105,6 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } } - private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) { - if (formatOptions.mVersion == FormatSpec.VERSION4) { - return new Ver4DictEncoder(getContext().getCacheDir()); - } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { - return new Ver3DictEncoder(file); - } else { - throw new RuntimeException("The format option has a wrong version : " - + formatOptions.mVersion); - } - } - private void generateWords(final int number, final Random random, final int[] codePointSet) { final Set<String> wordSet = CollectionUtils.newHashSet(); while (wordSet.size() < number) { @@ -186,7 +156,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { long now = -1, diff = -1; try { - final DictEncoder dictEncoder = getDictEncoder(file, formatOptions); + final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions, + getContext().getCacheDir()); now = System.currentTimeMillis(); // If you need to dump the dict to a textual file, uncomment the line below and the @@ -241,54 +212,21 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private String outputOptions(final int bufferType, final FormatSpec.FormatOptions formatOptions) { String result = " : buffer type = " - + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); + + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); result += " : version = " + formatOptions.mVersion; return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; } - private DictionaryOptions getDictionaryOptions(final String id, final String version) { - final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(), - false, false); - options.mAttributes.put("version", version); - options.mAttributes.put("dictionary", id); - return options; - } - - private File setUpDictionaryFile(final String name, final String version) { - File file = null; - try { - file = new File(getContext().getCacheDir(), name + "." + version - + TEST_DICT_FILE_EXTENSION); - file.createNewFile(); - } catch (IOException e) { - // do nothing - } - assertTrue("Failed to create the dictionary file.", file.exists()); - return file; - } - - private DictDecoder getDictDecoder(final File file, final int bufferType, - final FormatOptions formatOptions, final DictionaryOptions dictOptions) { - if (formatOptions.mVersion == FormatSpec.VERSION4) { - final FileHeader header = new FileHeader(0, dictOptions, formatOptions); - return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(), - header.getId() + "." + header.getVersion()), bufferType); - } else { - return FormatSpec.getDictDecoder(file, bufferType); - } - } // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams, - final HashMap<String, List<String>> shortcutMap, final int bufferType, - final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + final HashMap<String, List<String>> shortcutMap, final int bufferType) { long now, diff = -1; FusionDictionary dict = null; try { - final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, - dictOptions); + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); now = System.currentTimeMillis(); dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); diff = System.currentTimeMillis() - now; @@ -310,17 +248,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final String dictName = "runReadAndWrite"; final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = setUpDictionaryFile(dictName, dictVersion); + final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.getDictionaryOptions(dictName, dictVersion)); addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, bigrams); checkDictionary(dict, words, bigrams, shortcuts); final long write = timeWritingDictToFile(file, dict, formatOptions); - final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType, - formatOptions, dict.mOptions); + final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); return "PROF: read=" + read + "ms, write=" + write + "ms :" + message + " : " + outputOptions(bufferType, formatOptions); @@ -349,8 +287,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final byte[] buffer = new byte[50 * 3]; final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); for (final String word : sWords) { - Log.d("testReadAndWriteString", "write : " + word); - Arrays.fill(buffer, (byte)0); + Arrays.fill(buffer, (byte) 0); CharEncoding.writeString(buffer, 0, word); dictBuffer.position(0); final String str = CharEncoding.readString(dictBuffer); @@ -361,12 +298,18 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testReadAndWriteWithByteBuffer() { final List<String> results = CollectionUtils.newArrayList(); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION2); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -376,12 +319,18 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testReadAndWriteWithByteArray() { final List<String> results = CollectionUtils.newArrayList(); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); - runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION2); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -437,8 +386,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType, - final FormatOptions formatOptions, final DictionaryOptions dictOptions) { + final SparseArray<List<Integer>> bigrams, final int bufferType) { FileInputStream inStream = null; final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); @@ -448,8 +396,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { long now = -1, diff = -1; try { - final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, - dictOptions); + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); now = System.currentTimeMillis(); dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; @@ -476,20 +423,20 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final FormatSpec.FormatOptions formatOptions, final String message) { final String dictName = "runReadUnigrams"; final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = setUpDictionaryFile(dictName, dictVersion); + final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); // making the dictionary from lists of words. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.getDictionaryOptions(dictName, dictVersion)); addUnigrams(words.size(), dict, words, null /* shortcutMap */); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, - formatOptions, dict.mOptions); + long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, - bufferType, formatOptions, dict.mOptions); + bufferType); return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap + " : " + message + " : " + outputOptions(bufferType, formatOptions); @@ -508,13 +455,18 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { final ArrayList<String> results = CollectionUtils.newArrayList(); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, - VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION2); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -524,13 +476,18 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testReadUnigramsAndBigramsBinaryWithByteArray() { final ArrayList<String> results = CollectionUtils.newArrayList(); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); - runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, - VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION2); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -578,16 +535,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final FormatOptions formatOptions, final String message) { final String dictName = "testGetTerminalPosition"; final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = setUpDictionaryFile(dictName, dictVersion); + final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(dictName, dictVersion)); + BinaryDictUtils.getDictionaryOptions(dictName, dictVersion)); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict, formatOptions); - final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY, - formatOptions, dict.mOptions); + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY); try { dictDecoder.openDictBuffer(); } catch (IOException e) { @@ -638,19 +595,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public void testGetTerminalPosition() { final ArrayList<String> results = CollectionUtils.newArrayList(); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION2); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); - - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION2); - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); - runGetTerminalPositionTests(USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, BinaryDictUtils.VERSION2); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); + + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, BinaryDictUtils.VERSION2); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITHOUT_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); + runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, + BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP); for (final String result : results) { Log.d(TAG, result); @@ -660,7 +627,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private void runTestDeleteWord(final FormatOptions formatOptions) { final String dictName = "testDeleteWord"; final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = setUpDictionaryFile(dictName, dictVersion); + final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, + getContext().getCacheDir()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions( @@ -668,15 +636,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); timeWritingDictToFile(file, dict, formatOptions); - final DictUpdater dictUpdater; - if (formatOptions.mVersion == 3) { - dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); - } else if (formatOptions.mVersion == 4) { - dictUpdater = new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); - } else { - throw new RuntimeException("DictUpdater for version " + formatOptions.mVersion - + " doesn't exist."); - } + final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); try { MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, @@ -696,7 +656,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testDeleteWord() { - runTestDeleteWord(VERSION3_WITH_DYNAMIC_UPDATE); - runTestDeleteWord(VERSION4_WITH_DYNAMIC_UPDATE); + runTestDeleteWord(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); + runTestDeleteWord(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index afe5adb73..592f86e94 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -23,6 +23,7 @@ import android.util.Log; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -30,24 +31,16 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.File; import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.Random; @LargeTest public class BinaryDictIOUtilsTests extends AndroidTestCase { private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName(); - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3, true); private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); public static final int DEFAULT_MAX_UNIGRAMS = 1500; private final int mMaxUnigrams; - private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - - private static final int VERSION3 = 3; - private static final int VERSION4 = 4; - private static final String[] CHARACTERS = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", @@ -141,7 +134,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { int position = FormatSpec.NOT_VALID_WORD; try { - final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_READONLY_BYTEBUFFER); position = dictDecoder.getTerminalPosition(word); } catch (IOException e) { @@ -159,7 +152,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { * @throws IOException * @throws UnsupportedFormatException */ - private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder, + private static PtNodeInfo findWordByDictDecoder(final DictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { int position = dictDecoder.getTerminalPosition(word); if (position != FormatSpec.NOT_VALID_WORD) { @@ -176,7 +169,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { PtNodeInfo info = null; try { dictDecoder.openDictBuffer(); - info = findWordByBinaryDictReader(dictDecoder, word); + info = findWordByDictDecoder(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { } @@ -186,16 +179,10 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { // return amount of time to insert a word private long insertAndCheckWord(final File file, final String word, final int frequency, final boolean exist, final ArrayList<WeightedString> bigrams, - final ArrayList<WeightedString> shortcuts, final int formatVersion) { + final ArrayList<WeightedString> shortcuts, final FormatOptions formatOptions) { long amountOfTime = -1; try { - final DictUpdater dictUpdater; - if (formatVersion == VERSION3) { - dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); - } else { - throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't" - + " exist."); - } + final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); if (!exist) { assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); @@ -212,18 +199,14 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { return amountOfTime; } - private void deleteWord(final File file, final String word, final int formatVersion) { + private void deleteWord(final File file, final String word, final FormatOptions formatOptions) { try { - final DictUpdater dictUpdater; - if (formatVersion == VERSION3) { - dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); - } else { - throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't" - + " exist."); - } + final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions); dictUpdater.deleteWord(word); } catch (IOException e) { + Log.e(TAG, "Raised an IOException while deleting a word", e); } catch (UnsupportedFormatException e) { + Log.e(TAG, "Raised an UnsupportedFormatException while deleting a word", e); } } @@ -242,23 +225,21 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } } - private void runTestInsertWord(final int formatVersion) { - File file = null; - try { - file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION, - getContext().getCacheDir()); - } catch (IOException e) { - fail("IOException while creating temporary file: " + e); - } + private void runTestInsertWord(final FormatOptions formatOptions) { + final String testName = "testInsertWord"; + final String version = Long.toString(System.currentTimeMillis()); + final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, + getContext().getCacheDir()); // set an initial dictionary. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); + BinaryDictUtils.getDictionaryOptions(testName, version)); dict.add("abcd", 10, null, false); try { - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); + final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions, + getContext().getCacheDir()); + dictEncoder.writeDictionary(dict, formatOptions); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } catch (UnsupportedFormatException e) { @@ -266,54 +247,52 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); - insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion); + insertAndCheckWord(file, "abcde", 10, false, null, null, formatOptions); - insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion); + insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatOptions); checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn")); - insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion); + insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatOptions); checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd")); // update the existing word. - insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion); + insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatOptions); // split 1 - insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion); + insertAndCheckWord(file, "ab", 20, false, null, null, formatOptions); // split 2 - insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion); + insertAndCheckWord(file, "ami", 30, false, null, null, formatOptions); - deleteWord(file, "ami", formatVersion); + deleteWord(file, "ami", formatOptions); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami")); - insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion); + insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatOptions); - deleteWord(file, "abcd", formatVersion); + deleteWord(file, "abcd", formatOptions); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd")); } public void testInsertWord() { - runTestInsertWord(VERSION3); + runTestInsertWord(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); } - private void runTestInsertWordWithBigrams(final int formatVersion) { - File file = null; - try { - file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION, - getContext().getCacheDir()); - } catch (IOException e) { - fail("IOException while creating temporary file: " + e); - } + private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) { + final String testName = "testInsertWordWithBigrams"; + final String version = Long.toString(System.currentTimeMillis()); + File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, + getContext().getCacheDir()); // set an initial dictionary. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); + BinaryDictUtils.getDictionaryOptions(testName, version)); dict.add("abcd", 10, null, false); dict.add("efgh", 15, null, false); try { - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); + final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions, + getContext().getCacheDir()); + dictEncoder.writeDictionary(dict, formatOptions); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); } catch (UnsupportedFormatException e) { @@ -323,8 +302,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { final ArrayList<WeightedString> banana = new ArrayList<WeightedString>(); banana.add(new WeightedString("banana", 10)); - insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion); - insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion); + insertAndCheckWord(file, "banana", 0, false, null, null, formatOptions); + insertAndCheckWord(file, "recursive", 60, true, banana, null, formatOptions); final PtNodeInfo info = findWordFromFile(file, "recursive"); int bananaPos = getWordPosition(file, "banana"); @@ -334,27 +313,24 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } public void testInsertWordWithBigrams() { - runTestInsertWordWithBigrams(VERSION3); + runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); } - private void runTestRandomWords(final int formatVersion) { - File file = null; - try { - file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION, - getContext().getCacheDir()); - } catch (IOException e) { - } - assertNotNull(file); + private void runTestRandomWords(final FormatOptions formatOptions) { + final String testName = "testRandomWord"; + final String version = Long.toString(System.currentTimeMillis()); + final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions, + getContext().getCacheDir()); // set an initial dictionary. final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, - false)); + BinaryDictUtils.getDictionaryOptions(testName, version)); dict.add("initial", 10, null, false); try { - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); + final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions, + getContext().getCacheDir()); + dictEncoder.writeDictionary(dict, formatOptions); } catch (IOException e) { assertTrue(false); } catch (UnsupportedFormatException e) { @@ -366,7 +342,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { int cnt = 0; for (final String word : sWords) { final long diff = insertAndCheckWord(file, word, - cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatVersion); + cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatOptions); maxTimeToInsert = Math.max(maxTimeToInsert, diff); minTimeToInsert = Math.min(minTimeToInsert, diff); sum += diff; @@ -377,13 +353,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); } - Log.d(TAG, "Test version " + formatVersion); + Log.d(TAG, "Test version " + formatOptions); Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms."); Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms."); Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms."); } public void testRandomWords() { - runTestRandomWords(VERSION3); + runTestRandomWords(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java new file mode 100644 index 000000000..f476738f3 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; + +import java.io.File; +import java.util.HashMap; + +public class BinaryDictUtils { + public static final int USE_BYTE_ARRAY = 1; + public static final int USE_BYTE_BUFFER = 2; + + public static final String TEST_DICT_FILE_EXTENSION = ".testDict"; + + public static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); + public static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); + public static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); + public static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); + public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = + new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); + public static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE_AND_TIMESTAMP = + new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */, + true /* hasTimestamp */); + + public static DictionaryOptions getDictionaryOptions(final String id, final String version) { + final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(), + false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */); + options.mAttributes.put("dictionary", id); + options.mAttributes.put("version", version); + return options; + } + + public static File getDictFile(final String name, final String version, + final FormatOptions formatOptions, final File directory) { + if (formatOptions.mVersion == 2 || formatOptions.mVersion == 3) { + return new File(directory, name + "." + version + TEST_DICT_FILE_EXTENSION); + } else if (formatOptions.mVersion == 4) { + return new File(directory, name + "." + version); + } else { + throw new RuntimeException("the format option has a wrong version : " + + formatOptions.mVersion); + } + } + + public static DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions, + final File cacheDir) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + return new Ver4DictEncoder(cacheDir); + } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { + return new Ver3DictEncoder(file); + } else { + throw new RuntimeException("The format option has a wrong version : " + + formatOptions.mVersion); + } + } + + public static DictUpdater getDictUpdater(final File file, final FormatOptions formatOptions) { + if (formatOptions.mVersion == FormatSpec.VERSION4) { + return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); + } else if (formatOptions.mVersion == 3) { + return new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER); + } else { + throw new RuntimeException("The format option has a wrong version : " + + formatOptions.mVersion); + } + } +} |