diff options
Diffstat (limited to 'java/src/com/android/inputmethod')
25 files changed, 210 insertions, 1509 deletions
diff --git a/java/src/com/android/inputmethod/keyboard/Key.java b/java/src/com/android/inputmethod/keyboard/Key.java index b975b9c70..c8c4d30ef 100644 --- a/java/src/com/android/inputmethod/keyboard/Key.java +++ b/java/src/com/android/inputmethod/keyboard/Key.java @@ -348,8 +348,7 @@ public class Key implements Comparable<Key> { if (StringUtils.codePointCount(mLabel) == 1) { // Use the first letter of the hint label if shiftedLetterActivated flag is // specified. - if (hasShiftedLetterHint() && isShiftedLetterActivated() - && !TextUtils.isEmpty(mHintLabel)) { + if (hasShiftedLetterHint() && isShiftedLetterActivated()) { mCode = mHintLabel.codePointAt(0); } else { mCode = mLabel.codePointAt(0); @@ -687,7 +686,8 @@ public class Key implements Comparable<Key> { } public final boolean hasShiftedLetterHint() { - return (mLabelFlags & LABEL_FLAGS_HAS_SHIFTED_LETTER_HINT) != 0; + return (mLabelFlags & LABEL_FLAGS_HAS_SHIFTED_LETTER_HINT) != 0 + && !TextUtils.isEmpty(mHintLabel); } public final boolean hasHintLabel() { @@ -710,8 +710,9 @@ public class Key implements Comparable<Key> { return (mLabelFlags & LABEL_FLAGS_AUTO_SCALE) == LABEL_FLAGS_AUTO_SCALE; } - public final boolean isShiftedLetterActivated() { - return (mLabelFlags & LABEL_FLAGS_SHIFTED_LETTER_ACTIVATED) != 0; + private final boolean isShiftedLetterActivated() { + return (mLabelFlags & LABEL_FLAGS_SHIFTED_LETTER_ACTIVATED) != 0 + && !TextUtils.isEmpty(mHintLabel); } public final int getMoreKeysColumn() { diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java index dd98c1703..0c80ce206 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardState.java @@ -304,6 +304,7 @@ public final class KeyboardState { mSwitchActions.setSymbolsKeyboard(); mIsAlphabetMode = false; mIsSymbolShifted = false; + mRecapitalizeMode = RecapitalizeStatus.NOT_A_RECAPITALIZE_MODE; // Reset alphabet shift state. mAlphabetShiftState.setShiftLocked(false); mSwitchState = SWITCH_STATE_SYMBOL_BEGIN; @@ -316,6 +317,7 @@ public final class KeyboardState { mSwitchActions.setSymbolsShiftedKeyboard(); mIsAlphabetMode = false; mIsSymbolShifted = true; + mRecapitalizeMode = RecapitalizeStatus.NOT_A_RECAPITALIZE_MODE; // Reset alphabet shift state. mAlphabetShiftState.setShiftLocked(false); mSwitchState = SWITCH_STATE_SYMBOL_BEGIN; @@ -327,6 +329,7 @@ public final class KeyboardState { } mIsAlphabetMode = false; mIsEmojiMode = true; + mRecapitalizeMode = RecapitalizeStatus.NOT_A_RECAPITALIZE_MODE; // Remember caps lock mode and reset alphabet shift state. mPrevMainKeyboardWasShiftLocked = mAlphabetShiftState.isShiftLocked(); mAlphabetShiftState.setShiftLocked(false); diff --git a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java index 1b722c5ce..f708c2cdd 100644 --- a/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java +++ b/java/src/com/android/inputmethod/keyboard/internal/KeyboardTextsSet.java @@ -635,8 +635,8 @@ public final class KeyboardTextsSet { /* 118 */ "\\%,\u2030", }; - /* Language az: Azerbaijani */ - private static final String[] LANGUAGE_az = { + /* Language az_AZ: Azerbaijani (Azerbaijan) */ + private static final String[] LANGUAGE_az_AZ = { // U+00E2: "â" LATIN SMALL LETTER A WITH CIRCUMFLEX /* 0 */ "\u00E2", // U+0259: "ə" LATIN SMALL LETTER SCHWA @@ -2454,8 +2454,8 @@ public final class KeyboardTextsSet { /* 53 */ "!text/double_9qm_rqm", }; - /* Language ne: Nepali */ - private static final String[] LANGUAGE_ne = { + /* Language ne_NP: Nepali (Nepal) */ + private static final String[] LANGUAGE_ne_NP = { /* 0~ */ null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, @@ -3519,7 +3519,7 @@ public final class KeyboardTextsSet { "DEFAULT", LANGUAGE_DEFAULT, /* default */ "af", LANGUAGE_af, /* Afrikaans */ "ar", LANGUAGE_ar, /* Arabic */ - "az", LANGUAGE_az, /* Azerbaijani */ + "az" /* "az_AZ" */, LANGUAGE_az_AZ, /* Azerbaijani (Azerbaijan) */ "be" /* "be_BY" */, LANGUAGE_be_BY, /* Belarusian (Belarus) */ "bg", LANGUAGE_bg, /* Bulgarian */ "ca", LANGUAGE_ca, /* Catalan */ @@ -3551,7 +3551,7 @@ public final class KeyboardTextsSet { "mk", LANGUAGE_mk, /* Macedonian */ "mn" /* "mn_MN" */, LANGUAGE_mn_MN, /* Mongolian (Mongolia) */ "nb", LANGUAGE_nb, /* Norwegian Bokmål */ - "ne", LANGUAGE_ne, /* Nepali */ + "ne" /* "ne_NP" */, LANGUAGE_ne_NP, /* Nepali (Nepal) */ "nl", LANGUAGE_nl, /* Dutch */ "pl", LANGUAGE_pl, /* Polish */ "pt", LANGUAGE_pt, /* Portuguese */ diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java index e6fb9807e..1aee22baf 100644 --- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java @@ -21,7 +21,7 @@ import android.util.Log; import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; +import com.android.inputmethod.latin.makedict.Ver2DictEncoder; import java.io.File; import java.io.IOException; @@ -64,7 +64,7 @@ abstract public class AbstractDictionaryWriter { final String tempFilePath = file.getAbsolutePath() + ".temp"; final File tempFile = new File(tempFilePath); try { - final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile); + final DictEncoder dictEncoder = new Ver2DictEncoder(tempFile); writeDictionary(dictEncoder, attributeMap); tempFile.renameTo(file); } catch (IOException e) { diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index db4234c63..95ac3e203 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -367,6 +367,7 @@ public final class BinaryDictionary extends Dictionary { public static class LanguageModelParam { public final int[] mWord0; public final int[] mWord1; + // TODO: this needs to be a list of shortcuts public final int[] mShortcutTarget; public final int mUnigramProbability; public final int mBigramProbability; @@ -375,7 +376,7 @@ public final class BinaryDictionary extends Dictionary { public final boolean mIsBlacklisted; public final int mTimestamp; - // Constructor for unigram. + // Constructor for unigram. TODO: support shortcuts public LanguageModelParam(final String word, final int unigramProbability, final int timestamp) { mWord0 = null; diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index f960c5343..89ef96d7f 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -18,8 +18,6 @@ package com.android.inputmethod.latin; import android.content.Context; -import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -52,7 +50,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter { public void clear() { final HashMap<String, String> attributes = CollectionUtils.newHashMap(); mFusionDictionary = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(attributes, false, false)); + new FusionDictionary.DictionaryOptions(attributes)); } /** diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 6a10131b0..8b466559c 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -100,7 +100,6 @@ import com.android.inputmethod.latin.utils.RecapitalizeStatus; import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.TargetPackageInfoGetterTask; import com.android.inputmethod.latin.utils.TextRange; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils; import com.android.inputmethod.research.ResearchLogger; import java.io.FileDescriptor; @@ -1019,11 +1018,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // Only for debug mBoostPersonalizationDictionaryForDebug = currentSettingsValues.mBoostPersonalizationDictionaryForDebug; - if (mBoostPersonalizationDictionaryForDebug) { - UserHistoryForgettingCurveUtils.boostMaxFreqForDebug(); - } else { - UserHistoryForgettingCurveUtils.resetMaxFreqForDebug(); - } } } @@ -1475,6 +1469,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen ResearchLogger.latinIME_maybeDoubleSpacePeriod(textToInsert, false /* isBatchMode */); } + mWordComposer.doubleSpacePeriod(); mKeyboardSwitcher.updateShiftState(); return true; } @@ -1799,10 +1794,19 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (Character.isLetterOrDigit(codePointBeforeCursor) || currentSettingsValues.isUsuallyFollowedBySpace(codePointBeforeCursor)) { + final boolean autoShiftHasBeenOverriden = mKeyboardSwitcher.getKeyboardShiftMode() != + getCurrentAutoCapsState(); mSpaceState = SPACE_STATE_PHANTOM; + if (!autoShiftHasBeenOverriden) { + // When we change the space state, we need to update the shift state of the + // keyboard unless it has been overridden manually. This is happening for example + // after typing some letters and a period, then gesturing; the keyboard is not in + // caps mode yet, but since a gesture is starting, it should go in caps mode, + // unless the user explictly said it should not. + mKeyboardSwitcher.updateShiftState(); + } } mConnection.endBatchEdit(); - mKeyboardSwitcher.updateShiftState(); mWordComposer.setCapitalizedModeAndPreviousWordAtStartComposingTime(getActualCapsMode(), // Prev word is 1st word before cursor getNthPreviousWordForSuggestion(currentSettingsValues, 1 /* nthPreviousWord */)); diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java index 2f81d15d5..5ecfc67b2 100644 --- a/java/src/com/android/inputmethod/latin/WordComposer.java +++ b/java/src/com/android/inputmethod/latin/WordComposer.java @@ -471,7 +471,12 @@ public final class WordComposer { mCapsCount = 0; mDigitsCount = 0; mIsBatchMode = false; - mPreviousWord = mTypedWord.toString(); + final boolean isWhitespace = 1 == StringUtils.codePointCount(separatorString) + && Character.isWhitespace(separatorString.codePointAt(0)); + // If not whitespace, we don't use the previous word for suggestion. This is consistent + // with how we get the previous word for suggestion: see RichInputConnection#spaceRegex and + // LatinIME#getNthPreviousWordForSuggestion. + mPreviousWord = isWhitespace ? mTypedWord.toString() : null; mTypedWord.setLength(0); mCodePointSize = 0; mTrailingSingleQuotesCount = 0; @@ -485,6 +490,13 @@ public final class WordComposer { return lastComposedWord; } + public void doubleSpacePeriod() { + // When a period was entered with a double space, the separator we got has been + // changed by a period (see #commitWord). We should not use the previous word for + // suggestion. + mPreviousWord = null; + } + public void resumeSuggestionOnLastComposedWord(final LastComposedWord lastComposedWord, final String previousWord) { mPrimaryKeyCodes = lastComposedWord.mPrimaryKeyCodes; diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index bbbb8e461..f8fa68f45 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -58,12 +58,9 @@ public abstract class AbstractDictDecoder implements DictDecoder { headerSize); final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE), - 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); + new FusionDictionary.DictionaryOptions(attributes), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 83ee7d685..7f0aa777f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -330,7 +330,7 @@ public final class BinaryDictDecoderUtils { static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, final FormatOptions options) { - if (options.mSupportsDynamicUpdate) { + if (options.supportsDynamicUpdate()) { final int address = dictBuffer.readUnsignedInt24(); if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; if ((address & FormatSpec.MSB24) != 0) { @@ -540,11 +540,11 @@ public final class BinaryDictDecoderUtils { } // reach the end of the array. - if (options.mSupportsDynamicUpdate) { + if (options.supportsDynamicUpdate()) { final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink(); if (!hasValidForwardLink) break; } - } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray()); + } while (options.supportsDynamicUpdate() && dictDecoder.hasNextPtNodeArray()); final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos; diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index c0dad3db2..8ba0797de 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -20,7 +20,6 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -161,7 +160,7 @@ public class BinaryDictEncoderUtils { node.mCachedSize = nodeSize; size += nodeSize; } - if (options.mSupportsDynamicUpdate) { + if (options.supportsDynamicUpdate()) { size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } ptNodeArray.mCachedSize = size; @@ -398,7 +397,7 @@ public class BinaryDictEncoderUtils { nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; } } - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; } else if (null != ptNode.mChildren) { nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray, @@ -418,7 +417,7 @@ public class BinaryDictEncoderUtils { ptNode.mCachedSize = nodeSize; size += nodeSize; } - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } if (ptNodeArray.mCachedSize != size) { @@ -534,7 +533,7 @@ public class BinaryDictEncoderUtils { if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug"); } while (changesDone); - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { computeParentAddresses(flatNodes); } final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1); @@ -643,7 +642,7 @@ public class BinaryDictEncoderUtils { byte flags = 0; if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS; if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL; - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { flags |= FormatSpec.FLAG_IS_NOT_MOVED; } else if (true) { switch (childrenAddressSize) { @@ -755,16 +754,11 @@ public class BinaryDictEncoderUtils { } /** - * Makes the 2-byte value for options flags. + * Makes the 2-byte value for options flags. Unused at the moment, and always 0. */ - private static final int makeOptionsValue(final FusionDictionary dictionary, - final FormatOptions formatOptions) { - final DictionaryOptions options = dictionary.mOptions; - final boolean hasBigrams = dictionary.hasBigrams(); - return (options.mFrenchLigatureProcessing ? FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG : 0) - + (options.mGermanUmlautProcessing ? FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG : 0) - + (hasBigrams ? FormatSpec.CONTAINS_BIGRAMS_FLAG : 0) - + (formatOptions.mSupportsDynamicUpdate ? FormatSpec.SUPPORTS_DYNAMIC_UPDATE : 0); + private static final int makeOptionsValue(final FormatOptions formatOptions) { + // TODO: why doesn't this handle CONTAINS_TIMESTAMP_FLAG? + return 0; } /** @@ -852,7 +846,7 @@ public class BinaryDictEncoderUtils { } dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict); } - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS); } if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate @@ -953,7 +947,7 @@ public class BinaryDictEncoderUtils { headerBuffer.write((byte) (0xFF & version)); // Options flags - final int options = makeOptionsValue(dict, formatOptions); + final int options = makeOptionsValue(formatOptions); headerBuffer.write((byte) (0xFF & (options >> 8))); headerBuffer.write((byte) (0xFF & options)); final int headerSizeOffset = headerBuffer.size(); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 86ebf5844..640d778bb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -112,7 +112,7 @@ public final class BinaryDictIOUtils { } if (p.mPosition == p.mNumOfPtNode) { - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { final boolean hasValidForwardLinkAddress = dictDecoder.readAndFollowForwardLink(); if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) { @@ -228,7 +228,7 @@ public final class BinaryDictIOUtils { // a forward link address that we need to consult and possibly resume // search on the next node array in the linked list. if (foundNextPtNode) break; - if (!header.mFormatOptions.mSupportsDynamicUpdate) { + if (!header.mFormatOptions.supportsDynamicUpdate()) { return FormatSpec.NOT_VALID_WORD; } @@ -507,7 +507,7 @@ public final class BinaryDictIOUtils { * Helper method to check whether the node is moved. */ public static boolean isMovedPtNode(final int flags, final FormatOptions options) { - return options.mSupportsDynamicUpdate + return options.supportsDynamicUpdate() && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED); } @@ -516,14 +516,14 @@ public final class BinaryDictIOUtils { */ public static boolean supportsDynamicUpdate(final FormatOptions options) { return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE - && options.mSupportsDynamicUpdate; + && options.supportsDynamicUpdate(); } /** * Helper method to check whether the node is deleted. */ public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) { - return formatOptions.mSupportsDynamicUpdate + return formatOptions.supportsDynamicUpdate() && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED); } @@ -546,7 +546,7 @@ public final class BinaryDictIOUtils { static int getChildrenAddressSize(final int optionFlags, final FormatOptions formatOptions) { - if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; + if (formatOptions.supportsDynamicUpdate()) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: return 1; diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java deleted file mode 100644 index 971b4ff9f..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; - -/** - * The utility class to help dynamic updates on the binary dictionary. - * - * All the methods in this class are static. - */ -@UsedForTesting -public final class DynamicBinaryDictIOUtils { - private static final boolean DBG = false; - static final int MAX_JUMPS = 10000; - - private DynamicBinaryDictIOUtils() { - // This utility class is not publicly instantiable. - } - - /* package */ static int markAsDeleted(final int flags) { - return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED; - } - - /** - * Update a parent address in a PtNode that is referred to by ptNodeOriginAddress. - * - * @param dictUpdater the DictUpdater to write. - * @param ptNodeOriginAddress the address of the PtNode. - * @param newParentAddress the absolute address of the parent. - * @param formatOptions file format options. - */ - private static void updateParentAddress(final Ver3DictUpdater dictUpdater, - final int ptNodeOriginAddress, final int newParentAddress, - final FormatOptions formatOptions) { - final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - final int originalPosition = dictBuffer.position(); - dictBuffer.position(ptNodeOriginAddress); - if (!formatOptions.mSupportsDynamicUpdate) { - throw new RuntimeException("this file format does not support parent addresses"); - } - final int flags = dictBuffer.readUnsignedByte(); - if (BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) { - // If the node is moved, the parent address is stored in the destination node. - // We are guaranteed to process the destination node later, so there is no need to - // update anything here. - dictBuffer.position(originalPosition); - return; - } - if (DBG) { - MakedictLog.d("update parent address flags=" + flags + ", " + ptNodeOriginAddress); - } - final int parentOffset = newParentAddress - ptNodeOriginAddress; - BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset); - dictBuffer.position(originalPosition); - } - - /** - * Update parent addresses in a node array stored at ptNodeOriginAddress. - * - * @param dictUpdater the DictUpdater to be modified. - * @param ptNodeOriginAddress the address of the node array to update. - * @param newParentAddress the address to be written. - * @param formatOptions file format options. - */ - private static void updateParentAddresses(final Ver3DictUpdater dictUpdater, - final int ptNodeOriginAddress, final int newParentAddress, - final FormatOptions formatOptions) { - final int originalPosition = dictUpdater.getPosition(); - dictUpdater.setPosition(ptNodeOriginAddress); - do { - final int count = dictUpdater.readPtNodeCount(); - for (int i = 0; i < count; ++i) { - updateParentAddress(dictUpdater, dictUpdater.getPosition(), newParentAddress, - formatOptions); - dictUpdater.skipPtNode(formatOptions); - } - if (!dictUpdater.readAndFollowForwardLink()) break; - if (dictUpdater.getPosition() == FormatSpec.NO_FORWARD_LINK_ADDRESS) break; - } while (formatOptions.mSupportsDynamicUpdate); - dictUpdater.setPosition(originalPosition); - } - - /** - * Update a children address in a PtNode that is addressed by ptNodeOriginAddress. - * - * @param dictUpdater the DictUpdater to write. - * @param ptNodeOriginAddress the address of the PtNode. - * @param newChildrenAddress the absolute address of the child. - * @param formatOptions file format options. - */ - private static void updateChildrenAddress(final Ver3DictUpdater dictUpdater, - final int ptNodeOriginAddress, final int newChildrenAddress, - final FormatOptions formatOptions) { - final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - final int originalPosition = dictBuffer.position(); - dictBuffer.position(ptNodeOriginAddress); - final int flags = dictBuffer.readUnsignedByte(); - BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions); - BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte(); - final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS - ? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position(); - BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset); - dictBuffer.position(originalPosition); - } - - /** - * Helper method to move a PtNode to the tail of the file. - */ - private static int movePtNode(final OutputStream destination, - final Ver3DictUpdater dictUpdater, final PtNodeInfo info, - final int nodeArrayOriginAddress, final int oldNodeAddress, - final FormatOptions formatOptions) throws IOException { - final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - updateParentAddress(dictUpdater, oldNodeAddress, dictBuffer.limit() + 1, formatOptions); - dictBuffer.position(oldNodeAddress); - final int currentFlags = dictBuffer.readUnsignedByte(); - dictBuffer.position(oldNodeAddress); - dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags - & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); - int size = FormatSpec.PTNODE_FLAGS_SIZE; - updateForwardLink(dictUpdater, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions); - size += BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { info }); - return size; - } - - @SuppressWarnings("unused") - private static void updateForwardLink(final Ver3DictUpdater dictUpdater, - final int nodeArrayOriginAddress, final int newNodeArrayAddress, - final FormatOptions formatOptions) { - final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - dictUpdater.setPosition(nodeArrayOriginAddress); - int jumpCount = 0; - while (jumpCount++ < MAX_JUMPS) { - final int count = dictUpdater.readPtNodeCount(); - for (int i = 0; i < count; ++i) { - dictUpdater.readPtNode(dictUpdater.getPosition(), formatOptions); - } - final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); - if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { - dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); - BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress); - return; - } - dictBuffer.position(forwardLinkAddress); - } - if (DBG && jumpCount >= MAX_JUMPS) { - throw new RuntimeException("too many jumps, probably a bug."); - } - } - - /** - * Move a PtNode that is referred to by oldPtNodeOrigin to the tail of the file, and set the - * children address to the byte after the PtNode. - * - * @param fileEndAddress the address of the tail of the file. - * @param codePoints the characters to put inside the PtNode. - * @param length how many code points to read from codePoints. - * @param flags the flags for this PtNode. - * @param frequency the frequency of this terminal. - * @param parentAddress the address of the parent PtNode of this PtNode. - * @param shortcutTargets the shortcut targets for this PtNode. - * @param bigrams the bigrams for this PtNode. - * @param destination the stream representing the tail of the file. - * @param dictUpdater the DictUpdater. - * @param oldPtNodeArrayOrigin the origin of the old PtNode array this PtNode was a part of. - * @param oldPtNodeOrigin the old origin where this PtNode used to be stored. - * @param formatOptions format options for this dictionary. - * @return the size written, in bytes. - * @throws IOException if the file can't be accessed - */ - private static int movePtNode(final int fileEndAddress, final int[] codePoints, - final int length, final int flags, final int frequency, final int parentAddress, - final ArrayList<WeightedString> shortcutTargets, - final ArrayList<PendingAttribute> bigrams, final OutputStream destination, - final Ver3DictUpdater dictUpdater, final int oldPtNodeArrayOrigin, - final int oldPtNodeOrigin, final FormatOptions formatOptions) throws IOException { - int size = 0; - final int newPtNodeOrigin = fileEndAddress + 1; - final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length); - final PtNodeInfo tmpInfo = new PtNodeInfo(newPtNodeOrigin, -1 /* endAddress */, - flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, - shortcutTargets, bigrams); - size = BinaryDictIOUtils.computePtNodeSize(tmpInfo, formatOptions); - final PtNodeInfo newInfo = new PtNodeInfo(newPtNodeOrigin, newPtNodeOrigin + size, - flags, writtenCharacters, frequency, parentAddress, - fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, - bigrams); - movePtNode(destination, dictUpdater, newInfo, oldPtNodeArrayOrigin, oldPtNodeOrigin, - formatOptions); - return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; - } - - /** - * Converts a list of WeightedString to a list of PendingAttribute. - */ - public static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater, - final ArrayList<WeightedString> bigramStrings) - throws IOException, UnsupportedFormatException { - if (bigramStrings == null) return CollectionUtils.newArrayList(); - final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); - for (final WeightedString bigram : bigramStrings) { - final int pos = dictUpdater.getTerminalPosition(bigram.mWord); - if (pos == FormatSpec.NOT_VALID_WORD) { - // TODO: figure out what is the correct thing to do here. - } else { - bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); - } - } - return bigrams; - } - - /** - * Insert a word into a binary dictionary. - * - * @param dictUpdater the dict updater. - * @param destination a stream to the underlying file, with the pointer at the end of the file. - * @param word the word to insert. - * @param frequency the frequency of the new word. - * @param bigramStrings bigram list, or null if none. - * @param shortcuts shortcut list, or null if none. - * @param isBlackListEntry whether this should be a blacklist entry. - * @throws IOException if the file can't be accessed. - * @throws UnsupportedFormatException if the existing dictionary is in an unexpected format. - */ - // TODO: Support batch insertion. - // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. - @UsedForTesting - public static void insertWord(final Ver3DictUpdater dictUpdater, - final OutputStream destination, final String word, final int frequency, - final ArrayList<WeightedString> bigramStrings, - final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, - final boolean isBlackListEntry) - throws IOException, UnsupportedFormatException { - final ArrayList<PendingAttribute> bigrams = resolveBigramPositions(dictUpdater, - bigramStrings); - final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - - final boolean isTerminal = true; - final boolean hasBigrams = !bigrams.isEmpty(); - final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty(); - - // find the insert position of the word. - if (dictBuffer.position() != 0) dictBuffer.position(0); - final FileHeader fileHeader = dictUpdater.readHeader(); - - int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position(); - final int[] codePoints = FusionDictionary.getCodePoints(word); - final int wordLen = codePoints.length; - - for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { - if (wordPos >= wordLen) break; - nodeOriginAddress = dictBuffer.position(); - int nodeParentAddress = -1; - final int ptNodeCount = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer); - boolean foundNextNode = false; - - for (int i = 0; i < ptNodeCount; ++i) { - address = dictBuffer.position(); - final PtNodeInfo currentInfo = dictUpdater.readPtNode(address, - fileHeader.mFormatOptions); - final boolean isMovedNode = BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags, - fileHeader.mFormatOptions); - if (isMovedNode) continue; - nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) - ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; - boolean matched = true; - for (int p = 0; p < currentInfo.mCharacters.length; ++p) { - if (wordPos + p >= wordLen) { - /* - * splitting - * before - * abcd - ef - * - * insert "abc" - * - * after - * abc - d - ef - */ - final int newNodeAddress = dictBuffer.limit(); - final int flags = BinaryDictEncoderUtils.makePtNodeFlags(p > 1, - isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, - false /* isBlackListEntry */, fileHeader.mFormatOptions); - int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p, flags, - frequency, nodeParentAddress, shortcuts, bigrams, destination, - dictUpdater, nodeOriginAddress, address, fileHeader.mFormatOptions); - - final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, - currentInfo.mCharacters.length); - if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentAddresses(dictUpdater, currentInfo.mChildrenAddress, - newNodeAddress + written + 1, fileHeader.mFormatOptions); - } - final PtNodeInfo newInfo2 = new PtNodeInfo( - newNodeAddress + written + 1, -1 /* endAddress */, - currentInfo.mFlags, characters2, currentInfo.mFrequency, - newNodeAddress + 1, currentInfo.mChildrenAddress, - currentInfo.mShortcutTargets, currentInfo.mBigrams); - BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { newInfo2 }); - return; - } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { - if (p > 0) { - /* - * splitting - * before - * ab - cd - * - * insert "ac" - * - * after - * a - b - cd - * | - * - c - */ - - final int newNodeAddress = dictBuffer.limit(); - final int childrenAddress = currentInfo.mChildrenAddress; - - // move prefix - final int prefixFlags = BinaryDictEncoderUtils.makePtNodeFlags(p > 1, - false /* isTerminal */, 0 /* childrenAddressSize*/, - false /* hasShortcut */, false /* hasBigrams */, - false /* isNotAWord */, false /* isBlackListEntry */, - fileHeader.mFormatOptions); - int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p, - prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, - destination, dictUpdater, nodeOriginAddress, address, - fileHeader.mFormatOptions); - - final int[] suffixCharacters = Arrays.copyOfRange( - currentInfo.mCharacters, p, currentInfo.mCharacters.length); - if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentAddresses(dictUpdater, currentInfo.mChildrenAddress, - newNodeAddress + written + 1, fileHeader.mFormatOptions); - } - final int suffixFlags = BinaryDictEncoderUtils.makePtNodeFlags( - suffixCharacters.length > 1, - (currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0, - 0 /* childrenAddressSize */, - (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) - != 0, - (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, - isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); - final PtNodeInfo suffixInfo = new PtNodeInfo( - newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, - suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, - currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, - currentInfo.mBigrams); - written += BinaryDictIOUtils.computePtNodeSize(suffixInfo, - fileHeader.mFormatOptions) + 1; - - final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, - codePoints.length); - final int flags = BinaryDictEncoderUtils.makePtNodeFlags( - newCharacters.length > 1, isTerminal, - 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); - final PtNodeInfo newInfo = new PtNodeInfo( - newNodeAddress + written, -1 /* endAddress */, flags, - newCharacters, frequency, newNodeAddress + 1, - FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); - BinaryDictIOUtils.writeNodes(destination, - new PtNodeInfo[] { suffixInfo, newInfo }); - return; - } - matched = false; - break; - } - } - - if (matched) { - if (wordPos + currentInfo.mCharacters.length == wordLen) { - // the word exists in the dictionary. - // only update the PtNode. - final int newNodeAddress = dictBuffer.limit(); - final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; - final int flags = BinaryDictEncoderUtils.makePtNodeFlags(hasMultipleChars, - isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); - final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress + 1, - -1 /* endAddress */, flags, currentInfo.mCharacters, frequency, - nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, - bigrams); - movePtNode(destination, dictUpdater, newInfo, nodeOriginAddress, address, - fileHeader.mFormatOptions); - return; - } - wordPos += currentInfo.mCharacters.length; - if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) { - /* - * found the prefix of the word. - * make new PtNode and link to the PtNode from this PtNode. - * - * before - * ab - cd - * - * insert "abcde" - * - * after - * ab - cd - e - */ - final int newNodeArrayAddress = dictBuffer.limit(); - updateChildrenAddress(dictUpdater, address, newNodeArrayAddress, - fileHeader.mFormatOptions); - final int newNodeAddress = newNodeArrayAddress + 1; - final boolean hasMultipleChars = (wordLen - wordPos) > 1; - final int flags = BinaryDictEncoderUtils.makePtNodeFlags(hasMultipleChars, - isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); - final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); - final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress, -1, flags, - characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, - shortcuts, bigrams); - BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { newInfo }); - return; - } - dictBuffer.position(currentInfo.mChildrenAddress); - foundNextNode = true; - break; - } - } - - if (foundNextNode) continue; - - // reached the end of the array. - final int linkAddressPosition = dictBuffer.position(); - int nextLink = dictBuffer.readUnsignedInt24(); - if ((nextLink & FormatSpec.MSB24) != 0) { - nextLink = -(nextLink & FormatSpec.SINT24_MAX); - } - if (nextLink == FormatSpec.NO_FORWARD_LINK_ADDRESS) { - /* - * expand this node. - * - * before - * ab - cd - * - * insert "abef" - * - * after - * ab - cd - * | - * - ef - */ - - // change the forward link address. - final int newNodeAddress = dictBuffer.limit(); - dictBuffer.position(linkAddressPosition); - BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress); - - final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); - final int flags = BinaryDictEncoderUtils.makePtNodeFlags(characters.length > 1, - isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, - isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); - final PtNodeInfo newInfo = new PtNodeInfo(newNodeAddress + 1, - -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, - FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); - BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[]{ newInfo }); - return; - } else { - depth--; - dictBuffer.position(nextLink); - } - } - } -} diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 846aacf11..2f6f194aa 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -40,12 +40,8 @@ public final class FormatSpec { * p | not used 3 bits * t | each unigram and bigram entry has a time stamp? * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG - * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG - * n | FRENCH_LIGATURE_PROCESSING_FLAG - * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE - * l | GERMAN_UMLAUT_PROCESSING_FLAG - * a | - * gs + * o | + * nflags * * h | * e | size of the file header, 4bytes @@ -82,45 +78,36 @@ public final class FormatSpec { * s * * f | - * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header) - * r | forward link address, 3byte - * w | 1 byte = bbbbbbbb match - * a | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte) - * r | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte - * d | - * linkaddress + * o | forward link address, 3byte + * r | 1 byte = bbbbbbbb match + * w | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte) + * a | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte + * r | + * dlinkaddress */ /* Node (FusionDictionary.PtNode) layout is as follows: - * | IF !SUPPORTS_DYNAMIC_UPDATE - * | addressType xx : mask with MASK_CHILDREN_ADDRESS_TYPE - * | 2 bits, 00 = no children : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS - * f | 01 = 1 byte : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE - * l | 10 = 2 bytes : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES - * a | 11 = 3 bytes : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES - * g | ELSE - * s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED - * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES - * | 01 = yes : FLAG_IS_MOVED - * | the new address is stored in the same place as the parent address - * | is deleted? 10 = yes : FLAG_IS_DELETED - * | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS - * | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL - * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS + * | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED + * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES + * | 01 = yes : FLAG_IS_MOVED + * f | the new address is stored in the same place as the parent address + * l | is deleted? 10 = yes : FLAG_IS_DELETED + * a | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS + * g | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL + * s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED * * p | - * a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header) - * r | parent address, 3byte - * e | 1 byte = bbbbbbbb match - * n | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) - * t | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte - * a | This address is relative to the head of the PtNode. - * d | If the node doesn't have a parent, this field is set to 0. + * a | parent address, 3byte + * r | 1 byte = bbbbbbbb match + * e | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) + * n | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte + * t | This address is relative to the head of the PtNode. + * a | If the node doesn't have a parent, this field is set to 0. * d | - * ress + * dress * * c | IF FLAG_HAS_MULTIPLE_CHARS * h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers @@ -134,23 +121,16 @@ public final class FormatSpec { * e | frequency 1 byte * q | * - * c | IF SUPPORTS_DYNAMIC_UPDATE - * h | children address, 3 bytes - * i | 1 byte = bbbbbbbb match - * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) - * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte - * r | if this node doesn't have children, this field is set to 0. - * e | (see BinaryDictEncoderUtils#writeVariableSignedAddress) - * n | ELSIF 00 = FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS == addressType - * a | // nothing - * d | ELSIF 01 = FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE == addressType - * d | children address, 1 byte - * r | ELSIF 10 = FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES == addressType - * e | children address, 2 bytes - * s | ELSE // 11 = FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES = addressType - * s | children address, 3 bytes - * | END - * | This address is relative to the position of this field. + * c | + * h | children address, 3 bytes + * i | 1 byte = bbbbbbbb match + * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) + * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte + * r | if this node doesn't have children, this field is set to 0. + * e | (see BinaryDictEncoderUtils#writeVariableSignedAddress) + * n | This address is relative to the position of this field. + * a | + * ddress * * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS * | shortcut string list @@ -208,17 +188,12 @@ public final class FormatSpec { // us to change the format during development while having testing devices remove // older files with each upgrade, while still having a readable versioning scheme. public static final int VERSION2 = 2; - public static final int VERSION3 = 3; public static final int VERSION4 = 400; static final int MINIMUM_SUPPORTED_VERSION = VERSION2; static final int MAXIMUM_SUPPORTED_VERSION = VERSION4; // These options need to be the same numeric values as the one in the native reading code. - static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; // TODO: Make the native reading code read this variable. - static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; - static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; - static final int CONTAINS_BIGRAMS_FLAG = 0x8; static final int CONTAINS_TIMESTAMP_FLAG = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and @@ -339,30 +314,23 @@ public final class FormatSpec { */ public static final class FormatOptions { public final int mVersion; - public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; public final boolean mHasTimestamp; - @UsedForTesting - public FormatOptions(final int version) { - this(version, false); - } @UsedForTesting - public FormatOptions(final int version, final boolean supportsDynamicUpdate) { - this(version, supportsDynamicUpdate, false /* hasTimestamp */); + public FormatOptions(final int version) { + this(version, false /* hasTimestamp */); } - public FormatOptions(final int version, final boolean supportsDynamicUpdate, - final boolean hasTimestamp) { + public FormatOptions(final int version, final boolean hasTimestamp) { mVersion = version; - if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { - throw new RuntimeException("Dynamic updates are only supported with versions " - + FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior."); - } - mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); mHasTimestamp = hasTimestamp; } + + public boolean supportsDynamicUpdate() { + return mVersion >= FIRST_VERSION_WITH_DYNAMIC_UPDATE; + } } /** @@ -374,7 +342,6 @@ public final class FormatSpec { public final FormatOptions mFormatOptions; // Note that these are corresponding definitions in native code in latinime::HeaderPolicy // and latinime::HeaderReadWriteUtils. - public static final String SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE = "SUPPORTS_DYNAMIC_UPDATE"; public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE"; public static final String HAS_HISTORICAL_INFO_ATTRIBUTE = "HAS_HISTORICAL_INFO"; public static final String ATTRIBUTE_VALUE_TRUE = "1"; @@ -433,7 +400,7 @@ public final class FormatSpec { if (dictFile.isDirectory()) { return new Ver4DictDecoder(dictFile, bufferType); } else if (dictFile.isFile()) { - return new Ver3DictDecoder(dictFile, bufferType); + return new Ver2DictDecoder(dictFile, bufferType); } return null; } @@ -443,7 +410,7 @@ public final class FormatSpec { if (dictFile.isDirectory()) { return new Ver4DictDecoder(dictFile, factory); } else if (dictFile.isFile()) { - return new Ver3DictDecoder(dictFile, factory); + return new Ver2DictDecoder(dictFile, factory); } return null; } diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 3bb218bea..fdf2ae7b5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -303,14 +303,9 @@ public final class FusionDictionary implements Iterable<Word> { * Options global to the dictionary. */ public static final class DictionaryOptions { - public final boolean mGermanUmlautProcessing; - public final boolean mFrenchLigatureProcessing; public final HashMap<String, String> mAttributes; - public DictionaryOptions(final HashMap<String, String> attributes, - final boolean germanUmlautProcessing, final boolean frenchLigatureProcessing) { + public DictionaryOptions(final HashMap<String, String> attributes) { mAttributes = attributes; - mGermanUmlautProcessing = germanUmlautProcessing; - mFrenchLigatureProcessing = frenchLigatureProcessing; } @Override public String toString() { // Convenience method @@ -339,14 +334,6 @@ public final class FusionDictionary implements Iterable<Word> { } s.append("\n"); } - if (mGermanUmlautProcessing) { - s.append(indent); - s.append("Needs German umlaut processing\n"); - } - if (mFrenchLigatureProcessing) { - s.append(indent); - s.append("Needs French ligature processing\n"); - } return s.toString(); } } @@ -701,138 +688,6 @@ public final class FusionDictionary implements Iterable<Word> { } /** - * Recursively count the number of nodes in a given branch of the trie. - * - * @param nodeArray the node array to count. - * @return the number of nodes in this branch. - */ - public static int countNodeArrays(final PtNodeArray nodeArray) { - int size = 1; - for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { - PtNode ptNode = nodeArray.mData.get(i); - if (null != ptNode.mChildren) - size += countNodeArrays(ptNode.mChildren); - } - return size; - } - - // Recursively find out whether there are any bigrams. - // This can be pretty expensive especially if there aren't any (we return as soon - // as we find one, so it's much cheaper if there are bigrams) - private static boolean hasBigramsInternal(final PtNodeArray nodeArray) { - if (null == nodeArray) return false; - for (int i = nodeArray.mData.size() - 1; i >= 0; --i) { - PtNode ptNode = nodeArray.mData.get(i); - if (null != ptNode.mBigrams) return true; - if (hasBigramsInternal(ptNode.mChildren)) return true; - } - return false; - } - - /** - * Finds out whether there are any bigrams in this dictionary. - * - * @return true if there is any bigram, false otherwise. - */ - // TODO: this is expensive especially for large dictionaries without any bigram. - // The up side is, this is always accurate and correct and uses no memory. We should - // find a more efficient way of doing this, without compromising too much on memory - // and ease of use. - public boolean hasBigrams() { - return hasBigramsInternal(mRootNodeArray); - } - - // Historically, the tails of the words were going to be merged to save space. - // However, that would prevent the code to search for a specific address in log(n) - // time so this was abandoned. - // The code is still of interest as it does add some compression to any dictionary - // that has no need for attributes. Implementations that does not read attributes should be - // able to read a dictionary with merged tails. - // Also, the following code does support frequencies, as in, it will only merges - // tails that share the same frequency. Though it would result in the above loss of - // performance while searching by address, it is still technically possible to merge - // tails that contain attributes, but this code does not take that into account - it does - // not compare attributes and will merge terminals with different attributes regardless. - public void mergeTails() { - MakedictLog.i("Do not merge tails"); - return; - -// MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root)); -// MakedictLog.i("Number of PtNodes : " + countPtNodes(root)); -// -// final HashMap<String, ArrayList<PtNodeArray>> repository = -// new HashMap<String, ArrayList<PtNodeArray>>(); -// mergeTailsInner(repository, root); -// -// MakedictLog.i("Number of different pseudohashes : " + repository.size()); -// int size = 0; -// for (ArrayList<PtNodeArray> a : repository.values()) { -// size += a.size(); -// } -// MakedictLog.i("Number of nodes after merge : " + (1 + size)); -// MakedictLog.i("Recursively seen nodes : " + countNodes(root)); - } - - // The following methods are used by the deactivated mergeTails() -// private static boolean isEqual(PtNodeArray a, PtNodeArray b) { -// if (null == a && null == b) return true; -// if (null == a || null == b) return false; -// if (a.data.size() != b.data.size()) return false; -// final int size = a.data.size(); -// for (int i = size - 1; i >= 0; --i) { -// PtNode aPtNode = a.data.get(i); -// PtNode bPtNode = b.data.get(i); -// if (aPtNode.frequency != bPtNode.frequency) return false; -// if (aPtNode.alternates == null && bPtNode.alternates != null) return false; -// if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false; -// if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false; -// if (!isEqual(aPtNode.children, bPtNode.children)) return false; -// } -// return true; -// } - -// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner( -// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) { -// final ArrayList<PtNode> branches = nodeArray.data; -// final int nodeSize = branches.size(); -// for (int i = 0; i < nodeSize; ++i) { -// PtNode ptNode = branches.get(i); -// if (null != ptNode.children) { -// String pseudoHash = getPseudoHash(ptNode.children); -// ArrayList<PtNodeArray> similarList = map.get(pseudoHash); -// if (null == similarList) { -// similarList = new ArrayList<PtNodeArray>(); -// map.put(pseudoHash, similarList); -// } -// boolean merged = false; -// for (PtNodeArray similar : similarList) { -// if (isEqual(ptNode.children, similar)) { -// ptNode.children = similar; -// merged = true; -// break; -// } -// } -// if (!merged) { -// similarList.add(ptNode.children); -// } -// mergeTailsInner(map, ptNode.children); -// } -// } -// return map; -// } - -// private static String getPseudoHash(final PtNodeArray nodeArray) { -// StringBuilder s = new StringBuilder(); -// for (PtNode ptNode : nodeArray.data) { -// s.append(ptNode.frequency); -// for (int ch : ptNode.chars) { -// s.append(Character.toChars(ch)); -// } -// } -// return s.toString(); -// } - - /** * Iterator to walk through a dictionary. * * This is purely for convenience. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index acab4f8a5..e9667ab0b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -34,18 +34,11 @@ import java.util.ArrayList; import java.util.Arrays; /** - * An implementation of DictDecoder for version 3 binary dictionary. + * An implementation of DictDecoder for version 2 binary dictionary. */ @UsedForTesting -public class Ver3DictDecoder extends AbstractDictDecoder { - private static final String TAG = Ver3DictDecoder.class.getSimpleName(); - - static { - JniUtils.loadNativeLibrary(); - } - - // TODO: implement something sensical instead of just a phony method - private static native int doNothing(); +public class Ver2DictDecoder extends AbstractDictDecoder { + private static final String TAG = Ver2DictDecoder.class.getSimpleName(); protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { private static int readFrequency(final DictBuffer dictBuffer) { @@ -57,7 +50,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder { private final DictionaryBufferFactory mBufferFactory; protected DictBuffer mDictBuffer; - /* package */ Ver3DictDecoder(final File file, final int factoryFlag) { + /* package */ Ver2DictDecoder(final File file, final int factoryFlag) { mDictionaryBinaryFile = file; mDictBuffer = null; @@ -72,7 +65,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder { } } - /* package */ Ver3DictDecoder(final File file, final DictionaryBufferFactory factory) { + /* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) { mDictionaryBinaryFile = file; mBufferFactory = factory; } @@ -166,7 +159,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder { final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); - addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, + addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, addressPointer); if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index 5da34534e..e5430423d 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -31,16 +31,16 @@ import java.util.ArrayList; import java.util.Iterator; /** - * An implementation of DictEncoder for version 3 binary dictionary. + * An implementation of DictEncoder for version 2 binary dictionary. */ -public class Ver3DictEncoder implements DictEncoder { +public class Ver2DictEncoder implements DictEncoder { private final File mDictFile; private OutputStream mOutStream; private byte[] mBuffer; private int mPosition; - public Ver3DictEncoder(final File dictFile) { + public Ver2DictEncoder(final File dictFile) { mDictFile = dictFile; mOutStream = null; mBuffer = null; @@ -49,7 +49,7 @@ public class Ver3DictEncoder implements DictEncoder { // This constructor is used only by BinaryDictOffdeviceUtilsTests. // If you want to use this in the production code, you should consider keeping consistency of // the interface of Ver3DictDecoder by using factory. - public Ver3DictEncoder(final OutputStream outStream) { + public Ver2DictEncoder(final OutputStream outStream) { mDictFile = null; mOutStream = outStream; } @@ -68,7 +68,7 @@ public class Ver3DictEncoder implements DictEncoder { @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { - if (formatOptions.mVersion > FormatSpec.VERSION3) { + if (formatOptions.mVersion > FormatSpec.VERSION2) { throw new UnsupportedFormatException( "The given format options has wrong version number : " + formatOptions.mVersion); @@ -169,7 +169,7 @@ public class Ver3DictEncoder implements DictEncoder { private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); - if (formatOptions.mSupportsDynamicUpdate) { + if (formatOptions.supportsDynamicUpdate()) { mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition, childrenPos); } else { diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java deleted file mode 100644 index 07adda625..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; - -/** - * An implementation of DictUpdater for version 3 binary dictionary. - */ -@UsedForTesting -public class Ver3DictUpdater extends Ver3DictDecoder implements DictUpdater { - private OutputStream mOutStream; - - @UsedForTesting - public Ver3DictUpdater(final File dictFile, final int factoryType) { - // DictUpdater must have an updatable DictBuffer. - super(dictFile, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY) - ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER); - mOutStream = null; - } - - private void openStreamAndBuffer() throws FileNotFoundException, IOException { - super.openDictBuffer(); - mOutStream = new FileOutputStream(mDictionaryBinaryFile, true /* append */); - } - - private void close() throws IOException { - if (mOutStream != null) { - mOutStream.close(); - mOutStream = null; - } - } - - @Override @UsedForTesting - public void deleteWord(final String word) throws IOException, UnsupportedFormatException { - if (mOutStream == null) openStreamAndBuffer(); - mDictBuffer.position(0); - readHeader(); - final int wordPos = getTerminalPosition(word); - if (wordPos != FormatSpec.NOT_VALID_WORD) { - mDictBuffer.position(wordPos); - final int flags = mDictBuffer.readUnsignedByte(); - mDictBuffer.position(wordPos); - mDictBuffer.put((byte) DynamicBinaryDictIOUtils.markAsDeleted(flags)); - } - close(); - } - - @Override @UsedForTesting - public void insertWord(final String word, final int frequency, - final ArrayList<WeightedString> bigramStrings, - final ArrayList<WeightedString> shortcuts, - final boolean isNotAWord, final boolean isBlackListEntry) - throws IOException, UnsupportedFormatException { - if (mOutStream == null) openStreamAndBuffer(); - DynamicBinaryDictIOUtils.insertWord(this, mOutStream, word, frequency, bigramStrings, - shortcuts, isNotAWord, isBlackListEntry); - close(); - } -} diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 07522b54b..3be62f066 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -183,14 +183,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder { * An auxiliary class for reading bigrams. */ protected static class BigramContentReader extends SparseTableContentReader { - private final boolean mHasTimestamp; - public BigramContentReader(final String name, final File baseDir, final DictionaryBufferFactory factory, final boolean hasTimestamp) { super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory); - mHasTimestamp = hasTimestamp; } // TODO: Consolidate this method and BigramContentWriter.getContentFilenames. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 1a245b6db..8eaee4d9f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -1,5 +1,4 @@ /* -/* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,25 +17,15 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.BinaryDictionary; +import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.CollectionUtils; -import com.android.inputmethod.latin.utils.FileUtils; +import com.android.inputmethod.latin.utils.LocaleUtils; import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; /** * An implementation of DictEncoder for version 4 binary dictionary. @@ -44,197 +33,19 @@ import java.util.Iterator; @UsedForTesting public class Ver4DictEncoder implements DictEncoder { private final File mDictPlacedDir; - private byte[] mTrieBuf; - private int mTriePos; - private OutputStream mTrieOutStream; - private OutputStream mHeaderOutStream; - private OutputStream mFreqOutStream; - private OutputStream mUnigramTimestampOutStream; - private OutputStream mTerminalAddressTableOutStream; - private File mDictDir; - private String mBaseFilename; - private BigramContentWriter mBigramWriter; - private ShortcutContentWriter mShortcutWriter; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { mDictPlacedDir = dictPlacedDir; } - private static class BigramContentWriter extends SparseTableContentWriter { - private final boolean mWriteTimestamp; - - public BigramContentWriter(final String name, final int initialCapacity, - final File baseDir, final boolean writeTimestamp) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp)); - mWriteTimestamp = writeTimestamp; - } - - private static String[] getContentFilenames(final String name, - final boolean writeTimestamp) { - final String[] contentFilenames; - if (writeTimestamp) { - contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, - name + FormatSpec.BIGRAM_FILE_EXTENSION }; - } else { - contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; - } - return contentFilenames; - } - - private static String[] getContentIds(final boolean writeTimestamp) { - final String[] contentIds; - if (writeTimestamp) { - contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, - FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; - } else { - contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; - } - return contentIds; - } - - public void writeBigramsForOneWord(final int terminalId, final int bigramCount, - final Iterator<WeightedString> bigramIterator, final FusionDictionary dict) - throws IOException { - write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, - new SparseTableContentWriterInterface() { - @Override - public void write(final OutputStream outStream) throws IOException { - writeBigramsForOneWordInternal(outStream, bigramIterator, dict); - }}); - if (mWriteTimestamp) { - write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId, - new SparseTableContentWriterInterface() { - @Override - public void write(final OutputStream outStream) throws IOException { - initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream, - bigramCount); - }}); - } - } - - private void writeBigramsForOneWordInternal(final OutputStream outStream, - final Iterator<WeightedString> bigramIterator, final FusionDictionary dict) - throws IOException { - while (bigramIterator.hasNext()) { - final WeightedString bigram = bigramIterator.next(); - final PtNode target = - FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); - final int unigramFrequencyForThisWord = target.mFrequency; - final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags( - bigramIterator.hasNext(), 0, bigram.mFrequency, - unigramFrequencyForThisWord, bigram.mWord); - BinaryDictEncoderUtils.writeUIntToStream(outStream, bigramFlags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToStream(outStream, target.mTerminalId, - FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); - } - } - - private void initBigramTimestampsCountersAndLevelsForOneWordInternal( - final OutputStream outStream, final int bigramCount) throws IOException { - for (int i = 0; i < bigramCount; ++i) { - // TODO: Figure out what initial values should be. - BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, - FormatSpec.BIGRAM_TIMESTAMP_SIZE); - BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, - FormatSpec.BIGRAM_COUNTER_SIZE); - BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, - FormatSpec.BIGRAM_LEVEL_SIZE); - } - } - } - - private static class ShortcutContentWriter extends SparseTableContentWriter { - public ShortcutContentWriter(final String name, final int initialCapacity, - final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity, - FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, - new String[] { FormatSpec.SHORTCUT_CONTENT_ID }); - } - - public void writeShortcutForOneWord(final int terminalId, - final Iterator<WeightedString> shortcutIterator) throws IOException { - write(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, - new SparseTableContentWriterInterface() { - @Override - public void write(final OutputStream outStream) throws IOException { - writeShortcutForOneWordInternal(outStream, shortcutIterator); - } - }); - } - - private void writeShortcutForOneWordInternal(final OutputStream outStream, - final Iterator<WeightedString> shortcutIterator) throws IOException { - while (shortcutIterator.hasNext()) { - final WeightedString target = shortcutIterator.next(); - final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( - shortcutIterator.hasNext(), target.mFrequency); - BinaryDictEncoderUtils.writeUIntToStream(outStream, shortcutFlags, - FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); - CharEncoding.writeString(outStream, target.mWord); - } - } - } - - private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) - throws FileNotFoundException, IOException { - final FileHeader header = new FileHeader(0, dictOptions, formatOptions); - mBaseFilename = header.getId() + "." + header.getVersion(); - mDictDir = new File(mDictPlacedDir, mBaseFilename); - final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); - final File headerFile = new File(mDictDir, - mBaseFilename + FormatSpec.HEADER_FILE_EXTENSION); - final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); - final File timestampFile = new File(mDictDir, - mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION); - final File terminalAddressTableFile = new File(mDictDir, - mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); - if (!mDictDir.isDirectory()) { - if (mDictDir.exists()) { - FileUtils.deleteRecursively(mDictDir); - } - mDictDir.mkdirs(); - } - mTrieOutStream = new FileOutputStream(trieFile); - mHeaderOutStream = new FileOutputStream(headerFile); - mFreqOutStream = new FileOutputStream(freqFile); - mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); - if (formatOptions.mHasTimestamp) { - mUnigramTimestampOutStream = new FileOutputStream(timestampFile); - } - } - - private void close() throws IOException { - try { - if (mTrieOutStream != null) { - mTrieOutStream.close(); - } - if (mHeaderOutStream != null) { - mHeaderOutStream.close(); - } - if (mFreqOutStream != null) { - mFreqOutStream.close(); - } - if (mTerminalAddressTableOutStream != null) { - mTerminalAddressTableOutStream.close(); - } - if (mUnigramTimestampOutStream != null) { - mUnigramTimestampOutStream.close(); - } - } finally { - mTrieOutStream = null; - mHeaderOutStream = null; - mFreqOutStream = null; - mTerminalAddressTableOutStream = null; - } - } - + // TODO: This builds a FusionDictionary first and iterates it to add words to the binary + // dictionary. However, it is possible to just add words directly to the binary dictionary + // instead. + // In the long run, when we stop supporting version 2, FusionDictionary will become deprecated + // and we can remove it. Then we'll be able to just call BinaryDictionary directly. @Override - public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) + public void writeDictionary(FusionDictionary dict, FormatOptions formatOptions) throws IOException, UnsupportedFormatException { if (formatOptions.mVersion != FormatSpec.VERSION4) { throw new UnsupportedFormatException("File header has a wrong version number : " @@ -243,208 +54,71 @@ public class Ver4DictEncoder implements DictEncoder { if (!mDictPlacedDir.isDirectory()) { throw new UnsupportedFormatException("Given path is not a directory."); } - - if (mTrieOutStream == null) { - openStreams(formatOptions, dict.mOptions); - } - - BinaryDictEncoderUtils.writeDictionaryHeader(mHeaderOutStream, dict, formatOptions); - - MakedictLog.i("Flattening the tree..."); - ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); - int terminalCount = 0; - final ArrayList<PtNode> nodes = CollectionUtils.newArrayList(); - for (final PtNodeArray array : flatNodes) { - for (final PtNode node : array.mData) { - if (node.isTerminal()) { - nodes.add(node); - node.mTerminalId = terminalCount++; + if (!BinaryDictionary.createEmptyDictFile(mDictPlacedDir.getAbsolutePath(), + FormatSpec.VERSION4, dict.mOptions.mAttributes)) { + throw new IOException("Cannot create dictionary file : " + + mDictPlacedDir.getAbsolutePath()); + } + final BinaryDictionary binaryDict = new BinaryDictionary(mDictPlacedDir.getAbsolutePath(), + 0l, mDictPlacedDir.length(), true /* useFullEditDistance */, + LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get( + FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE)), + Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */, + true /* isUpdatable */); + if (!binaryDict.isValidDictionary()) { + // Somehow createEmptyDictFile returned true, but the file was not created correctly + throw new IOException("Cannot create dictionary file"); + } + for (final Word word : dict) { + // TODO: switch to addMultipleDictionaryEntries when they support shortcuts + if (null == word.mShortcutTargets || word.mShortcutTargets.isEmpty()) { + binaryDict.addUnigramWord(word.mWord, word.mFrequency, + null /* shortcutTarget */, 0 /* shortcutProbability */, + word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */); + } else { + for (final WeightedString shortcutTarget : word.mShortcutTargets) { + binaryDict.addUnigramWord(word.mWord, word.mFrequency, + shortcutTarget.mWord, shortcutTarget.mFrequency, + word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */); } } - } - Collections.sort(nodes, new Comparator<PtNode>() { - @Override - public int compare(final PtNode lhs, final PtNode rhs) { - if (lhs.mFrequency != rhs.mFrequency) { - return lhs.mFrequency < rhs.mFrequency ? -1 : 1; - } - if (lhs.mTerminalId < rhs.mTerminalId) return -1; - if (lhs.mTerminalId > rhs.mTerminalId) return 1; - return 0; + if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDict.flushWithGC(); } - }); - int count = 0; - for (final PtNode node : nodes) { - node.mTerminalId = count++; - } - - MakedictLog.i("Computing addresses..."); - BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); - if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); - - writeTerminalData(flatNodes, terminalCount); - if (formatOptions.mHasTimestamp) { - initUnigramTimestamps(terminalCount); - } - mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, - formatOptions.mHasTimestamp); - writeBigrams(flatNodes, dict); - mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir); - writeShortcuts(flatNodes); - - final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); - final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; - mTrieBuf = new byte[bufferSize]; - - MakedictLog.i("Writing file..."); - for (PtNodeArray nodeArray : flatNodes) { - BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions); } - if (MakedictLog.DBG) { - BinaryDictEncoderUtils.showStatistics(flatNodes); - MakedictLog.i("has " + terminalCount + " terminals."); + for (final Word word0 : dict) { + if (null == word0.mBigrams) continue; + for (final WeightedString word1 : word0.mBigrams) { + binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency, + 0 /* timestamp */); + } + if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) { + binaryDict.flushWithGC(); + } } - mTrieOutStream.write(mTrieBuf); - - MakedictLog.i("Done"); - close(); + binaryDict.flushWithGC(); + binaryDict.close(); } @Override public void setPosition(int position) { - if (mTrieBuf == null || position < 0 || position > mTrieBuf.length) return; - mTriePos = position; } @Override public int getPosition() { - return mTriePos; + return 0; } @Override public void writePtNodeCount(int ptNodeCount) { - final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); - // ptNodeCount must fit on one byte or two bytes. - // Please see comments in FormatSpec - if (countSize != 1 && countSize != 2) { - throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize); - } - final int encodedPtNodeCount = (countSize == 2) ? - (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount; - mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, encodedPtNodeCount, - countSize); - } - - private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); - mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, - BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions), - FormatSpec.PTNODE_FLAGS_SIZE); - } - - private void writeParentPosition(int parentPos, final PtNode ptNode, - final FormatOptions formatOptions) { - if (parentPos != FormatSpec.NO_PARENT_ADDRESS) { - parentPos -= ptNode.mCachedAddressAfterUpdate; - } - mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos, - formatOptions); - } - - private void writeCharacters(final int[] characters, final boolean hasSeveralChars) { - mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos); - if (hasSeveralChars) { - mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; - } - } - - private void writeTerminalId(final int terminalId) { - mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId, - FormatSpec.PTNODE_TERMINAL_ID_SIZE); - } - - private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) { - final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); - if (formatOptions.mSupportsDynamicUpdate) { - mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf, - mTriePos, childrenPos); - } else { - mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, - mTriePos, childrenPos); - } - } - - private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict) - throws IOException { - mBigramWriter.openStreams(); - for (final PtNodeArray nodeArray : flatNodes) { - for (final PtNode ptNode : nodeArray.mData) { - if (ptNode.mBigrams != null) { - mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(), - ptNode.mBigrams.iterator(), dict); - } - } - } - mBigramWriter.closeStreams(); - } - - private void writeShortcuts(final ArrayList<PtNodeArray> flatNodes) throws IOException { - mShortcutWriter.openStreams(); - for (final PtNodeArray nodeArray : flatNodes) { - for (final PtNode ptNode : nodeArray.mData) { - if (ptNode.mShortcutTargets != null && !ptNode.mShortcutTargets.isEmpty()) { - mShortcutWriter.writeShortcutForOneWord(ptNode.mTerminalId, - ptNode.mShortcutTargets.iterator()); - } - } - } - mShortcutWriter.closeStreams(); } @Override public void writeForwardLinkAddress(int forwardLinkAddress) { - mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, - forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE); } @Override - public void writePtNode(final PtNode ptNode, final int parentPosition, - final FormatOptions formatOptions, final FusionDictionary dict) { - writePtNodeFlags(ptNode, formatOptions); - writeParentPosition(parentPosition, ptNode, formatOptions); - writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); - if (ptNode.isTerminal()) { - writeTerminalId(ptNode.mTerminalId); - } - writeChildrenPosition(ptNode, formatOptions); - } - - private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes, - final int terminalCount) throws IOException { - final byte[] freqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; - final byte[] terminalAddressTableBuf = - new byte[terminalCount * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE]; - for (final PtNodeArray nodeArray : flatNodes) { - for (final PtNode ptNode : nodeArray.mData) { - if (ptNode.isTerminal()) { - BinaryDictEncoderUtils.writeUIntToBuffer(freqBuf, - ptNode.mTerminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE, - ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf, - ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, - ptNode.mCachedAddressAfterUpdate, - FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - } - } - } - mFreqOutStream.write(freqBuf); - mTerminalAddressTableOutStream.write(terminalAddressTableBuf); - } - - private void initUnigramTimestamps(final int terminalCount) throws IOException { - // Initial value of time stamps for each word is 0. - final byte[] unigramTimestampBuf = - new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE]; - mUnigramTimestampOutStream.write(unigramTimestampBuf); + public void writePtNode( + PtNode ptNode, int parentPosition, FormatOptions formatOptions, FusionDictionary dict) { } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java index 91d9cf345..6298295c6 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java @@ -40,6 +40,7 @@ import java.util.Iterator; @UsedForTesting public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { private static final String TAG = Ver4DictUpdater.class.getSimpleName(); + private static final int MAX_JUMPS = 10000; private OutputStream mDictStream; private final File mFrequencyFile; @@ -54,8 +55,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { } private static class BigramContentUpdater extends SparseTableContentUpdater { - private final boolean mHasTimestamp; - public BigramContentUpdater(final String name, final File baseDir, final boolean hasTimestamp) { super(name + FormatSpec.BIGRAM_FILE_EXTENSION, @@ -63,7 +62,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { BigramContentReader.getContentFilenames(name, hasTimestamp), BigramContentReader.getContentIds(hasTimestamp), new DictionaryBufferFromWritableByteBufferFactory()); - mHasTimestamp = hasTimestamp; } public void insertBigramEntries(final int terminalId, final int frequency, @@ -149,7 +147,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { mDictBuffer.position(wordPos); final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); mDictBuffer.position(wordPos); - mDictBuffer.put((byte) DynamicBinaryDictIOUtils.markAsDeleted(flags)); + mDictBuffer.put((byte)markAsDeleted(flags)); } } @@ -186,7 +184,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { skipPtNode(formatOptions); } if (!readAndFollowForwardLink()) break; - } while (jumpCount++ < DynamicBinaryDictIOUtils.MAX_JUMPS); + } while (jumpCount++ < MAX_JUMPS); setPosition(originalPos); } @@ -219,7 +217,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { final int originalPos = getPosition(); setPosition(nodeArrayPos); int jumpCount = 0; - while (jumpCount++ < DynamicBinaryDictIOUtils.MAX_JUMPS) { + while (jumpCount++ < MAX_JUMPS) { final int ptNodeCount = readPtNodeCount(); for (int i = 0; i < ptNodeCount; ++i) { skipPtNode(formatOptions); @@ -738,8 +736,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { mDictBuffer.put((byte) newFlags); updateFrequency(terminalId, frequency); - insertBigrams(terminalId, frequency, - DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings)); + insertBigrams(terminalId, frequency, resolveBigramPositions(this, bigramStrings)); insertShortcuts(terminalId, shortcuts); } @@ -768,8 +765,30 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater { insertTerminalPosition(posOfTerminal); close(); - insertBigrams(newTerminalId, frequency, - DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings)); + insertBigrams(newTerminalId, frequency, resolveBigramPositions(this, bigramStrings)); insertShortcuts(newTerminalId, shortcuts); } + + /** + * Converts a list of WeightedString to a list of PendingAttribute. + */ + private static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater, + final ArrayList<WeightedString> bigramStrings) + throws IOException, UnsupportedFormatException { + if (bigramStrings == null) return CollectionUtils.newArrayList(); + final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); + for (final WeightedString bigram : bigramStrings) { + final int pos = dictUpdater.getTerminalPosition(bigram.mWord); + if (pos == FormatSpec.NOT_VALID_WORD) { + // TODO: figure out what is the correct thing to do here. + } else { + bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); + } + } + return bigrams; + } + + private static int markAsDeleted(final int flags) { + return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED; + } } diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index 296733fad..9b573b4b8 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -95,8 +95,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB @Override protected Map<String, String> getHeaderAttributeMap() { HashMap<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE, diff --git a/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java b/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java index b51fd9377..be0955456 100644 --- a/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java @@ -40,12 +40,17 @@ public final class SpannableStringUtils { * are out of range in <code>dest</code>. */ public static void copyNonParagraphSuggestionSpansFrom(Spanned source, int start, int end, - Spannable dest, int destoff) { + Spannable dest, int destoff) { Object[] spans = source.getSpans(start, end, SuggestionSpan.class); for (int i = 0; i < spans.length; i++) { int fl = source.getSpanFlags(spans[i]); - if (0 != (fl & Spannable.SPAN_PARAGRAPH)) continue; + // We don't care about the PARAGRAPH flag in LatinIME code. However, if this flag + // is set, Spannable#setSpan will throw an exception unless the span is on the edge + // of a word. But the spans have been split into two by the getText{Before,After}Cursor + // methods, so after concatenation they may end in the middle of a word. + // Since we don't use them, we can just remove them and avoid crashing. + fl &= ~Spannable.SPAN_PARAGRAPH; int st = source.getSpanStart(spans[i]); int en = source.getSpanEnd(spans[i]); diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index a2c3ed44d..db628fe18 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -95,8 +95,7 @@ public final class UserHistoryDictIOUtils { static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) { final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(options, false, - false)); + new FusionDictionary.DictionaryOptions(options)); int profTotal = 0; for (final String word1 : bigrams.keySet()) { final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1); diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryForgettingCurveUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryForgettingCurveUtils.java deleted file mode 100644 index 677035ed6..000000000 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryForgettingCurveUtils.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.util.Log; - -import java.util.concurrent.TimeUnit; - -import com.android.inputmethod.annotations.UsedForTesting; - -@UsedForTesting -public final class UserHistoryForgettingCurveUtils { - private static final String TAG = UserHistoryForgettingCurveUtils.class.getSimpleName(); - private static final boolean DEBUG = false; - private static final int DEFAULT_FC_FREQ = 127; - private static final int BOOSTED_FC_FREQ = 200; - private static int FC_FREQ_MAX = DEFAULT_FC_FREQ; - /* package */ static final int COUNT_MAX = 3; - private static final int FC_LEVEL_MAX = 3; - /* package */ static final int ELAPSED_TIME_MAX = 15; - private static final int ELAPSED_TIME_INTERVAL_HOURS = 6; - private static final long ELAPSED_TIME_INTERVAL_MILLIS = - TimeUnit.HOURS.toMillis(ELAPSED_TIME_INTERVAL_HOURS); - private static final int HALF_LIFE_HOURS = 48; - private static final int MAX_PUSH_ELAPSED = (FC_LEVEL_MAX + 1) * (ELAPSED_TIME_MAX + 1); - - public static void boostMaxFreqForDebug() { - FC_FREQ_MAX = BOOSTED_FC_FREQ; - } - - public static void resetMaxFreqForDebug() { - FC_FREQ_MAX = DEFAULT_FC_FREQ; - } - - private UserHistoryForgettingCurveUtils() { - // This utility class is not publicly instantiable. - } - - public static final class ForgettingCurveParams { - private byte mFc; - long mLastTouchedTime = 0; - private final boolean mIsValid; - - private void updateLastTouchedTime() { - mLastTouchedTime = System.currentTimeMillis(); - } - - public ForgettingCurveParams(boolean isValid) { - this(System.currentTimeMillis(), isValid); - } - - private ForgettingCurveParams(long now, boolean isValid) { - this(pushCount((byte)0, isValid), now, now, isValid); - } - - /** This constructor is called when the user history bigram dictionary is being restored. */ - public ForgettingCurveParams(int fc, long now, long last) { - // All words with level >= 1 had been saved. - // Invalid words with level == 0 had been saved. - // Valid words words with level == 0 had *not* been saved. - this(fc, now, last, fcToLevel((byte)fc) > 0); - } - - private ForgettingCurveParams(int fc, long now, long last, boolean isValid) { - mIsValid = isValid; - mFc = (byte)fc; - mLastTouchedTime = last; - updateElapsedTime(now); - } - - public boolean isValid() { - return mIsValid; - } - - public byte getFc() { - updateElapsedTime(System.currentTimeMillis()); - return mFc; - } - - public int getFrequency() { - updateElapsedTime(System.currentTimeMillis()); - return UserHistoryForgettingCurveUtils.fcToFreq(mFc); - } - - public int notifyTypedAgainAndGetFrequency() { - updateLastTouchedTime(); - // TODO: Check whether this word is valid or not - mFc = pushCount(mFc, false); - return UserHistoryForgettingCurveUtils.fcToFreq(mFc); - } - - private void updateElapsedTime(long now) { - final int elapsedTimeCount = - (int)((now - mLastTouchedTime) / ELAPSED_TIME_INTERVAL_MILLIS); - if (elapsedTimeCount <= 0) { - return; - } - if (elapsedTimeCount >= MAX_PUSH_ELAPSED) { - mLastTouchedTime = now; - mFc = 0; - return; - } - for (int i = 0; i < elapsedTimeCount; ++i) { - mLastTouchedTime += ELAPSED_TIME_INTERVAL_MILLIS; - mFc = pushElapsedTime(mFc); - } - } - } - - @UsedForTesting - /* package */ static int fcToElapsedTime(byte fc) { - return fc & 0x0F; - } - - @UsedForTesting - /* package */ static int fcToCount(byte fc) { - return (fc >> 4) & 0x03; - } - - @UsedForTesting - /* package */ static int fcToLevel(byte fc) { - return (fc >> 6) & 0x03; - } - - @UsedForTesting - private static int calcFreq(int elapsedTime, int count, int level) { - if (level <= 0) { - // Reserved words, just return -1 - return -1; - } - if (count == COUNT_MAX) { - // Temporary promote because it's frequently typed recently - ++level; - } - final int et = Math.min(FC_FREQ_MAX, Math.max(0, elapsedTime)); - final int l = Math.min(FC_LEVEL_MAX, Math.max(0, level)); - return MathUtils.SCORE_TABLE[l - 1][et]; - } - - /* pakcage */ static byte calcFc(int elapsedTime, int count, int level) { - final int et = Math.min(FC_FREQ_MAX, Math.max(0, elapsedTime)); - final int c = Math.min(COUNT_MAX, Math.max(0, count)); - final int l = Math.min(FC_LEVEL_MAX, Math.max(0, level)); - return (byte)(et | (c << 4) | (l << 6)); - } - - public static int fcToFreq(byte fc) { - final int elapsedTime = fcToElapsedTime(fc); - final int count = fcToCount(fc); - final int level = fcToLevel(fc); - return calcFreq(elapsedTime, count, level); - } - - @UsedForTesting - public static byte pushElapsedTime(byte fc) { - int elapsedTime = fcToElapsedTime(fc); - int count = fcToCount(fc); - int level = fcToLevel(fc); - if (elapsedTime >= ELAPSED_TIME_MAX) { - // Downgrade level - elapsedTime = 0; - count = COUNT_MAX; - --level; - } else { - ++elapsedTime; - } - return calcFc(elapsedTime, count, level); - } - - @UsedForTesting - public static byte pushCount(byte fc, boolean isValid) { - final int elapsedTime = fcToElapsedTime(fc); - int count = fcToCount(fc); - int level = fcToLevel(fc); - if ((elapsedTime == 0 && count >= COUNT_MAX) || (isValid && level == 0)) { - // Upgrade level - ++level; - count = 0; - if (DEBUG) { - Log.d(TAG, "Upgrade level."); - } - } else { - ++count; - } - return calcFc(0, count, level); - } - - // TODO: isValid should be false for a word whose frequency is 0, - // or that is not in the dictionary. - /** - * Check wheather we should save the bigram to the SQL DB or not - */ - public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) { - int level = fcToLevel(fc); - if (level == 0) { - if (isValid || !addLevel0Bigram) { - return false; - } - } - final int elapsedTime = fcToElapsedTime(fc); - return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0); - } - - private static final class MathUtils { - public static final int[][] SCORE_TABLE = new int[FC_LEVEL_MAX][ELAPSED_TIME_MAX + 1]; - static { - for (int i = 0; i < FC_LEVEL_MAX; ++i) { - final float initialFreq; - if (i >= 2) { - initialFreq = FC_FREQ_MAX; - } else if (i == 1) { - initialFreq = FC_FREQ_MAX / 2; - } else if (i == 0) { - initialFreq = FC_FREQ_MAX / 4; - } else { - continue; - } - for (int j = 0; j < ELAPSED_TIME_MAX; ++j) { - final float elapsedHours = j * ELAPSED_TIME_INTERVAL_HOURS; - final float freq = initialFreq - * (float)Math.pow(initialFreq, elapsedHours / HALF_LIFE_HOURS); - final int intFreq = Math.min(FC_FREQ_MAX, Math.max(0, (int)freq)); - SCORE_TABLE[i][j] = intFreq; - } - } - } - } -} |