diff options
43 files changed, 669 insertions, 788 deletions
diff --git a/java/res/values/strings.xml b/java/res/values/strings.xml index ef2444826..70f4c1829 100644 --- a/java/res/values/strings.xml +++ b/java/res/values/strings.xml @@ -498,6 +498,14 @@ mobile devices. [CHAR LIMIT=25] --> <string name="read_external_dictionary_confirm_install_message">Really install this file for <xliff:g id="locale_name">%s</xliff:g>?</string> <!-- Title for an error dialog that contains the details of the error in the body [CHAR LIMIT=80] --> <string name="error">There was an error</string> + <!-- Title of the settings for dumping contacts dictionary file [CHAR LIMIT=35] --> + <string name="prefs_dump_contacts_dict">Dump contacts dictionary</string> + <!-- Title of the settings for dumping personal dictionary file [CHAR LIMIT=35] --> + <string name="prefs_dump_user_dict">Dump personal dictionary</string> + <!-- Title of the settings for dumping user history dictionary file [CHAR LIMIT=35] --> + <string name="prefs_dump_user_history_dict">Dump user history dictionary</string> + <!-- Title of the settings for dumping personalization dictionary file [CHAR LIMIT=35] --> + <string name="prefs_dump_personalization_dict">Dump personalization dictionary</string> <!-- Title of the button to revert to the default value of the device in the settings dialog [CHAR LIMIT=15] --> <string name="button_default">Default</string> diff --git a/java/res/xml/prefs_for_debug.xml b/java/res/xml/prefs_for_debug.xml index 8d9508e38..899e2b88e 100644 --- a/java/res/xml/prefs_for_debug.xml +++ b/java/res/xml/prefs_for_debug.xml @@ -61,4 +61,21 @@ <PreferenceScreen android:key="read_external_dictionary" android:title="@string/prefs_read_external_dictionary" /> + + <PreferenceScreen + android:key="dump_contacts_dict" + android:title="@string/prefs_dump_contacts_dict" /> + + <PreferenceScreen + android:key="dump_user_dict" + android:title="@string/prefs_dump_user_dict" /> + + <PreferenceScreen + android:key="dump_user_history_dict" +
android:title="@string/prefs_dump_user_history_dict" /> + + <PreferenceScreen + android:key="dump_personalization_dict" + android:title="@string/prefs_dump_personalization_dict" /> + </PreferenceScreen> diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 00eb57c9f..80a27e23f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -22,7 +22,10 @@ import android.util.SparseArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.makedict.Word; +import com.android.inputmethod.latin.makedict.DictionaryHeader; +import com.android.inputmethod.latin.makedict.FormatSpec; +import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; +import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.settings.NativeSuggestOptions; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.JniUtils; @@ -33,6 +36,7 @@ import com.android.inputmethod.latin.utils.WordProperty; import java.io.File; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -138,6 +142,9 @@ public final class BinaryDictionary extends Dictionary { String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); + private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, + int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, + ArrayList<int[]> outAttributeValues); private static native void flushNative(long dict, String filePath); private 
static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); private static native void flushWithGCNative(long dict, String filePath); @@ -171,7 +178,6 @@ public final class BinaryDictionary extends Dictionary { private static native int setCurrentTimeForTestNative(int currentTime); private static native String getPropertyNative(long dict, String query); - @UsedForTesting public static boolean createEmptyDictFile(final String filePath, final long dictVersion, final Map<String, String> attributeMap) { final String[] keyArray = new String[attributeMap.size()]; @@ -191,6 +197,33 @@ public final class BinaryDictionary extends Dictionary { mNativeDict = openNative(path, startOffset, length, isUpdatable); } + @UsedForTesting + public DictionaryHeader getHeader() throws UnsupportedFormatException { + if (mNativeDict == 0) { + return null; + } + final int[] outHeaderSize = new int[1]; + final int[] outFormatVersion = new int[1]; + final ArrayList<int[]> outAttributeKeys = CollectionUtils.newArrayList(); + final ArrayList<int[]> outAttributeValues = CollectionUtils.newArrayList(); + getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, + outAttributeValues); + final HashMap<String, String> attributes = new HashMap<String, String>(); + for (int i = 0; i < outAttributeKeys.size(); i++) { + final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeKeys.get(i)); + final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( + outAttributeValues.get(i)); + attributes.put(attributeKey, attributeValue); + } + final boolean hasHistoricalInfo = + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY).equals( + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); + return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), + new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); + } + + @Override public ArrayList<SuggestedWordInfo> 
getSuggestions(final WordComposer composer, final String prevWord, final ProximityInfo proximityInfo, @@ -308,7 +341,6 @@ public final class BinaryDictionary extends Dictionary { return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); } - @UsedForTesting public WordProperty getWordProperty(final String word) { if (TextUtils.isEmpty(word)) { return null; @@ -348,16 +380,10 @@ public final class BinaryDictionary extends Dictionary { * Method to iterate all words in the dictionary for makedict. * If token is 0, this method newly starts iterating the dictionary. */ - @UsedForTesting public GetNextWordPropertyResult getNextWordProperty(final int token) { final int[] codePoints = new int[MAX_WORD_LENGTH]; final int nextToken = getNextWordNative(mNativeDict, token, codePoints); - int len = 0; - // codePoints is null-terminated if its length is shorter than the array length. - while (len < MAX_WORD_LENGTH && codePoints[len] != 0) { - ++len; - } - final String word = new String(mOutputCodePoints, 0, len); + final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); return new GetNextWordPropertyResult(getWordProperty(word), nextToken); } diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java index 7e97802e1..a7008379f 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java @@ -22,8 +22,8 @@ import android.content.res.AssetFileDescriptor; import android.util.Log; import com.android.inputmethod.latin.makedict.DictDecoder; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.CollectionUtils; 
import com.android.inputmethod.latin.utils.DictionaryInfoUtils; @@ -230,7 +230,7 @@ final public class BinaryDictionaryGetter { try { // Read the version of the file final DictDecoder dictDecoder = FormatSpec.getDictDecoder(f); - final FileHeader header = dictDecoder.readHeader(); + final DictionaryHeader header = dictDecoder.readHeader(); final String version = header.mDictionaryOptions.mAttributes.get(VERSION_KEY); if (null == version) { diff --git a/java/src/com/android/inputmethod/latin/DictionaryDumpBroadcastReceiver.java b/java/src/com/android/inputmethod/latin/DictionaryDumpBroadcastReceiver.java new file mode 100644 index 000000000..ee2fdc6c7 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/DictionaryDumpBroadcastReceiver.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.android.inputmethod.latin; + +import android.content.BroadcastReceiver; +import android.content.Context; +import android.content.Intent; +import android.util.Log; + +public class DictionaryDumpBroadcastReceiver extends BroadcastReceiver { + private static final String TAG = DictionaryDumpBroadcastReceiver.class.getSimpleName(); + + private static final String DOMAIN = "com.android.inputmethod.latin"; + public static final String DICTIONARY_DUMP_INTENT_ACTION = DOMAIN + ".DICT_DUMP"; + public static final String DICTIONARY_NAME_KEY = "dictName"; + + final LatinIME mLatinIme; + + public DictionaryDumpBroadcastReceiver(final LatinIME latinIme) { + mLatinIme = latinIme; + } + + @Override + public void onReceive(Context context, Intent intent) { + final String action = intent.getAction(); + if (action.equals(DICTIONARY_DUMP_INTENT_ACTION)) { + final String dictName = intent.getStringExtra(DICTIONARY_NAME_KEY); + if (dictName == null) { + Log.e(TAG, "Received dictionary dump intent action " + + "but the dictionary name is not set."); + return; + } + mLatinIme.dumpDictionaryForDebug(dictName); + } + } +} diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java index 8b02984e0..e68c6b771 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitatorForSuggest.java @@ -534,4 +534,25 @@ public class DictionaryFacilitatorForSuggest { mPersonalizationDictionary.addMultipleDictionaryEntriesToDictionary(languageModelParams, callback); } + + public void dumpDictionaryForDebug(final String dictName) { + final ExpandableBinaryDictionary dictToDump; + if (dictName.equals(Dictionary.TYPE_CONTACTS)) { + dictToDump = mContactsDictionary; + } else if (dictName.equals(Dictionary.TYPE_USER)) { + dictToDump = mUserDictionary; + } else if 
(dictName.equals(Dictionary.TYPE_USER_HISTORY)) { + dictToDump = mUserHistoryDictionary; + } else if (dictName.equals(Dictionary.TYPE_PERSONALIZATION)) { + dictToDump = mPersonalizationDictionary; + } else { + dictToDump = null; + } + if (dictToDump == null) { + Log.e(TAG, "Cannot dump " + dictName + ". " + + "The dictionary is not being used for suggestion or cannot be dumped."); + return; + } + dictToDump.dumpAllWordsForDebug(); + } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 4dee84a7b..f0dc7720d 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -21,6 +21,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.utils.AsyncResultHolder; @@ -28,6 +29,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.FileUtils; import com.android.inputmethod.latin.utils.LanguageModelParam; import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor; +import com.android.inputmethod.latin.utils.WordProperty; import java.io.File; import java.util.ArrayList; @@ -267,9 +269,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { protected Map<String, String> getHeaderAttributeMap() { HashMap<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_KEY, mDictName); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); - 
attributeMap.put(FormatSpec.FileHeader.DICTIONARY_VERSION_KEY, + attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, mDictName); + attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); + attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); return attributeMap; } @@ -778,16 +780,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } @UsedForTesting - protected void runAfterGcForDebug(final Runnable r) { - getExecutor(mDictName).executePrioritized(new Runnable() { + public void dumpAllWordsForDebug() { + reloadDictionaryIfRequired(); + getExecutor(mDictName).execute(new Runnable() { @Override public void run() { - try { - mBinaryDictionary.flushWithGC(); - r.run(); - } finally { - mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false); - } + Log.d(TAG, "dictionary=" + mDictName); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + mBinaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + if (wordProperty == null) { + Log.d(TAG, " dictionary is empty."); + break; + } + Log.d(TAG, wordProperty.toString()); + token = result.mNextToken; + } while (token != 0); } }); } diff --git a/java/src/com/android/inputmethod/latin/InputPointers.java b/java/src/com/android/inputmethod/latin/InputPointers.java index c3bcf3785..47bc6b078 100644 --- a/java/src/com/android/inputmethod/latin/InputPointers.java +++ b/java/src/com/android/inputmethod/latin/InputPointers.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin; import android.util.Log; +import android.util.SparseIntArray; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.utils.ResizableIntArray; @@ -160,15 +161,21 @@ public final class InputPointers { private boolean isValidTimeStamps() { final int[] times = mTimes.getPrimitiveArray(); + final int[] 
pointerIds = mPointerIds.getPrimitiveArray(); + final SparseIntArray lastTimeOfPointers = new SparseIntArray(); final int size = getPointerSize(); - for (int i = 1; i < size; ++i) { - if (times[i] < times[i - 1]) { + for (int i = 0; i < size; ++i) { + final int pointerId = pointerIds[i]; + final int time = times[i]; + final int lastTime = lastTimeOfPointers.get(pointerId, time); + if (time < lastTime) { // dump for (int j = 0; j < size; ++j) { Log.d(TAG, "--- (" + j + ") " + times[j]); } return false; } + lastTimeOfPointers.put(pointerId, time); } return true; } diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 6517ef29d..2e163c4be 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -133,6 +133,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private BroadcastReceiver mDictionaryPackInstallReceiver = new DictionaryPackInstallBroadcastReceiver(this); + private BroadcastReceiver mDictionaryDumpBroadcastReceiver = + new DictionaryDumpBroadcastReceiver(this); + private AlertDialog mOptionsDialog; private final boolean mIsHardwareAcceleratedDrawingEnabled; @@ -487,6 +490,10 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen newDictFilter.addAction(DictionaryPackConstants.NEW_DICTIONARY_INTENT_ACTION); registerReceiver(mDictionaryPackInstallReceiver, newDictFilter); + final IntentFilter dictDumpFilter = new IntentFilter(); + dictDumpFilter.addAction(DictionaryDumpBroadcastReceiver.DICTIONARY_DUMP_INTENT_ACTION); + registerReceiver(mDictionaryDumpBroadcastReceiver, dictDumpFilter); + DictionaryDecayBroadcastReciever.setUpIntervalAlarmForDictionaryDecaying(this); } @@ -1758,6 +1765,13 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen resetSuggest(new Suggest(locale, dictionaryFacilitator)); } + public void 
dumpDictionaryForDebug(final String dictName) { + if (mInputLogic.mSuggest == null) { + initSuggest(); + } + mInputLogic.mSuggest.mDictionaryFacilitator.dumpDictionaryForDebug(dictName); + } + public void debugDumpStateAndCrashWithException(final String context) { final SettingsValues settingsValues = mSettings.getCurrent(); final StringBuilder s = new StringBuilder(settingsValues.toString()); diff --git a/java/src/com/android/inputmethod/latin/debug/ExternalDictionaryGetterForDebug.java b/java/src/com/android/inputmethod/latin/debug/ExternalDictionaryGetterForDebug.java index 028f78a87..800f56597 100644 --- a/java/src/com/android/inputmethod/latin/debug/ExternalDictionaryGetterForDebug.java +++ b/java/src/com/android/inputmethod/latin/debug/ExternalDictionaryGetterForDebug.java @@ -26,7 +26,7 @@ import android.os.Environment; import com.android.inputmethod.latin.BinaryDictionaryFileDumper; import com.android.inputmethod.latin.BinaryDictionaryGetter; import com.android.inputmethod.latin.R; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils; import com.android.inputmethod.latin.utils.LocaleUtils; @@ -51,7 +51,7 @@ public class ExternalDictionaryGetterForDebug { final File[] files = new File(SOURCE_FOLDER).listFiles(); final ArrayList<String> eligibleList = CollectionUtils.newArrayList(); for (File f : files) { - final FileHeader header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(f); + final DictionaryHeader header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(f); if (null == header) continue; eligibleList.add(f.getName()); } @@ -99,7 +99,7 @@ public class ExternalDictionaryGetterForDebug { public static void askInstallFile(final Context context, final String dirPath, final String fileName, final Runnable completeRunnable) { final File file = new 
File(dirPath, fileName.toString()); - final FileHeader header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(file); + final DictionaryHeader header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(file); final StringBuilder message = new StringBuilder(); final String locale = header.getLocaleString(); for (String key : header.mDictionaryOptions.mAttributes.keySet()) { @@ -143,7 +143,7 @@ public class ExternalDictionaryGetterForDebug { } private static void installFile(final Context context, final File file, - final FileHeader header) { + final DictionaryHeader header) { BufferedOutputStream outputStream = null; File tempFile = null; try { diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index 1a9118147..5c7c4b8e3 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -19,7 +19,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -36,7 +35,7 @@ public abstract class AbstractDictDecoder implements DictDecoder { private static final int ERROR_CANNOT_READ = 1; private static final int ERROR_WRONG_FORMAT = 2; - protected FileHeader readHeader(final DictBuffer headerBuffer) + protected DictionaryHeader readHeader(final DictBuffer headerBuffer) throws IOException, UnsupportedFormatException { if (headerBuffer == null) { openDictBuffer(); @@ -57,10 +56,10 @@ public abstract class AbstractDictDecoder 
implements DictDecoder { final HashMap<String, String> attributes = HeaderReader.readAttributes(headerBuffer, headerSize); - final FileHeader header = new FileHeader(headerSize, + final DictionaryHeader header = new DictionaryHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes), - new FormatOptions(version, FileHeader.ATTRIBUTE_VALUE_TRUE.equals( - attributes.get(FileHeader.HAS_HISTORICAL_INFO_KEY)))); + new FormatOptions(version, DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( + attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 31747155e..369184573 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -17,7 +17,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -598,7 +597,7 @@ public final class BinaryDictDecoderUtils { /* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder, final FusionDictionary dict) throws IOException, UnsupportedFormatException { // Read header - final FileHeader fileHeader = dictDecoder.readHeader(); + final DictionaryHeader fileHeader = dictDecoder.readHeader(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, PtNode> reversePtNodeMapping = new TreeMap<Integer, PtNode>(); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java 
b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 0dc50d14e..dea9f2e28 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -20,7 +20,6 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; @@ -151,7 +150,7 @@ public final class BinaryDictIOUtils { final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header - final FileHeader header = dictDecoder.readHeader(); + final DictionaryHeader header = dictDecoder.readHeader(); readUnigramsAndBigramsBinaryInner(dictDecoder, header.mBodyOffset, words, frequencies, bigrams, header.mFormatOptions); } @@ -172,7 +171,7 @@ public final class BinaryDictIOUtils { if (word == null) return FormatSpec.NOT_VALID_WORD; dictDecoder.setPosition(0); - final FileHeader header = dictDecoder.readHeader(); + final DictionaryHeader header = dictDecoder.readHeader(); int wordPos = 0; final int wordLen = word.codePointCount(0, word.length()); for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { @@ -311,7 +310,7 @@ public final class BinaryDictIOUtils { * @param length The length of the data file. * @return the header of the specified dictionary file. 
*/ - private static FileHeader getDictionaryFileHeader( + private static DictionaryHeader getDictionaryFileHeader( final File file, final long offset, final long length) throws FileNotFoundException, IOException, UnsupportedFormatException { final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; @@ -337,10 +336,10 @@ public final class BinaryDictIOUtils { return dictDecoder.readHeader(); } - public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, + public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file, final long offset, final long length) { try { - final FileHeader header = getDictionaryFileHeader(file, offset, length); + final DictionaryHeader header = getDictionaryFileHeader(file, offset, length); return header; } catch (UnsupportedFormatException e) { return null; diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index b4838f00f..bba1d434f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -18,7 +18,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; @@ -41,7 +40,7 @@ public interface DictDecoder { /** * Reads and returns the file header. */ - public FileHeader readHeader() throws IOException, UnsupportedFormatException; + public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException; /** * Reads PtNode from ptNodePos. 
diff --git a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java
new file mode 100644
index 000000000..b99e281da
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+
+/**
+ * Class representing dictionary header.
+ *
+ * <p>Bundles the body offset, the dictionary options and the format options. Every getter
+ * reads from the attribute map held by the dictionary options.
+ */
+public final class DictionaryHeader {
+    // Set to the header size for format versions below FormatSpec.VERSION4, 0 otherwise.
+    public final int mBodyOffset;
+    public final DictionaryOptions mDictionaryOptions;
+    public final FormatOptions mFormatOptions;
+
+    // Note that these are corresponding definitions in native code in latinime::HeaderPolicy
+    // and latinime::HeaderReadWriteUtils.
+    // TODO: Standardize the key names and bump up the format version, taking care not to
+    // break format version 2 dictionaries.
+    public static final String DICTIONARY_VERSION_KEY = "version";
+    public static final String DICTIONARY_LOCALE_KEY = "locale";
+    public static final String DICTIONARY_ID_KEY = "dictionary";
+    public static final String DICTIONARY_DESCRIPTION_KEY = "description";
+    public static final String DICTIONARY_DATE_KEY = "date";
+    public static final String HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
+    public static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
+    public static final String ATTRIBUTE_VALUE_TRUE = "1";
+
+    /**
+     * Creates a header and checks that the locale, version and ID attributes are present.
+     *
+     * @param headerSize stored as the body offset when the format version is below
+     *        {@link FormatSpec#VERSION4}; otherwise the body offset is 0.
+     * @param dictionaryOptions options whose attribute map is read by the getters.
+     * @param formatOptions format options; its version decides the body offset.
+     * @throws UnsupportedFormatException if the locale, version or ID attribute is null.
+     */
+    public DictionaryHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
+            final FormatOptions formatOptions) throws UnsupportedFormatException {
+        mDictionaryOptions = dictionaryOptions;
+        mFormatOptions = formatOptions;
+        mBodyOffset = formatOptions.mVersion < FormatSpec.VERSION4 ? headerSize : 0;
+        if (null == getLocaleString()) {
+            throw new UnsupportedFormatException(
+                    "Cannot create a DictionaryHeader without a locale");
+        }
+        if (null == getVersion()) {
+            throw new UnsupportedFormatException(
+                    "Cannot create a DictionaryHeader without a version");
+        }
+        if (null == getId()) {
+            throw new UnsupportedFormatException(
+                    "Cannot create a DictionaryHeader without an ID");
+        }
+    }
+
+    // Helper method to get the locale as a String
+    public String getLocaleString() {
+        return mDictionaryOptions.mAttributes.get(DICTIONARY_LOCALE_KEY);
+    }
+
+    // Helper method to get the version String
+    public String getVersion() {
+        return mDictionaryOptions.mAttributes.get(DICTIONARY_VERSION_KEY);
+    }
+
+    // Helper method to get the dictionary ID as a String
+    public String getId() {
+        return mDictionaryOptions.mAttributes.get(DICTIONARY_ID_KEY);
+    }
+
+    // Helper method to get the description
+    public String getDescription() {
+        // TODO: Right now each dictionary file comes with a description in its own language.
+        // It will display as is no matter the device's locale. It should be internationalized.
+        return mDictionaryOptions.mAttributes.get(DICTIONARY_DESCRIPTION_KEY);
+    }
+}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 74e305976..5a3807389 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -19,7 +19,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import java.io.File; @@ -330,66 +329,6 @@ public final class FormatSpec { } /** - * Class representing file header. - */ - public static final class FileHeader { - public final int mBodyOffset; - public final DictionaryOptions mDictionaryOptions; - public final FormatOptions mFormatOptions; - - // Note that these are corresponding definitions in native code in latinime::HeaderPolicy - // and latinime::HeaderReadWriteUtils. - // TODO: Standardize the key names and bump up the format version, taking care not to - // break format version 2 dictionaries. 
- public static final String DICTIONARY_VERSION_KEY = "version"; - public static final String DICTIONARY_LOCALE_KEY = "locale"; - public static final String DICTIONARY_ID_KEY = "dictionary"; - public static final String DICTIONARY_DESCRIPTION_KEY = "description"; - public static final String DICTIONARY_DATE_KEY = "date"; - public static final String HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO"; - public static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; - public static final String ATTRIBUTE_VALUE_TRUE = "1"; - public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions, - final FormatOptions formatOptions) throws UnsupportedFormatException { - mDictionaryOptions = dictionaryOptions; - mFormatOptions = formatOptions; - mBodyOffset = formatOptions.mVersion < VERSION4 ? headerSize : 0; - if (null == getLocaleString()) { - throw new UnsupportedFormatException("Cannot create a FileHeader without a locale"); - } - if (null == getVersion()) { - throw new UnsupportedFormatException( - "Cannot create a FileHeader without a version"); - } - if (null == getId()) { - throw new UnsupportedFormatException("Cannot create a FileHeader without an ID"); - } - } - - // Helper method to get the locale as a String - public String getLocaleString() { - return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_LOCALE_KEY); - } - - // Helper method to get the version String - public String getVersion() { - return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_VERSION_KEY); - } - - // Helper method to get the dictionary ID as a String - public String getId() { - return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_ID_KEY); - } - - // Helper method to get the description - public String getDescription() { - // TODO: Right now each dictionary file comes with a description in its own language. - // It will display as is no matter the device's locale. It should be internationalized. 
- return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_DESCRIPTION_KEY); - } - } - - /** * Returns new dictionary decoder. * * @param dictFile the dictionary file. diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index ea0a2c6c2..d35f780cf 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -19,7 +19,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -90,11 +89,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder { } @Override - public FileHeader readHeader() throws IOException, UnsupportedFormatException { + public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { if (mDictBuffer == null) { openDictBuffer(); } - final FileHeader header = super.readHeader(mDictBuffer); + final DictionaryHeader header = super.readHeader(mDictBuffer); final int version = header.mFormatOptions.mVersion; if (!(version >= 2 && version <= 3)) { throw new UnsupportedFormatException("File header has a wrong version : " + version); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index e459e4861..9ddaaf734 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ 
b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -19,7 +19,6 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -166,12 +165,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } @Override - public FileHeader readHeader() throws IOException, UnsupportedFormatException { + public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { if (mHeaderBuffer == null) { openDictBuffer(); } mHeaderBuffer.position(0); - final FileHeader header = super.readHeader(mHeaderBuffer); + final DictionaryHeader header = super.readHeader(mHeaderBuffer); final int version = header.mFormatOptions.mVersion; if (version != FormatSpec.VERSION4) { throw new UnsupportedFormatException("File header has a wrong version : " + version); diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index b12f79b07..160775d63 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -62,7 +62,7 @@ public class Ver4DictEncoder implements DictEncoder { final BinaryDictionary binaryDict = new BinaryDictionary(mDictPlacedDir.getAbsolutePath(), 0l, mDictPlacedDir.length(), true /* useFullEditDistance */, LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get( - FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY)), + 
DictionaryHeader.DICTIONARY_LOCALE_KEY)), Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */, true /* isUpdatable */); if (!binaryDict.isValidDictionary()) { diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index d636a253a..6a7a3368e 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -17,21 +17,15 @@ package com.android.inputmethod.latin.personalization; import android.content.Context; -import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary; -import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.utils.LanguageModelParam; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; import java.io.File; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; @@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit; */ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary { private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName(); - public static final boolean DBG_SAVE_RESTORE = false; private static final boolean DBG_DUMP_ON_CLOSE = false; /** Any pair being typed or picked */ @@ 
-53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED; public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY; - public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4; - /** The locale for this dictionary. */ public final Locale mLocale; @@ -95,13 +86,13 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB @Override protected Map<String, String> getHeaderAttributeMap() { HashMap<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_KEY, mDictName); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_VERSION_KEY, + attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, mDictName); + attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); + attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); return attributeMap; } @@ -161,57 +152,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB } @UsedForTesting - public void dumpAllWordsForDebug() { - runAfterGcForDebug(new Runnable() { - @Override - public void run() { - dumpAllWordsForDebugLocked(); - } - }); - } - - private void dumpAllWordsForDebugLocked() { - Log.d(TAG, "dumpAllWordsForDebug started."); - final 
OnAddWordListener listener = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "load unigram: " + word + "," + frequency); - } - - @Override - public void setBigram(final String word0, final String word1, final int frequency) { - if (word0.length() < Constants.DICTIONARY_MAX_WORD_LENGTH - && word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) { - Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency); - } else { - Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + "," - + frequency); - } - } - }; - - // Load the dictionary from binary file - final File dictFile = new File(mContext.getFilesDir(), mDictName); - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(dictFile, - DictDecoder.USE_BYTEARRAY); - if (dictDecoder == null) { - // This is an expected condition: we don't have a user history dictionary for this - // language yet. It will be created sometime later. 
- return; - } - - try { - dictDecoder.openDictBuffer(); - UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener); - } catch (IOException e) { - Log.d(TAG, "IOException on opening a bytebuffer", e); - } catch (UnsupportedFormatException e) { - Log.d(TAG, "Unsupported format, can't read the dictionary", e); - } - } - - @UsedForTesting public void clearAndFlushDictionary() { // Clear the node structure on memory clear(); diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java deleted file mode 100644 index 55a90ee51..000000000 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.personalization; - -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.utils.CollectionUtils; - -import java.util.HashMap; -import java.util.Set; - -/** - * A store of bigrams which will be updated when the user history dictionary is closed - * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale - * bigrams when we write to the SQL DB. 
- */ -@UsedForTesting -public final class UserHistoryDictionaryBigramList { - public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0; - private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName(); - private static final HashMap<String, Byte> EMPTY_BIGRAM_MAP = CollectionUtils.newHashMap(); - private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap(); - private int mSize = 0; - - public void evictAll() { - mSize = 0; - mBigramMap.clear(); - } - - /** - * Called when the user typed a word. - */ - @UsedForTesting - public void addBigram(String word1, String word2) { - addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE); - } - - /** - * Called when loaded from the SQL DB. - */ - public void addBigram(String word1, String word2, byte fcValue) { - if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { - Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue); - } - final HashMap<String, Byte> map; - if (mBigramMap.containsKey(word1)) { - map = mBigramMap.get(word1); - } else { - map = CollectionUtils.newHashMap(); - mBigramMap.put(word1, map); - } - if (!map.containsKey(word2)) { - ++mSize; - map.put(word2, fcValue); - } - } - - /** - * Called when inserted to the SQL DB. 
- */ - public void updateBigram(String word1, String word2, byte fcValue) { - if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { - Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue); - } - final HashMap<String, Byte> map; - if (mBigramMap.containsKey(word1)) { - map = mBigramMap.get(word1); - } else { - return; - } - if (!map.containsKey(word2)) { - return; - } - map.put(word2, fcValue); - } - - public int size() { - return mSize; - } - - public boolean isEmpty() { - return mBigramMap.isEmpty(); - } - - public boolean containsKey(String word) { - return mBigramMap.containsKey(word); - } - - public Set<String> keySet() { - return mBigramMap.keySet(); - } - - public HashMap<String, Byte> getBigrams(String word1) { - if (mBigramMap.containsKey(word1)) return mBigramMap.get(word1); - // TODO: lower case according to locale - final String lowerWord1 = word1.toLowerCase(); - if (mBigramMap.containsKey(lowerWord1)) return mBigramMap.get(lowerWord1); - return EMPTY_BIGRAM_MAP; - } - - public boolean removeBigram(String word1, String word2) { - final HashMap<String, Byte> set = getBigrams(word1); - if (set.isEmpty()) { - return false; - } - if (set.containsKey(word2)) { - set.remove(word2); - --mSize; - return true; - } - return false; - } -} diff --git a/java/src/com/android/inputmethod/latin/settings/DebugSettings.java b/java/src/com/android/inputmethod/latin/settings/DebugSettings.java index 29bbed8bd..fa5ae92e7 100644 --- a/java/src/com/android/inputmethod/latin/settings/DebugSettings.java +++ b/java/src/com/android/inputmethod/latin/settings/DebugSettings.java @@ -16,15 +16,18 @@ package com.android.inputmethod.latin.settings; +import android.content.Intent; import android.content.SharedPreferences; import android.os.Bundle; import android.os.Process; import android.preference.CheckBoxPreference; import android.preference.Preference; +import android.preference.Preference.OnPreferenceClickListener; import 
android.preference.PreferenceFragment; import android.preference.PreferenceScreen; -import com.android.inputmethod.keyboard.KeyboardSwitcher; +import com.android.inputmethod.latin.Dictionary; +import com.android.inputmethod.latin.DictionaryDumpBroadcastReceiver; import com.android.inputmethod.latin.LatinImeLogger; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.debug.ExternalDictionaryGetterForDebug; @@ -40,6 +43,11 @@ public final class DebugSettings extends PreferenceFragment public static final String PREF_USE_ONLY_PERSONALIZATION_DICTIONARY_FOR_DEBUG = "use_only_personalization_dictionary_for_debug"; private static final String PREF_READ_EXTERNAL_DICTIONARY = "read_external_dictionary"; + private static final String PREF_DUMP_CONTACTS_DICT = "dump_contacts_dict"; + private static final String PREF_DUMP_USER_DICT = "dump_user_dict"; + private static final String PREF_DUMP_USER_HISTORY_DICT = "dump_user_history_dict"; + private static final String PREF_DUMP_PERSONALIZATION_DICT = "dump_personalization_dict"; + private static final boolean SHOW_STATISTICS_LOGGING = false; private boolean mServiceNeedsRestart = false; @@ -83,11 +91,53 @@ public final class DebugSettings extends PreferenceFragment }); } + final OnPreferenceClickListener dictDumpPrefClickListener = + new DictDumpPrefClickListener(this); + findPreference(PREF_DUMP_CONTACTS_DICT).setOnPreferenceClickListener( + dictDumpPrefClickListener); + findPreference(PREF_DUMP_USER_DICT).setOnPreferenceClickListener( + dictDumpPrefClickListener); + findPreference(PREF_DUMP_USER_HISTORY_DICT).setOnPreferenceClickListener( + dictDumpPrefClickListener); + findPreference(PREF_DUMP_PERSONALIZATION_DICT).setOnPreferenceClickListener( + dictDumpPrefClickListener); + mServiceNeedsRestart = false; mDebugMode = (CheckBoxPreference) findPreference(PREF_DEBUG_MODE); updateDebugMode(); } + private static class DictDumpPrefClickListener implements OnPreferenceClickListener { + final 
PreferenceFragment mPreferenceFragment; + + public DictDumpPrefClickListener(final PreferenceFragment preferenceFragment) { + mPreferenceFragment = preferenceFragment; + } + + @Override + public boolean onPreferenceClick(final Preference arg0) { + final String dictName; + if (arg0.getKey().equals(PREF_DUMP_CONTACTS_DICT)) { + dictName = Dictionary.TYPE_CONTACTS; + } else if (arg0.getKey().equals(PREF_DUMP_USER_DICT)) { + dictName = Dictionary.TYPE_USER; + } else if (arg0.getKey().equals(PREF_DUMP_USER_HISTORY_DICT)) { + dictName = Dictionary.TYPE_USER_HISTORY; + } else if (arg0.getKey().equals(PREF_DUMP_PERSONALIZATION_DICT)) { + dictName = Dictionary.TYPE_PERSONALIZATION; + } else { + dictName = null; + } + if (dictName != null) { + final Intent intent = + new Intent(DictionaryDumpBroadcastReceiver.DICTIONARY_DUMP_INTENT_ACTION); + intent.putExtra(DictionaryDumpBroadcastReceiver.DICTIONARY_NAME_KEY, dictName); + mPreferenceFragment.getActivity().sendBroadcast(intent); + } + return true; + } + } + @Override public void onStop() { super.onStop(); diff --git a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java index 306735779..a15556511 100644 --- a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java @@ -29,7 +29,7 @@ import com.android.inputmethod.latin.BinaryDictionaryGetter; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import java.io.File; @@ -282,7 +282,7 @@ public class DictionaryInfoUtils { BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString(); } - 
public static FileHeader getDictionaryFileHeaderOrNull(final File file) { + public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) { return BinaryDictIOUtils.getDictionaryFileHeaderOrNull(file, 0, file.length()); } @@ -294,7 +294,7 @@ public class DictionaryInfoUtils { */ private static DictionaryInfo createDictionaryInfoFromFileAddress( final AssetFileAddress fileAddress) { - final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull( + final DictionaryHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull( new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength); if (header == null) { return null; diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java index c632a71a9..e7932b5a6 100644 --- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java @@ -46,7 +46,7 @@ public final class StringUtils { public static String newSingleCodePointString(int codePoint) { if (Character.charCount(codePoint) == 1) { - // Optimization: avoid creating an temporary array for characters that are + // Optimization: avoid creating a temporary array for characters that are // represented by a single char value return String.valueOf((char) codePoint); } @@ -205,6 +205,24 @@ public final class StringUtils { return codePoints; } + /** + * Construct a String from a code point array + * + * @param codePoints a code point array that is null terminated when its logical length is + * shorter than the array length. + * @return a string constructed from the code point array. 
+ */ + public static String getStringFromNullTerminatedCodePointArray(final int[] codePoints) { + int stringLength = codePoints.length; + for (int i = 0; i < codePoints.length; i++) { + if (codePoints[i] == 0) { + stringLength = i; + break; + } + } + return new String(codePoints, 0 /* offset */, stringLength); + } + // This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE. public static int getCapitalizationType(final String text) { // If the first char is not uppercase, then the word is either all lower case or diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java deleted file mode 100644 index 7af03da59..000000000 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.android.inputmethod.latin.utils; - -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.DictEncoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.PendingAttribute; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.concurrent.TimeUnit; - -/** - * Reads and writes Binary files for a UserHistoryDictionary. - * - * All the methods in this class are static. - */ -public final class UserHistoryDictIOUtils { - private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName(); - private static final boolean DEBUG = false; - - public interface OnAddWordListener { - /** - * Callback to be notified when a word is added to the dictionary. - * @param word The added word. - * @param shortcutTarget A shortcut target for this word, or null if none. - * @param frequency The frequency for this word. - * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist). - * Unspecified if shortcutTarget is null - do not rely on its value. 
- */ - public void setUnigram(final String word, final String shortcutTarget, final int frequency, - final int shortcutFreq); - public void setBigram(final String word1, final String word2, final int frequency); - } - - @UsedForTesting - public interface BigramDictionaryInterface { - public int getFrequency(final String word1, final String word2); - } - - /** - * Writes dictionary to file. - */ - @UsedForTesting - public static void writeDictionary(final DictEncoder dictEncoder, - final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, - final FormatOptions formatOptions, final HashMap<String, String> options) { - final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options); - fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY, - String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); - try { - dictEncoder.writeDictionary(fusionDict, formatOptions); - Log.d(TAG, "end writing"); - } catch (IOException e) { - Log.e(TAG, "IO exception while writing file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } - } - - /** - * Constructs a new FusionDictionary from BigramDictionaryInterface. - */ - @UsedForTesting - static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict, - final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) { - final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(options)); - int profTotal = 0; - for (final String word1 : bigrams.keySet()) { - final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1); - for (final String word2 : word1Bigrams.keySet()) { - final int freq = dict.getFrequency(word1, word2); - if (freq == -1) { - // don't add this bigram. 
- continue; - } - if (DEBUG) { - if (word1 == null) { - Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); - } else { - Log.d(TAG, "add bigram: " + word1 - + "," + word2 + "," + Integer.toString(freq)); - } - profTotal++; - } - if (word1 == null) { // unigram - fusionDict.add(word2, freq, null, false /* isNotAWord */); - } else { // bigram - if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) { - fusionDict.add(word1, 2, null, false /* isNotAWord */); - } - fusionDict.setBigram(word1, word2, freq); - } - bigrams.updateBigram(word1, word2, (byte)freq); - } - } - if (DEBUG) { - Log.d(TAG, "add " + profTotal + "words"); - } - return fusionDict; - } - - /** - * Reads dictionary from file. - */ - public static void readDictionaryBinary(final DictDecoder dictDecoder, - final OnAddWordListener dict) { - final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap(); - final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); - final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); - try { - dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams); - } catch (IOException e) { - Log.e(TAG, "IO exception while reading file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } catch (ArrayIndexOutOfBoundsException e) { - Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e); - } - addWordsFromWordMap(unigrams, frequencies, bigrams, dict); - } - - /** - * Adds all unigrams and bigrams in maps to OnAddWordListener. 
- */ - @UsedForTesting - static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams, - final TreeMap<Integer, Integer> frequencies, - final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams, - final OnAddWordListener to) { - for (Entry<Integer, String> entry : unigrams.entrySet()) { - final String word1 = entry.getValue(); - final int unigramFrequency = frequencies.get(entry.getKey()); - to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */); - final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); - if (attrList != null) { - for (final PendingAttribute attr : attrList) { - final String word2 = unigrams.get(attr.mAddress); - if (word1 == null || word2 == null) { - Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2); - continue; - } - to.setBigram(word1, word2, - BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency, - attr.mFrequency)); - } - } - } - - } -} diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java index ba9b114b0..c6630eb8b 100644 --- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java @@ -41,7 +41,7 @@ public class WordProperty { // package. public static final class ProbabilityInfo { public final int mProbability; - // wTimestamp, mLevel and mCount are historical info. These values are depend on the + // mTimestamp, mLevel and mCount are historical info. These values are depend on the // implementation in native code; thus, we must not use them and have any assumptions about // them except for tests. 
public final int mTimestamp; @@ -54,15 +54,11 @@ public class WordProperty { mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX]; mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]; } - } - private static int getCodePointCount(final int[] codePoints) { - for (int i = 0; i < codePoints.length; i++) { - if (codePoints[i] == 0) { - return i; - } + @Override + public String toString() { + return mTimestamp + ":" + mLevel + ":" + mCount; } - return codePoints.length; } // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. @@ -72,7 +68,7 @@ public class WordProperty { final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo, final ArrayList<int[]> shortcutTargets, final ArrayList<Integer> shortcutProbabilities) { - mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints)); + mCodePoints = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); mIsNotAWord = isNotAWord; mIsBlacklisted = isBlacklisted; mHasBigrams = hasBigram; @@ -81,9 +77,8 @@ public class WordProperty { final int bigramTargetCount = bigramTargets.size(); for (int i = 0; i < bigramTargetCount; i++) { - final int[] bigramTargetCodePointArray = bigramTargets.get(i); - final String bigramTargetString = new String(bigramTargetCodePointArray, - 0 /* offset */, getCodePointCount(bigramTargetCodePointArray)); + final String bigramTargetString = + StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i)); final ProbabilityInfo bigramProbability = new ProbabilityInfo(bigramProbabilityInfo.get(i)); mBigramTargets.add( @@ -93,9 +88,8 @@ public class WordProperty { final int shortcutTargetCount = shortcutTargets.size(); for (int i = 0; i < shortcutTargetCount; i++) { - final int[] shortcutTargetCodePointArray = shortcutTargets.get(i); - final String shortcutTargetString = new String(shortcutTargetCodePointArray, - 0 /* offset */, 
getCodePointCount(shortcutTargetCodePointArray)); + final String shortcutTargetString = + StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i)); mShortcutTargets.add( new WeightedString(shortcutTargetString, shortcutProbabilities.get(i))); } @@ -105,4 +99,44 @@ public class WordProperty { public boolean isValid() { return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY; } + + @Override + public String toString() { + // TODO: Move this logic to CombinedInputOutput. + final StringBuffer builder = new StringBuffer(); + builder.append(" word=" + mCodePoints); + builder.append(","); + builder.append("f=" + mProbabilityInfo.mProbability); + if (mIsNotAWord) { + builder.append(","); + builder.append("not_a_word=true"); + } + if (mIsBlacklisted) { + builder.append(","); + builder.append("blacklisted=true"); + } + if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) { + builder.append(","); + builder.append("historicalInfo=" + mProbabilityInfo); + } + builder.append("\n"); + for (int i = 0; i < mBigramTargets.size(); i++) { + builder.append(" bigram=" + mBigramTargets.get(i).mWord); + builder.append(","); + builder.append("f=" + mBigramTargets.get(i).mFrequency); + if (mBigramProbabilityInfo.get(i).mTimestamp + != BinaryDictionary.NOT_A_VALID_TIMESTAMP) { + builder.append(","); + builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i)); + } + builder.append("\n"); + } + for (int i = 0; i < mShortcutTargets.size(); i++) { + builder.append(" shortcut=" + mShortcutTargets.get(i).mWord); + builder.append(","); + builder.append("f=" + mShortcutTargets.get(i).mFrequency); + builder.append("\n"); + } + return builder.toString(); + } }
\ No newline at end of file diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index c919ebd91..4372cbeb5 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -137,6 +137,17 @@ static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dic delete dictionary; } +static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict, + jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys, + jobject outAttributeValues) { + Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); + if (!dictionary) return; + const int formatVersion = dictionary->getFormatVersionNumber(); + env->SetIntArrayRegion(outFormatVersion, 0 /* start */, 1 /* len */, &formatVersion); + // TODO: Implement + return; +} + static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); if (!dictionary) return 0; @@ -511,6 +522,11 @@ static const JNINativeMethod sMethods[] = { reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) }, { + const_cast<char *>("getHeaderInfoNative"), + const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), + reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo) + }, + { const_cast<char *>("flushNative"), const_cast<char *>("(JLjava/lang/String;)V"), reinterpret_cast<void *>(latinime_BinaryDictionary_flush) diff --git a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp index b8106377c..e37811b88 100644 --- a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp @@ -78,7 +78,8 @@ const int 
SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(&terminals[0]); } - + const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()-> + getHeaderStructurePolicy()->shouldBoostExactMatches(); // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; ++terminalIndex) { @@ -102,7 +103,7 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; && !(isPossiblyOffensiveWord && isFirstCharUppercase); const int outputTypeFlags = (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) - | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); + | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0); // Entries that are blacklisted or do not represent a word should not be output. const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); @@ -113,7 +114,8 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; compoundDistance, traverseSession->getInputSize(), terminalDicNode->getContainedErrorTypes(), (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) - || (isValidWord && scoringPolicy->doesAutoCorrectValidWord())); + || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()), + boostExactMatches); if (maxScore < finalScore && isValidWord) { maxScore = finalScore; } @@ -147,7 +149,7 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; scoringPolicy->calculateFinalScore(compoundDistance, traverseSession->getInputSize(), terminalDicNode->getContainedErrorTypes(), - true /* forceCommit */) : finalScore; + true /* forceCommit */, boostExactMatches) : finalScore; const int updatedOutputWordIndex = outputShortcuts(&shortcutIt, outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes, sameAsTyped); diff --git 
a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index b76b13971..417620e00 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -40,6 +40,8 @@ class DictionaryHeaderStructurePolicy { virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const = 0; + virtual bool shouldBoostExactMatches() const = 0; + protected: DictionaryHeaderStructurePolicy() {} diff --git a/native/jni/src/suggest/core/policy/scoring.h b/native/jni/src/suggest/core/policy/scoring.h index 783383450..e581a97c3 100644 --- a/native/jni/src/suggest/core/policy/scoring.h +++ b/native/jni/src/suggest/core/policy/scoring.h @@ -28,7 +28,8 @@ class DicTraverseSession; class Scoring { public: virtual int calculateFinalScore(const float compoundDistance, const int inputSize, - const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit) const = 0; + const ErrorTypeUtils::ErrorType containedErrorTypes, const bool forceCommit, + const bool boostExactMatches) const = 0; virtual bool getMostProbableString(const DicTraverseSession *const traverseSession, const int terminalSize, const float languageWeight, int *const outputCodePoints, int *const type, int *const freq) const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index a44f9f0fc..1320c6560 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -146,6 +146,11 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mHasHistoricalInfoOfWords; } + AK_FORCE_INLINE bool shouldBoostExactMatches() const { + // TODO: Investigate better ways to handle exact matches for 
personalized dictionaries. + return !isDecayingDict(); + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp index b918e0765..824d442e4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp @@ -28,6 +28,14 @@ const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 10000 const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; +bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode( + const PtNodeParams *const ptNodeParams) { + if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) { + mTerminalPositions->push_back(ptNodeParams->getHeadPos()); + } + return true; +} + // Visits all PtNodes in post-order depth first manner. 
// For example, visits c -> b -> y -> x -> a for the following dictionary: // a _ b _ c diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h index a69490943..bcc5c7857 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h @@ -59,6 +59,21 @@ class DynamicPtReadingHelper { DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; + class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener { + public: + TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions) + : mTerminalPositions(terminalPositions) {} + bool onAscend() { return true; } + bool onDescend(const int ptNodeArrayPos) { return true; } + bool onReadingPtNodeArrayTail() { return true; } + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions); + + std::vector<int> *const mTerminalPositions; + }; + DynamicPtReadingHelper(const BufferWithExtendableBuffer *const buffer, const PtNodeReader *const ptNodeReader) : mIsError(false), mReadingState(), mBuffer(buffer), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 1c420e070..75d85988c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -392,10 +392,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code historicalInfo->getCount(), &bigrams, &shortcuts); } -int 
Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, - int *const outCodePoints) { - // TODO: Implement. - return 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + if (token == 0) { + mTerminalPtNodePositionsForIteratingWords.clear(); + DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( + &mTerminalPtNodePositionsForIteratingWords); + DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); + } + const int terminalPtNodePositionsVectorSize = + static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); + if (token < 0 || token >= terminalPtNodePositionsVectorSize) { + AKLOGE("Given token %d is invalid.", token); + return 0; + } + const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + int unigramProbability = NOT_A_PROBABILITY; + getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, + outCodePoints, &unigramProbability); + const int nextToken = token + 1; + if (nextToken >= terminalPtNodePositionsVectorSize) { + // All words have been iterated. 
+ mTerminalPtNodePositionsForIteratingWords.clear(); + return 0; + } + return nextToken; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 1bcd4ceea..9ba5be0c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -17,6 +17,8 @@ #ifndef LATINIME_VER4_PATRICIA_TRIE_POLICY_H #define LATINIME_VER4_PATRICIA_TRIE_POLICY_H +#include <vector> + #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" @@ -50,7 +52,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), - mBigramCount(mHeaderPolicy->getBigramCount()) {}; + mBigramCount(mHeaderPolicy->getBigramCount()), + mTerminalPtNodePositionsForIteratingWords() {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -134,6 +137,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; + std::vector<int> mTerminalPtNodePositionsForIteratingWords; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h index c777e7238..8b405e8de 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h @@ -50,14 +50,14 @@ class TypingScoring : public Scoring { AK_FORCE_INLINE int calculateFinalScore(const float compoundDistance, const 
int inputSize, const ErrorTypeUtils::ErrorType containedErrorTypes, - const bool forceCommit) const { + const bool forceCommit, const bool boostExactMatches) const { const float maxDistance = ScoringParams::DISTANCE_WEIGHT_LANGUAGE + static_cast<float>(inputSize) * ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT; float score = ScoringParams::TYPING_BASE_OUTPUT_SCORE - compoundDistance / maxDistance; if (forceCommit) { score += ScoringParams::AUTOCORRECT_OUTPUT_THRESHOLD; } - if (ErrorTypeUtils::isExactMatch(containedErrorTypes)) { + if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { score += ScoringParams::EXACT_MATCH_PROMOTION; if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) { score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index 343ab420c..c41bbd768 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -22,6 +22,7 @@ import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.DictDecoder; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; @@ -102,14 +103,14 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { getContext().getCacheDir()); FileUtils.deleteRecursively(file); Map<String, String> attributeMap = new HashMap<String, String>(); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_KEY, dictId); - attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY, dictId); - 
attributeMap.put(FormatSpec.FileHeader.DICTIONARY_VERSION_KEY, + attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, dictId); + attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, dictId); + attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); - attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); - attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_KEY, - FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, attributeMap)) { return file; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index e39b46f94..bab86e546 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -971,6 +971,99 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } + public void testIterateAllWords() { + testIterateAllWords(FormatSpec.VERSION4); + } + + private void testIterateAllWords(final int formatVersion) { + final long seed = System.currentTimeMillis(); + final Random random = new Random(seed); + final int UNIGRAM_COUNT = 1000; + final int BIGRAM_COUNT = 1000; + final int codePointSetSize = 20; + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new 
BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + assertFalse(invalidWordProperty.isValid()); + + final ArrayList<String> words = new ArrayList<String>(); + final HashMap<String, Integer> wordProbabilitiesToCheckLater = + new HashMap<String, Integer>(); + final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>(); + final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = + new HashMap<Pair<String, String>, Integer>(); + + for (int i = 0; i < UNIGRAM_COUNT; i++) { + final String word = CodePointUtils.generateWord(random, codePointSet); + final int unigramProbability = random.nextInt(0xFF); + addUnigramWord(binaryDictionary, word, unigramProbability); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + words.add(word); + wordProbabilitiesToCheckLater.put(word, unigramProbability); + } + + for (int i = 0; i < BIGRAM_COUNT; i++) { + final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); + if (word0Index == word1Index) { + continue; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + binaryDictionary.addBigramWords(word0, word1, bigramProbability, + BinaryDictionary.NOT_A_VALID_TIMESTAMP); + if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { + binaryDictionary.flushWithGC(); + } + if (!bigrams.containsKey(word0)) { + final HashSet<String> bigramWord1s = new HashSet<String>(); + bigrams.put(word0, bigramWord1s); + } + bigrams.get(word0).add(word1); + bigramProbabilitiesToCheckLater.put( + new Pair<String, String>(word0, word1), 
bigramProbability); + } + + final HashSet<String> wordSet = new HashSet<String>(words); + final HashSet<Pair<String, String>> bigramSet = + new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet()); + int token = 0; + do { + final BinaryDictionary.GetNextWordPropertyResult result = + binaryDictionary.getNextWordProperty(token); + final WordProperty wordProperty = result.mWordProperty; + final String word0 = wordProperty.mCodePoints; + assertEquals((int)wordProbabilitiesToCheckLater.get(word0), + wordProperty.mProbabilityInfo.mProbability); + wordSet.remove(word0); + final HashSet<String> bigramWord1s = bigrams.get(word0); + for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) { + final String word1 = wordProperty.mBigramTargets.get(j).mWord; + assertTrue(bigramWord1s.contains(word1)); + final int probability = wordProperty.mBigramTargets.get(j).mFrequency; + final Pair<String, String> bigram = new Pair<String, String>(word0, word1); + assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability); + bigramSet.remove(bigram); + } + token = result.mNextToken; + } while (token != 0); + assertTrue(wordSet.isEmpty()); + assertTrue(bigramSet.isEmpty()); + } + public void testAddShortcuts() { testAddShortcuts(FormatSpec.VERSION4); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 715db2f9b..0815819d6 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -24,7 +24,6 @@ import android.util.SparseArray; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import 
com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -498,7 +497,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); - FileHeader fileHeader = null; + DictionaryHeader fileHeader = null; try { fileHeader = dictDecoder.readHeader(); } catch (IOException e) { diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java index 20cf9a562..79f3e0dc9 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java @@ -16,7 +16,6 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; @@ -39,14 +38,14 @@ public class BinaryDictUtils { public static DictionaryOptions makeDictionaryOptions(final String id, final String version, final FormatSpec.FormatOptions formatOptions) { final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>()); - options.mAttributes.put(FileHeader.DICTIONARY_LOCALE_KEY, "en_US"); - options.mAttributes.put(FileHeader.DICTIONARY_ID_KEY, id); - options.mAttributes.put(FileHeader.DICTIONARY_VERSION_KEY, version); + options.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, "en_US"); + options.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, id); + options.mAttributes.put(DictionaryHeader.DICTIONARY_VERSION_KEY, version); if 
(formatOptions.mHasTimestamp) { - options.mAttributes.put(FileHeader.HAS_HISTORICAL_INFO_KEY, - FileHeader.ATTRIBUTE_VALUE_TRUE); - options.mAttributes.put(FileHeader.USES_FORGETTING_CURVE_KEY, - FileHeader.ATTRIBUTE_VALUE_TRUE); + options.mAttributes.put(DictionaryHeader.HAS_HISTORICAL_INFO_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); + options.mAttributes.put(DictionaryHeader.USES_FORGETTING_CURVE_KEY, + DictionaryHeader.ATTRIBUTE_VALUE_TRUE); } return options; } diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java deleted file mode 100644 index 93731b3cd..000000000 --- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.android.inputmethod.latin.utils; - -import android.content.Context; -import android.test.AndroidTestCase; -import android.test.suitebuilder.annotation.LargeTest; -import android.util.Log; - -import com.android.inputmethod.latin.makedict.DictDecoder; -import com.android.inputmethod.latin.makedict.DictEncoder; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver2DictDecoder; -import com.android.inputmethod.latin.makedict.Ver2DictEncoder; -import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; - -/** - * Unit tests for UserHistoryDictIOUtils - */ -@LargeTest -public class UserHistoryDictIOUtilsTests extends AndroidTestCase - implements BigramDictionaryInterface { - - private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName(); - private static final int UNIGRAM_FREQUENCY = 50; - private static final int BIGRAM_FREQUENCY = 100; - private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>(); - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = new FormatSpec.FormatOptions(2); - private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - private static final HashMap<String, String> HEADER_OPTIONS = new HashMap<String, String>(); - static { - 
HEADER_OPTIONS.put(FileHeader.DICTIONARY_LOCALE_KEY, "en_US"); - HEADER_OPTIONS.put(FileHeader.DICTIONARY_ID_KEY, "test"); - HEADER_OPTIONS.put(FileHeader.DICTIONARY_VERSION_KEY, "1000"); - } - - /** - * Return same frequency for all words and bigrams - */ - @Override - public int getFrequency(String word1, String word2) { - if (word1 == null) return UNIGRAM_FREQUENCY; - return BIGRAM_FREQUENCY; - } - - // Utilities for Testing - - private void addWord(final String word, - final HashMap<String, ArrayList<String> > addedWords) { - if (!addedWords.containsKey(word)) { - addedWords.put(word, new ArrayList<String>()); - } - } - - private void addBigram(final String word1, final String word2, - final HashMap<String, ArrayList<String> > addedWords) { - addWord(word1, addedWords); - addWord(word2, addedWords); - addedWords.get(word1).add(word2); - } - - private void addBigramToBigramList(final String word1, final String word2, - final HashMap<String, ArrayList<String> > addedWords, - final UserHistoryDictionaryBigramList bigramList) { - bigramList.addBigram(null, word1); - bigramList.addBigram(word1, word2); - - addBigram(word1, word2, addedWords); - } - - private void checkWordInFusionDict(final FusionDictionary dict, final String word, - final ArrayList<String> expectedBigrams) { - final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); - assertNotNull(ptNode); - assertTrue(ptNode.isTerminal()); - - for (final String bigram : expectedBigrams) { - assertNotNull(ptNode.getBigram(bigram)); - } - } - - private void checkWordsInFusionDict(final FusionDictionary dict, - final HashMap<String, ArrayList<String> > bigrams) { - for (final String word : bigrams.keySet()) { - if (bigrams.containsKey(word)) { - checkWordInFusionDict(dict, word, bigrams.get(word)); - } else { - checkWordInFusionDict(dict, word, NOT_HAVE_BIGRAM); - } - } - } - - private void checkWordInBigramList( - final UserHistoryDictionaryBigramList bigramList, final String word, - final 
ArrayList<String> expectedBigrams) { - // check unigram - final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null); - assertTrue(unigramMap.containsKey(word)); - - // check bigrams - final ArrayList<String> actualBigrams = new ArrayList<String>( - bigramList.getBigrams(word).keySet()); - - Collections.sort(expectedBigrams); - Collections.sort(actualBigrams); - assertEquals(expectedBigrams, actualBigrams); - } - - private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList, - final HashMap<String, ArrayList<String> > addedWords) { - for (final String word : addedWords.keySet()) { - if (addedWords.containsKey(word)) { - checkWordInBigramList(bigramList, word, addedWords.get(word)); - } else { - checkWordInBigramList(bigramList, word, NOT_HAVE_BIGRAM); - } - } - } - - private void writeDictToFile(final File file, - final UserHistoryDictionaryBigramList bigramList) { - final DictEncoder dictEncoder = new Ver2DictEncoder(file); - UserHistoryDictIOUtils.writeDictionary(dictEncoder, this, bigramList, FORMAT_OPTIONS, - HEADER_OPTIONS); - } - - private void readDictFromFile(final File file, final OnAddWordListener listener) - throws IOException, FileNotFoundException, UnsupportedFormatException { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY); - dictDecoder.openDictBuffer(); - UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener); - } - - public void testGenerateFusionDictionary() { - final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); - - final HashMap<String, ArrayList<String> > addedWords = - new HashMap<String, ArrayList<String>>(); - addBigramToBigramList("this", "is", addedWords, originalList); - addBigramToBigramList("this", "was", addedWords, originalList); - addBigramToBigramList("hello", "world", addedWords, originalList); - - final FusionDictionary fusionDict = UserHistoryDictIOUtils.constructFusionDictionary( - this, 
originalList, HEADER_OPTIONS); - - checkWordsInFusionDict(fusionDict, addedWords); - } - - public void testReadAndWrite() throws IOException, FileNotFoundException, - UnsupportedFormatException { - final Context context = getContext(); - - File file = null; - try { - file = File.createTempFile("testReadAndWrite", TEST_DICT_FILE_EXTENSION, - getContext().getCacheDir()); - } catch (IOException e) { - Log.d(TAG, "IOException while creating a temporary file", e); - } - assertNotNull(file); - - // make original dictionary - final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); - final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap(); - addBigramToBigramList("this" , "is" , addedWords, originalList); - addBigramToBigramList("this" , "was" , addedWords, originalList); - addBigramToBigramList("is" , "not" , addedWords, originalList); - addBigramToBigramList("hello", "world", addedWords, originalList); - - // write to file - writeDictToFile(file, originalList); - - // make result dict. 
- final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList(); - final OnAddWordListener listener = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "in: setUnigram: " + word + "," + frequency); - resultList.addBigram(null, word, (byte)frequency); - } - @Override - public void setBigram(final String word1, final String word2, final int frequency) { - Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); - resultList.addBigram(word1, word2, (byte)frequency); - } - }; - - // load from file - readDictFromFile(file, listener); - checkWordsInBigramList(resultList, addedWords); - - // add new bigram - addBigramToBigramList("hello", "java", addedWords, resultList); - - // rewrite - writeDictToFile(file, resultList); - final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList(); - final OnAddWordListener listener2 = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency, final int shortcutFreq) { - Log.d(TAG, "in: setUnigram: " + word + "," + frequency); - resultList2.addBigram(null, word, (byte)frequency); - } - @Override - public void setBigram(final String word1, final String word2, final int frequency) { - Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); - resultList2.addBigram(word1, word2, (byte)frequency); - } - }; - - // load from file - readDictFromFile(file, listener2); - checkWordsInBigramList(resultList2, addedWords); - } -} diff --git a/tools/dicttool/compat/android/util/SparseIntArray.java b/tools/dicttool/compat/android/util/SparseIntArray.java new file mode 100644 index 000000000..ac8a04ceb --- /dev/null +++ b/tools/dicttool/compat/android/util/SparseIntArray.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.util; + +public class SparseIntArray { + private final SparseArray<Integer> mArray; + + public SparseIntArray() { + this(10); + } + + public SparseIntArray(final int initialCapacity) { + mArray = new SparseArray<Integer>(initialCapacity); + } + + public int size() { + return mArray.size(); + } + + public void clear() { + mArray.clear(); + } + + public void put(final int key, final int value) { + mArray.put(key, value); + } + + public int get(final int key) { + return get(key, 0); + } + + public int get(final int key, final int valueIfKeyNotFound) { + return mArray.get(key, valueIfKeyNotFound); + } + + public int indexOfKey(final int key) { + return mArray.indexOfKey(key); + } + + public int keyAt(final int index) { + return mArray.keyAt(index); + } +} diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index d8059e428..9ed4dd5a2 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin.dicttool; import com.android.inputmethod.latin.makedict.DictDecoder; import com.android.inputmethod.latin.makedict.DictEncoder; +import 
com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -48,9 +49,9 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { // Create a thrice-compressed dictionary file. final DictionaryOptions testOptions = new DictionaryOptions(new HashMap<String, String>()); - testOptions.mAttributes.put(FormatSpec.FileHeader.DICTIONARY_VERSION_KEY, VERSION); - testOptions.mAttributes.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY, LOCALE); - testOptions.mAttributes.put(FormatSpec.FileHeader.DICTIONARY_ID_KEY, ID); + testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_VERSION_KEY, VERSION); + testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, LOCALE); + testOptions.mAttributes.put(DictionaryHeader.DICTIONARY_ID_KEY, ID); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), testOptions); dict.add("foo", TEST_FREQ, null, false /* isNotAWord */); dict.add("fta", 1, null, false /* isNotAWord */); @@ -80,11 +81,11 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { null /* dict : an optional dictionary to add words to, or null */, false /* deleteDictIfBroken */); assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get( - FormatSpec.FileHeader.DICTIONARY_VERSION_KEY)); + DictionaryHeader.DICTIONARY_VERSION_KEY)); assertEquals("Wrong locale attribute", LOCALE, resultDict.mOptions.mAttributes.get( - FormatSpec.FileHeader.DICTIONARY_LOCALE_KEY)); + DictionaryHeader.DICTIONARY_LOCALE_KEY)); assertEquals("Wrong id attribute", ID, resultDict.mOptions.mAttributes.get( - FormatSpec.FileHeader.DICTIONARY_ID_KEY)); + DictionaryHeader.DICTIONARY_ID_KEY)); assertEquals("Dictionary can't be read back correctly", FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(), TEST_FREQ); |