diff options
51 files changed, 802 insertions, 245 deletions
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz Binary files differindex d28ef485b..afef676b2 100644 --- a/dictionaries/en_GB_wordlist.combined.gz +++ b/dictionaries/en_GB_wordlist.combined.gz diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz Binary files differindex b656f880d..eafbc9d30 100644 --- a/dictionaries/en_US_wordlist.combined.gz +++ b/dictionaries/en_US_wordlist.combined.gz diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz Binary files differindex 8aa40e944..9cbca0b41 100644 --- a/dictionaries/en_wordlist.combined.gz +++ b/dictionaries/en_wordlist.combined.gz diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz Binary files differindex 1d988d6fe..1815e4732 100644 --- a/dictionaries/fr_wordlist.combined.gz +++ b/dictionaries/fr_wordlist.combined.gz diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz Binary files differindex 221ea7508..876eb71c2 100644 --- a/dictionaries/pt_BR_wordlist.combined.gz +++ b/dictionaries/pt_BR_wordlist.combined.gz diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz Binary files differindex 6a041d9cf..406869059 100644 --- a/dictionaries/pt_PT_wordlist.combined.gz +++ b/dictionaries/pt_PT_wordlist.combined.gz diff --git a/java/res/drawable-hdpi/btn_keyboard_key_dark_active_holo.9.png b/java/res/drawable-hdpi/btn_keyboard_key_dark_active_holo.9.png Binary files differindex 87211a502..fa2cb8542 100644 --- a/java/res/drawable-hdpi/btn_keyboard_key_dark_active_holo.9.png +++ b/java/res/drawable-hdpi/btn_keyboard_key_dark_active_holo.9.png diff --git a/java/res/drawable-hdpi/sym_keyboard_voice_holo_dark.png b/java/res/drawable-hdpi/sym_keyboard_voice_holo_dark.png Binary files differindex c1e16a651..8a6336a57 100644 --- a/java/res/drawable-hdpi/sym_keyboard_voice_holo_dark.png +++ b/java/res/drawable-hdpi/sym_keyboard_voice_holo_dark.png diff --git a/java/res/drawable-hdpi/sym_keyboard_voice_off_holo_dark.png b/java/res/drawable-hdpi/sym_keyboard_voice_off_holo_dark.png Binary files differindex 26d068490..edf1379ab 100644 --- a/java/res/drawable-hdpi/sym_keyboard_voice_off_holo_dark.png +++ b/java/res/drawable-hdpi/sym_keyboard_voice_off_holo_dark.png diff --git a/java/res/drawable-mdpi/btn_keyboard_key_dark_active_holo.9.png b/java/res/drawable-mdpi/btn_keyboard_key_dark_active_holo.9.png Binary files differindex f98653ea1..8e9a34957 100644 --- a/java/res/drawable-mdpi/btn_keyboard_key_dark_active_holo.9.png +++ b/java/res/drawable-mdpi/btn_keyboard_key_dark_active_holo.9.png diff --git a/java/res/drawable-mdpi/sym_keyboard_voice_holo_dark.png b/java/res/drawable-mdpi/sym_keyboard_voice_holo_dark.png Binary files differindex 16be37d05..0795fcc9b 100644 --- a/java/res/drawable-mdpi/sym_keyboard_voice_holo_dark.png +++ b/java/res/drawable-mdpi/sym_keyboard_voice_holo_dark.png diff --git a/java/res/drawable-mdpi/sym_keyboard_voice_off_holo_dark.png b/java/res/drawable-mdpi/sym_keyboard_voice_off_holo_dark.png Binary files differindex 95d718a46..f76da5797 100644 --- a/java/res/drawable-mdpi/sym_keyboard_voice_off_holo_dark.png +++ b/java/res/drawable-mdpi/sym_keyboard_voice_off_holo_dark.png diff --git a/java/res/drawable-xhdpi/btn_keyboard_key_dark_active_holo.9.png b/java/res/drawable-xhdpi/btn_keyboard_key_dark_active_holo.9.png Binary files differindex 738316d66..a2f6ac0e2 100644 --- a/java/res/drawable-xhdpi/btn_keyboard_key_dark_active_holo.9.png +++ b/java/res/drawable-xhdpi/btn_keyboard_key_dark_active_holo.9.png diff --git a/java/res/drawable-xhdpi/sym_keyboard_voice_holo_dark.png b/java/res/drawable-xhdpi/sym_keyboard_voice_holo_dark.png Binary files differindex 944a8524d..b2bb9b803 100644 --- a/java/res/drawable-xhdpi/sym_keyboard_voice_holo_dark.png +++ b/java/res/drawable-xhdpi/sym_keyboard_voice_holo_dark.png diff --git a/java/res/drawable-xhdpi/sym_keyboard_voice_off_holo_dark.png b/java/res/drawable-xhdpi/sym_keyboard_voice_off_holo_dark.png Binary files differindex 2016caf40..23e75bfe7 100644 --- a/java/res/drawable-xhdpi/sym_keyboard_voice_off_holo_dark.png +++ b/java/res/drawable-xhdpi/sym_keyboard_voice_off_holo_dark.png diff --git a/java/res/drawable-xxhdpi/btn_keyboard_key_dark_active_holo.9.png b/java/res/drawable-xxhdpi/btn_keyboard_key_dark_active_holo.9.png Binary files differindex b35c29fe6..17f0a7a58 100644 --- a/java/res/drawable-xxhdpi/btn_keyboard_key_dark_active_holo.9.png +++ b/java/res/drawable-xxhdpi/btn_keyboard_key_dark_active_holo.9.png diff --git a/java/res/drawable-xxhdpi/sym_keyboard_voice_holo_dark.png b/java/res/drawable-xxhdpi/sym_keyboard_voice_holo_dark.png Binary files differindex 6809f0711..f04cadf6f 100644 --- a/java/res/drawable-xxhdpi/sym_keyboard_voice_holo_dark.png +++ b/java/res/drawable-xxhdpi/sym_keyboard_voice_holo_dark.png diff --git a/java/res/drawable-xxhdpi/sym_keyboard_voice_off_holo_dark.png b/java/res/drawable-xxhdpi/sym_keyboard_voice_off_holo_dark.png Binary files differindex 6bd506a11..e74d523bc 100644 --- a/java/res/drawable-xxhdpi/sym_keyboard_voice_off_holo_dark.png +++ b/java/res/drawable-xxhdpi/sym_keyboard_voice_off_holo_dark.png diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict Binary files differindex 6564d47fa..8660c28e2 100644 --- a/java/res/raw/main_en.dict +++ b/java/res/raw/main_en.dict diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict Binary files differindex 10adad092..0d2e51837 100644 --- a/java/res/raw/main_fr.dict +++ b/java/res/raw/main_fr.dict diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict Binary files differindex f9ae9b561..98a27c7c8 100644 --- a/java/res/raw/main_pt_br.dict +++ b/java/res/raw/main_pt_br.dict diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index fdde98da1..a463651d5 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -31,6 +31,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Locale; +import java.util.Map; /** * Implements a static, compacted, binary dictionary of standard words. @@ -104,6 +105,8 @@ public final class BinaryDictionary extends Dictionary { JniUtils.loadNativeLibrary(); } + private static native boolean createEmptyDictFileNative(String filePath, long dictVersion, + String[] attributeKeyStringArray, String[] attributeValueStringArray); private static native long openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable); private static native void flushNative(long dict, String filePath); @@ -127,6 +130,20 @@ public final class BinaryDictionary extends Dictionary { private static native int calculateProbabilityNative(long dict, int unigramProbability, int bigramProbability); + @UsedForTesting + public static boolean createEmptyDictFile(final String filePath, final long dictVersion, + final Map<String, String> attributeMap) { + final String[] keyArray = new String[attributeMap.size()]; + final String[] valueArray = new String[attributeMap.size()]; + int index = 0; + for (final String key : attributeMap.keySet()) { + keyArray[index] = key; + valueArray[index] = attributeMap.get(key); + index++; + } + return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray); + } + // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable) { diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 0774ce203..99859decf 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -22,12 +22,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import com.android.inputmethod.latin.personalization.DynamicPersonalizationDictionaryWriter; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.utils.AsyncResultHolder; @@ -35,9 +30,9 @@ import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor; import java.io.File; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; @@ -68,8 +63,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ protected static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3 /* version */, true /* supportsDynamicUpdate */); + private static final int DICTIONARY_FORMAT_VERSION = 3; + + private static final String SUPPORTS_DYNAMIC_UPDATE = + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE; /** * A static map of time recorders, each of which records the time of accesses to a single binary @@ -233,6 +230,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { }); } + protected Map<String, String> getHeaderAttributeMap() { + HashMap<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + SUPPORTS_DYNAMIC_UPDATE); + return attributeMap; + } + protected void clear() { getExecutor(mFilename).execute(new Runnable() { @Override @@ -240,17 +244,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE && mDictionaryWriter == null) { mBinaryDictionary.close(); final File file = new File(mContext.getFilesDir(), mFilename); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), - false, false)); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - try { - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - } catch (IOException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } + BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); } else { mDictionaryWriter.clear(); } @@ -498,17 +493,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) { if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) { final File file = new File(mContext.getFilesDir(), mFilename); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), - false, false)); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - try { - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - } catch (IOException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Exception in creating new dictionary file.", e); - } + BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); } else { if (mBinaryDictionary.needsToRunGC()) { mBinaryDictionary.flushWithGC(); diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 3b1d2427b..6cc0bfb76 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -225,6 +225,26 @@ public class BinaryDictEncoderUtils { return position; } + static void writeUIntToStream(final OutputStream stream, final int value, final int size) + throws IOException { + switch(size) { + case 4: + stream.write((value >> 24) & 0xFF); + /* fall through */ + case 3: + stream.write((value >> 16) & 0xFF); + /* fall through */ + case 2: + stream.write((value >> 8) & 0xFF); + /* fall through */ + case 1: + stream.write(value & 0xFF); + break; + default: + /* nop */ + } + } + // End utility methods // This method is responsible for finding a nice ordering of the nodes that favors run-time diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index aa5129ccb..849bff050 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -325,6 +325,12 @@ public final class FormatSpec { public final int mHeaderSize; public final DictionaryOptions mDictionaryOptions; public final FormatOptions mFormatOptions; + // Note that these are corresponding definitions in native code in latinime::HeaderPolicy + // and latinime::HeaderReadWriteUtils. + public static final String SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE = "SUPPORTS_DYNAMIC_UPDATE"; + public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE"; + public static final String ATTRIBUTE_VALUE_TRUE = "1"; + private static final String DICTIONARY_VERSION_ATTRIBUTE = "version"; private static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale"; private static final String DICTIONARY_ID_ATTRIBUTE = "dictionary"; diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java new file mode 100644 index 000000000..0b9cf91d2 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collections; + +/** + * SparseTable is an extensible map from integer to integer. + * This holds one value for every mBlockSize keys, so it uses 1/mBlockSize'th of the full index + * memory. + */ +@UsedForTesting +public class SparseTable { + + /** + * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize + * terminals. + * It contains at index i = j / mBlockSize the index in mContentsTable where the values for + * terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized integer array. + */ + private final ArrayList<Integer> mLookupTable; + private final ArrayList<Integer> mContentTable; + + private final int mBlockSize; + public static final int NOT_EXIST = -1; + + @UsedForTesting + public SparseTable(final int initialCapacity, final int blockSize) { + mBlockSize = blockSize; + final int lookupTableSize = initialCapacity / mBlockSize + + (initialCapacity % mBlockSize > 0 ? 1 : 0); + mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST)); + mContentTable = new ArrayList<Integer>(); + } + + @UsedForTesting + public SparseTable(final int[] lookupTable, final int[] contentTable, final int blockSize) { + mBlockSize = blockSize; + mLookupTable = new ArrayList<Integer>(lookupTable.length); + for (int i = 0; i < lookupTable.length; ++i) { + mLookupTable.add(lookupTable[i]); + } + mContentTable = new ArrayList<Integer>(contentTable.length); + for (int i = 0; i < contentTable.length; ++i) { + mContentTable.add(contentTable[i]); + } + } + + /** + * Converts an byte array to an int array considering each set of 4 bytes is an int stored in + * big-endian. + * The length of byteArray must be a multiple of four. + * Otherwise, IndexOutOfBoundsException will be raised. + */ + @UsedForTesting + private static void convertByteArrayToIntegerArray(final byte[] byteArray, + final ArrayList<Integer> integerArray) { + for (int i = 0; i < byteArray.length; i += 4) { + int value = 0; + for (int j = i; j < i + 4; ++j) { + value <<= 8; + value |= byteArray[j] & 0xFF; + } + integerArray.add(value); + } + } + + @UsedForTesting + public SparseTable(final byte[] lookupTable, final byte[] contentTable, final int blockSize) { + mBlockSize = blockSize; + mLookupTable = new ArrayList<Integer>(lookupTable.length / 4); + mContentTable = new ArrayList<Integer>(contentTable.length / 4); + convertByteArrayToIntegerArray(lookupTable, mLookupTable); + convertByteArrayToIntegerArray(contentTable, mContentTable); + } + + @UsedForTesting + public int get(final int index) { + if (index < 0 || index / mBlockSize >= mLookupTable.size() + || mLookupTable.get(index / mBlockSize) == NOT_EXIST) { + return NOT_EXIST; + } + return mContentTable.get(mLookupTable.get(index / mBlockSize) + (index % mBlockSize)); + } + + @UsedForTesting + public void set(final int index, final int value) { + if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) { + mLookupTable.set(index / mBlockSize, mContentTable.size()); + for (int i = 0; i < mBlockSize; ++i) { + mContentTable.add(NOT_EXIST); + } + } + mContentTable.set(mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value); + } + + public void remove(final int index) { + set(index, NOT_EXIST); + } + + @UsedForTesting + public int size() { + return mLookupTable.size() * mBlockSize; + } + + @UsedForTesting + /* package */ int getContentTableSize() { + return mContentTable.size(); + } + + @UsedForTesting + /* package */ int getLookupTableSize() { + return mLookupTable.size(); + } + + public boolean contains(final int index) { + return get(index) != NOT_EXIST; + } + + @UsedForTesting + public void write(final OutputStream lookupOutStream, final OutputStream contentOutStream) + throws IOException { + for (final int index : mLookupTable) { + BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, 4); + } + + for (final int index : mContentTable) { + BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); + } + } +} diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java index 075d7e3c3..66517a800 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java @@ -34,12 +34,15 @@ import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListe import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; /** - * This class is a base class of a dictionary for the personalized prediction language model. + * This class is a base class of a dictionary that supports decaying for the personalized language + * model. */ -public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDictionary { - private static final String TAG = DynamicPredictionDictionaryBase.class.getSimpleName(); +public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary { + private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName(); public static final boolean DBG_SAVE_RESTORE = false; private static final boolean DBG_STRESS_TEST = false; private static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; @@ -60,8 +63,9 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDi // Should always be false except when we use this class for test @UsedForTesting boolean mIsTest = false; - /* package */ DynamicPredictionDictionaryBase(final Context context, final String locale, - final SharedPreferences sp, final String dictionaryType, final String fileName) { + /* package */ DecayingExpandableBinaryDictionaryBase(final Context context, + final String locale, final SharedPreferences sp, final String dictionaryType, + final String fileName) { super(context, fileName, dictionaryType, true); mLocale = locale; mFileName = fileName; @@ -84,6 +88,16 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableBinaryDi } @Override + protected Map<String, String> getHeaderAttributeMap() { + HashMap<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + return attributeMap; + } + + @Override protected boolean hasContentChanged() { return false; } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java index ab3de801c..c616a296c 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java @@ -46,7 +46,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Use a dynamic binary dictionary instead public WeakReference<PersonalizationDictionary> mDictionary; - public WeakReference<DynamicPredictionDictionaryBase> mPredictionDictionary; + public WeakReference<DecayingExpandableBinaryDictionaryBase> mPredictionDictionary; public final String mSystemLocale; public PersonalizationDictionaryUpdateSession(String locale) { mSystemLocale = locale; @@ -60,15 +60,16 @@ public abstract class PersonalizationDictionaryUpdateSession { mDictionary = new WeakReference<PersonalizationDictionary>(dictionary); } - public void setPredictionDictionary(DynamicPredictionDictionaryBase dictionary) { - mPredictionDictionary = new WeakReference<DynamicPredictionDictionaryBase>(dictionary); + public void setPredictionDictionary(DecayingExpandableBinaryDictionaryBase dictionary) { + mPredictionDictionary = + new WeakReference<DecayingExpandableBinaryDictionaryBase>(dictionary); } protected PersonalizationDictionary getDictionary() { return mDictionary == null ? null : mDictionary.get(); } - protected DynamicPredictionDictionaryBase getPredictionDictionary() { + protected DecayingExpandableBinaryDictionaryBase getPredictionDictionary() { return mPredictionDictionary == null ? null : mPredictionDictionary.get(); } @@ -81,7 +82,7 @@ public abstract class PersonalizationDictionaryUpdateSession { } private void unsetPredictionDictionary() { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -89,7 +90,7 @@ public abstract class PersonalizationDictionaryUpdateSession { } public void clearAndFlushPredictionDictionary(Context context) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -105,7 +106,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Support multi locale to add bigram public void addBigramToPersonalizationDictionary(String word0, String word1, boolean isValid, int frequency) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } @@ -116,7 +117,7 @@ public abstract class PersonalizationDictionaryUpdateSession { // TODO: Support multi locale to add bigram public void addBigramsToPersonalizationDictionary( final ArrayList<PersonalizationLanguageModelParam> lmParams) { - final DynamicPredictionDictionaryBase dictionary = getPredictionDictionary(); + final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); if (dictionary == null) { return; } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java index e80953c05..432954453 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java @@ -22,7 +22,7 @@ import com.android.inputmethod.latin.ExpandableBinaryDictionary; import android.content.Context; import android.content.SharedPreferences; -public class PersonalizationPredictionDictionary extends DynamicPredictionDictionaryBase { +public class PersonalizationPredictionDictionary extends DecayingExpandableBinaryDictionaryBase { private static final String NAME = PersonalizationPredictionDictionary.class.getSimpleName(); /* package */ PersonalizationPredictionDictionary(final Context context, final String locale, diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java index 4c1803bdf..55a90ee51 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java @@ -54,7 +54,7 @@ public final class UserHistoryDictionaryBigramList { * Called when loaded from the SQL DB. */ public void addBigram(String word1, String word2, byte fcValue) { - if (DynamicPredictionDictionaryBase.DBG_SAVE_RESTORE) { + if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap<String, Byte> map; @@ -74,7 +74,7 @@ public final class UserHistoryDictionaryBigramList { * Called when inserted to the SQL DB. */ public void updateBigram(String word1, String word2, byte fcValue) { - if (DynamicPredictionDictionaryBase.DBG_SAVE_RESTORE) { + if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) { Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap<String, Byte> map; diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java index b140c919b..38e308a4e 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java @@ -26,7 +26,7 @@ import android.content.SharedPreferences; * Locally gathers stats about the words user types and various other signals like auto-correction * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. */ -public class UserHistoryPredictionDictionary extends DynamicPredictionDictionaryBase { +public class UserHistoryPredictionDictionary extends DecayingExpandableBinaryDictionaryBase { /* package for tests */ static final String NAME = UserHistoryPredictionDictionary.class.getSimpleName(); /* package */ UserHistoryPredictionDictionary(final Context context, final String locale, diff --git a/native/jni/Android.mk b/native/jni/Android.mk index c2070327e..0594ddff0 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ + dict_file_writing_utils.cpp \ format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 7f47493b2..7761ec4d5 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -26,12 +26,55 @@ #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/autocorrection_threshold_utils.h" namespace latinime { class ProximityInfo; +// TODO: Move to makedict. +static jboolean latinime_BinaryDictionary_createEmptyDictFile(JNIEnv *env, jclass clazz, + jstring filePath, jlong dictVersion, jobjectArray attributeKeyStringArray, + jobjectArray attributeValueStringArray) { + const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); + char filePathChars[filePathUtf8Length + 1]; + env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); + filePathChars[filePathUtf8Length] = '\0'; + + const int keyCount = env->GetArrayLength(attributeKeyStringArray); + const int valueCount = env->GetArrayLength(attributeValueStringArray); + if (keyCount != valueCount) { + return false; + } + + HeaderReadWriteUtils::AttributeMap attributeMap; + for (int i = 0; i < keyCount; i++) { + jstring keyString = static_cast<jstring>( + env->GetObjectArrayElement(attributeKeyStringArray, i)); + const jsize keyUtf8Length = env->GetStringUTFLength(keyString); + char keyChars[keyUtf8Length + 1]; + env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); + keyChars[keyUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::key_type key; + HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); + + jstring valueString = static_cast<jstring>( + env->GetObjectArrayElement(attributeValueStringArray, i)); + const jsize valueUtf8Length = env->GetStringUTFLength(valueString); + char valueChars[valueUtf8Length + 1]; + env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); + valueChars[valueUtf8Length] = '\0'; + HeaderReadWriteUtils::AttributeMap::mapped_type value; + HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); + + attributeMap[key] = value; + } + + return DictFileWritingUtils::createEmptyDictFile(filePathChars, static_cast<int>(dictVersion), + &attributeMap); +} + static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize, jboolean isUpdatable) { PROF_OPEN; @@ -282,6 +325,11 @@ static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jcl static const JNINativeMethod sMethods[] = { { + const_cast<char *>("createEmptyDictFileNative"), + const_cast<char *>("(Ljava/lang/String;J[Ljava/lang/String;[Ljava/lang/String;)Z"), + reinterpret_cast<void *>(latinime_BinaryDictionary_createEmptyDictFile) + }, + { const_cast<char *>("openNative"), const_cast<char *>("(Ljava/lang/String;JJZ)J"), reinterpret_cast<void *>(latinime_BinaryDictionary_open) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index bc2f5ee58..29307b56a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -325,7 +325,7 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { - AKLOGE("Bigram link is invalid. start position: %d", bigramPos); + AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index b033eee05..c6d8ddcf7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -263,7 +263,7 @@ class DynamicPatriciaTrieReadingHelper { AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { - AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); + AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a51ae5e1d..578645cd5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -16,9 +16,6 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include <cstdio> -#include <cstring> - #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" @@ -28,13 +25,12 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/hash_map_compat.h" namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; -const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = - ".tmp"; // TODO: Make MAX_DICTIONARY_SIZE 8MB. const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; @@ -147,7 +143,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { return; } - flushAllToFile(fileName, &headerBuffer, mBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); } void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, @@ -161,7 +157,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { return; } - flushAllToFile(fileName, &headerBuffer, &newDictBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( @@ -463,60 +459,6 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return true; } -// TODO: Create a struct which contains header, body and etc... and use here as an argument. -void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const { - const int tmpFileNameBufSize = strlen(fileName) - + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; - // Name of a temporary file used for writing that is a connected string of original name and - // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. - char tmpFileName[tmpFileNameBufSize]; - snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - FILE *const file = fopen(tmpFileName, "wb"); - if (!file) { - AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName); - ASSERT(false); - return; - } - // Write the dictionary header. - if (!writeBufferToFilePointer(file, dictHeader)) { - remove(tmpFileName); - AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); - ASSERT(false); - return; - } - // Write the dictionary body. - if (!writeBufferToFilePointer(file, dictBody)) { - remove(tmpFileName); - AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); - ASSERT(false); - return; - } - fclose(file); - rename(tmpFileName, fileName); -} - -// This closes file pointer when an error is caused and returns whether the writing was succeeded -// or not. -bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const { - const int originalBufSize = buffer->getOriginalBufferSize(); - if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), - originalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); - if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), - additionalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - return true; -} - bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 028fa6075..fe1b2437a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -17,7 +17,6 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H -#include <cstdio> #include <stdint.h> #include "defines.h" @@ -85,7 +84,6 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; @@ -124,13 +122,6 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - void flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const; - - bool writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const; - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite); }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index 5a3983776..30ff10cd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -36,6 +36,16 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1; const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; +/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + BufferWithExtendableBuffer *const buffer, const int rootPos) { + int writingPos = rootPos; + if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) { + return false; + } + return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */, + &writingPos); +} + /* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index a37e9fb3d..af76bc6b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -30,6 +30,8 @@ class DynamicPatriciaTrieWritingUtils { public: static const int NODE_FLAG_FIELD_SIZE; + static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos); + static bool writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 47ace23a1..7bbeacaa0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -22,10 +22,12 @@ namespace latinime { + +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; -const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; +const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; // Used for logging. Question mark is used to indicate that the key is not found. @@ -37,7 +39,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out return; } std::vector<int> keyCodePointVector; - insertCharactersIntoVector(key, &keyCodePointVector); + HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. @@ -53,47 +55,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - int attributeValue = 0; - if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { - if (attributeValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); - } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); - } else { - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector); + const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + if (demotionRate <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } bool HeaderPolicy::readUsesForgettingCurveFlag() const { - int attributeValue = 0; - if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { - return attributeValue != 0; - } else { - return false; - } + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + false /* defaultValue */); } -// Returns S_INT_MIN when the key is not found or the value is invalid. +// Returns current time when the key is not found or the value is invalid. int HeaderPolicy::readLastUpdatedTime() const { - int attributeValue = 0; - if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { - return attributeValue; - } else { - return S_INT_MIN; - } -} - -// Returns whether the key is found or not and stores the found value into outValue. -bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector<int> keyVector; - insertCharactersIntoVector(key, &keyVector); - HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); - if (it == mAttributeMap.end()) { - // The key was not found. - return false; - } - *outValue = parseIntAttributeValue(&(it->second)); - return true; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + time(0) /* defaultValue */); } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -117,13 +101,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT // Set current time as a last updated time. HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); std::vector<int> updatedTimekey; - insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - const time_t currentTime = time(NULL); - std::vector<int> updatedTimeValue; - char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; - snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); - insertCharactersIntoVector(charBuf, &updatedTimeValue); - attributeMapTowrite[updatedTimekey] = updatedTimeValue; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0)); if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; @@ -149,30 +128,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT return attributeMap; } -/* static */ int HeaderPolicy::parseIntAttributeValue( - const std::vector<int> *const attributeValue) { - int value = 0; - bool isNegative = false; - for (size_t i = 0; i < attributeValue->size(); ++i) { - if (i == 0 && attributeValue->at(i) == '-') { - isNegative = true; - } else { - if (!isdigit(attributeValue->at(i))) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - value *= 10; - value += attributeValue->at(i) - '0'; - } - } - return isNegative ? -value : value; -} - -/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, - std::vector<int> *const vector) { - for (int i = 0; characters[i]; ++i) { - vector->push_back(characters[i]); - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 6b396f3f2..e97c08ca4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,6 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <cctype> #include <stdint.h> #include "defines.h" @@ -29,16 +28,26 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictBuf(dictBuf), - mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + // Reads information from existing dictionary buffer. + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), - mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), + mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), mLastUpdatedTime(readLastUpdatedTime()) {} + // Constructs header information using an attribute map. + HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) + : mDictFormatVersion(dictFormatVersion), + mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + attributeMap)), mSize(0), mAttributeMap(*attributeMap), + mMultiWordCostMultiplier(readUsesForgettingCurveFlag()), + mUsesForgettingCurve(readUsesForgettingCurveFlag()), + mLastUpdatedTime(readLastUpdatedTime()) {} + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { @@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const USES_FORGETTING_CURVE_KEY; static const char *const LAST_UPDATED_TIME_KEY; - static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - const uint8_t *const mDictBuf; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; @@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int readLastUpdatedTime() const; - bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); - - static int parseIntAttributeValue(const std::vector<int> *const attributeValue); - - static void insertCharactersIntoVector( - const char *const characters, std::vector<int> *const vector); }; } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 80fe88671..3b1c78085 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include <cctype> +#include <cstdio> #include <vector> #include "defines.h" @@ -43,6 +45,13 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; + /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() @@ -56,6 +65,28 @@ const HeaderReadWriteUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + AttributeMap::key_type key; + insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key); + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; +} + /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); @@ -128,4 +159,57 @@ const HeaderReadWriteUtils::DictionaryFlags return true; } +/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value) { + setIntAttribute(headerAttributes, key, value ? 1 : 0); +} + +/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value) { + AttributeMap::mapped_type valueVector; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value); + insertCharactersIntoVector(charBuf, &valueVector); + (*headerAttributes)[*key] = valueVector; +} + +/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const bool defaultValue) { + const int intDefaultValue = defaultValue ? 1 : 0; + const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); + return intValue != 0; +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const int defaultValue) { + AttributeMap::const_iterator it = headerAttributes->find(*key); + if (it != headerAttributes->end()) { + int value = 0; + bool isNegative = false; + for (size_t i = 0; i < it->second.size(); ++i) { + if (i == 0 && it->second.at(i) == '-') { + isNegative = true; + } else { + if (!isdigit(it->second.at(i))) { + // If not a number. + return defaultValue; + } + value *= 10; + value += it->second.at(i) - '0'; + } + } + return isNegative ? -value : value; + } + return defaultValue; +} + +/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, + std::vector<int> *const vector) { + for (int i = 0; characters[i]; ++i) { + vector->push_back(characters[i]); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 6cce73375..caa5097f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -54,6 +54,9 @@ class HeaderReadWriteUtils { + HEADER_SIZE_FIELD_SIZE; } + static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); @@ -69,6 +72,24 @@ class HeaderReadWriteUtils { static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, int *const writingPos); + /** + * Methods for header attributes. + */ + static void setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value); + + static void setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool defaultValue); + + static int readIntAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); + + static void insertCharactersIntoVector(const char *const characters, + AttributeMap::key_type *const key); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); @@ -87,7 +108,10 @@ class HeaderReadWriteUtils { static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index c594470cc..8a84bd261 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -416,6 +416,11 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod if (PatriciaTrieReadingUtils::hasBigrams(flags)) { getBigramsStructurePolicy()->skipAllBigrams(&pos); } + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return pos; + } childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 1316b425f..7df55815f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -71,8 +71,17 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, pos); } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + const int codePoint = getCodePointAndAdvancePosition(buffer, pos); + if (codePoint == NOT_A_CODE_POINT) { + // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is + // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR + // when the PtNode has a single code point. + length = 0; + AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", + *pos - 1, codePoint, buffer[*pos - 1]); + ASSERT(false); + } else if (maxLength > 0) { + outBuffer[0] = codePoint; length = 1; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp new file mode 100644 index 000000000..2e4ec2e1d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" + +#include <cstdio> +#include <cstring> + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +namespace latinime { + +const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp"; + +/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath, + const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + switch (dictVersion) { + case 3: + return createEmptyV3DictFile(filePath, attributeMap); + default: + // Only version 3 dictionary is supported for now. + return false; + } +} + +/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */); + BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { + return false; + } + return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); +} + +/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { + const int tmpFileNameBufSize = strlen(filePath) + + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; + // Name of a temporary file used for writing that is a connected string of original name and + // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. + char tmpFileName[tmpFileNameBufSize]; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + ASSERT(false); + return false; + } + // Write the dictionary header. + if (!writeBufferToFile(file, dictHeader)) { + remove(tmpFileName); + AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + ASSERT(false); + return false; + } + // Write the dictionary body. + if (!writeBufferToFile(file, dictBody)) { + remove(tmpFileName); + AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); + ASSERT(false); + return false; + } + fclose(file); + rename(tmpFileName, filePath); + return true; +} + +// This closes file pointer when an error is caused and returns whether the writing was succeeded +// or not. +/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer) { + const int originalBufSize = buffer->getOriginalBufferSize(); + if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), + originalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); + if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), + additionalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h new file mode 100644 index 000000000..bd4ac66fd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H +#define LATINIME_DICT_FILE_WRITING_UTILS_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class DictFileWritingUtils { + public: + static bool createEmptyDictFile(const char *const filePath, const int dictVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, + BufferWithExtendableBuffer *const dictBody); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); + + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + + static bool createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer); +}; +} // namespace latinime +#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index b6aa85896..9f0a331e3 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -74,7 +74,8 @@ class TypingWeighting : public Weighting { // Note: min() required since length can be MAX_POINT_TO_KEY_LENGTH for characters not on // the keyboard (like accented letters) const float normalizedSquaredLength = traverseSession->getProximityInfoState(0) - ->getPointToKeyLength(pointIndex, dicNode->getNodeCodePoint()); + ->getPointToKeyLength(pointIndex, + CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float normalizedDistance = TouchPositionCorrectionUtils::getSweetSpotFactor( traverseSession->isTouchPositionCorrectionEnabled(), normalizedSquaredLength); const float weightedDistance = ScoringParams::DISTANCE_WEIGHT_LENGTH * normalizedDistance; @@ -113,10 +114,10 @@ class TypingWeighting : public Weighting { const int16_t parentPointIndex = parentDicNode->getInputIndex(0); const int prevCodePoint = parentDicNode->getNodeCodePoint(); const float distance1 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex + 1, prevCodePoint); + parentPointIndex + 1, CharUtils::toBaseLowerCase(prevCodePoint)); const int codePoint = dicNode->getNodeCodePoint(); const float distance2 = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - parentPointIndex, codePoint); + parentPointIndex, CharUtils::toBaseLowerCase(codePoint)); const float distance = distance1 + distance2; const float weightedLengthDistance = distance * ScoringParams::DISTANCE_WEIGHT_LENGTH; @@ -133,7 +134,7 @@ class TypingWeighting : public Weighting { const bool existsAdjacentProximityChars = traverseSession->getProximityInfoState(0) ->existsAdjacentProximityChars(insertedPointIndex); const float dist = traverseSession->getProximityInfoState(0)->getPointToKeyLength( - insertedPointIndex + 1, dicNode->getNodeCodePoint()); + insertedPointIndex + 1, CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint())); const float weightedDistance = dist * ScoringParams::DISTANCE_WEIGHT_LENGTH; const bool singleChar = dicNode->getNodeCodePointCount() == 1; float cost = (singleChar ? ScoringParams::INSERTION_COST_FIRST_CHAR : 0.0f); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 96a2217a3..7ed3ee180 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -21,24 +21,18 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver3DictEncoder; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Locale; +import java.util.Map; import java.util.Random; @LargeTest public class BinaryDictionaryTests extends AndroidTestCase { - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(3 /* version */, true /* supportsDynamicUpdate */); private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_LOCALE = "test"; @@ -52,15 +46,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { super.tearDown(); } - private File createEmptyDictionaryAndGetFile(final String filename) throws IOException, - UnsupportedFormatException { - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); + private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); - final DictEncoder dictEncoder = new Ver3DictEncoder(file); - dictEncoder.writeDictionary(dict, FORMAT_OPTIONS); - return file; + Map<String, String> attributeMap = new HashMap<String, String>(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + 3 /* dictVersion */, attributeMap)) { + return file; + } else { + throw new IOException("Empty dictionary cannot be created."); + } } public void testIsValidDictionary() { @@ -69,8 +66,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -95,8 +90,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -139,8 +132,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -169,8 +160,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -240,8 +229,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -294,8 +281,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -342,8 +327,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -392,8 +375,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, @@ -445,8 +426,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), @@ -516,8 +495,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), @@ -617,8 +594,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); - } catch (UnsupportedFormatException e) { - fail("UnsupportedFormatException while writing an initial dictionary : " + e); } final ArrayList<String> words = new ArrayList<String>(); diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java new file mode 100644 index 000000000..132483d5e --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/SparseTableTests.java @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Log; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Random; + +/** + * Unit tests for SparseTable. + */ +@LargeTest +public class SparseTableTests extends AndroidTestCase { + private static final String TAG = SparseTableTests.class.getSimpleName(); + + private static final int[] SMALL_INDEX = { SparseTable.NOT_EXIST, 0 }; + private static final int[] BIG_INDEX = { SparseTable.NOT_EXIST, 1, 2, 3, 4, 5, 6, 7}; + + private final Random mRandom; + private final ArrayList<Integer> mRandomIndex; + + private static final int DEFAULT_SIZE = 10000; + private static final int BLOCK_SIZE = 8; + + public SparseTableTests() { + this(System.currentTimeMillis(), DEFAULT_SIZE); + } + + public SparseTableTests(final long seed, final int tableSize) { + super(); + Log.d(TAG, "Seed for test is " + seed + ", size is " + tableSize); + mRandom = new Random(seed); + mRandomIndex = new ArrayList<Integer>(tableSize); + for (int i = 0; i < tableSize; ++i) { + mRandomIndex.add(SparseTable.NOT_EXIST); + } + } + + public void testInitializeWithArray() { + final SparseTable table = new SparseTable(SMALL_INDEX, BIG_INDEX, BLOCK_SIZE); + for (int i = 0; i < 8; ++i) { + assertEquals(SparseTable.NOT_EXIST, table.get(i)); + } + assertEquals(SparseTable.NOT_EXIST, table.get(8)); + for (int i = 9; i < 16; ++i) { + assertEquals(i - 8, table.get(i)); + } + } + + public void testSet() { + final SparseTable table = new SparseTable(16, BLOCK_SIZE); + table.set(3, 6); + table.set(8, 16); + for (int i = 0; i < 16; ++i) { + if (i == 3 || i == 8) { + assertEquals(i * 2, table.get(i)); + } else { + assertEquals(SparseTable.NOT_EXIST, table.get(i)); + } + } + } + + private void generateRandomIndex(final int size, final int prop) { + for (int i = 0; i < DEFAULT_SIZE; ++i) { + if (mRandom.nextInt(100) < prop) { + mRandomIndex.set(i, mRandom.nextInt()); + } else { + mRandomIndex.set(i, SparseTable.NOT_EXIST); + } + } + } + + private void runTestRandomSet() { + final SparseTable table = new SparseTable(DEFAULT_SIZE, BLOCK_SIZE); + int elementCount = 0; + for (int i = 0; i < DEFAULT_SIZE; ++i) { + if (mRandomIndex.get(i) != SparseTable.NOT_EXIST) { + table.set(i, mRandomIndex.get(i)); + elementCount++; + } + } + + Log.d(TAG, "table size = " + table.getLookupTableSize() + " + " + + table.getContentTableSize()); + Log.d(TAG, "the table has " + elementCount + " elements"); + for (int i = 0; i < DEFAULT_SIZE; ++i) { + assertEquals(table.get(i), (int)mRandomIndex.get(i)); + } + + // flush and reload + OutputStream lookupOutStream = null; + OutputStream contentOutStream = null; + InputStream lookupInStream = null; + InputStream contentInStream = null; + try { + final File lookupIndexFile = File.createTempFile("testRandomSet", ".small"); + final File contentFile = File.createTempFile("testRandomSet", ".big"); + lookupOutStream = new FileOutputStream(lookupIndexFile); + contentOutStream = new FileOutputStream(contentFile); + table.write(lookupOutStream, contentOutStream); + lookupInStream = new FileInputStream(lookupIndexFile); + contentInStream = new FileInputStream(contentFile); + final byte[] lookupArray = new byte[(int) lookupIndexFile.length()]; + final byte[] contentArray = new byte[(int) contentFile.length()]; + lookupInStream.read(lookupArray); + contentInStream.read(contentArray); + final SparseTable newTable = new SparseTable(lookupArray, contentArray, BLOCK_SIZE); + for (int i = 0; i < DEFAULT_SIZE; ++i) { + assertEquals(table.get(i), newTable.get(i)); + } + } catch (IOException e) { + Log.d(TAG, "IOException while flushing and realoding", e); + } finally { + if (lookupOutStream != null) { + try { + lookupOutStream.close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing the stream", e); + } + } + if (contentOutStream != null) { + try { + contentOutStream.close(); + } catch (IOException e) { + Log.d(TAG, "IOException while closing contentStream.", e); + } + } + } + } + + public void testRandomSet() { + for (int i = 0; i <= 100; i += 10) { + generateRandomIndex(DEFAULT_SIZE, i); + runTestRandomSet(); + } + } +} diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index fa80385fc..6c4cbcf9d 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -185,7 +185,7 @@ public final class BinaryDictOffdeviceUtils { crash(filename, new RuntimeException( filename + " does not seem to be a dictionary file")); } else { - final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, + final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile, DictDecoder.USE_BYTEARRAY); if (report) { System.out.println("Format : Binary dictionary format"); |