diff options
28 files changed, 298 insertions, 161 deletions
diff --git a/java/src/com/android/inputmethod/dictionarypack/ActionBatch.java b/java/src/com/android/inputmethod/dictionarypack/ActionBatch.java index 706bdea8e..3f69cedee 100644 --- a/java/src/com/android/inputmethod/dictionarypack/ActionBatch.java +++ b/java/src/com/android/inputmethod/dictionarypack/ActionBatch.java @@ -325,8 +325,9 @@ public final class ActionBatch { MetadataDbHelper.TYPE_BULK, MetadataDbHelper.STATUS_AVAILABLE, mWordList.mId, mWordList.mLocale, mWordList.mDescription, null == mWordList.mLocalFilename ? "" : mWordList.mLocalFilename, - mWordList.mRemoteFilename, mWordList.mLastUpdate, mWordList.mChecksum, - mWordList.mFileSize, mWordList.mVersion, mWordList.mFormatVersion); + mWordList.mRemoteFilename, mWordList.mLastUpdate, mWordList.mRawChecksum, + mWordList.mChecksum, mWordList.mFileSize, mWordList.mVersion, + mWordList.mFormatVersion); PrivateLog.log("Insert 'available' record for " + mWordList.mDescription + " and locale " + mWordList.mLocale); db.insert(MetadataDbHelper.METADATA_TABLE_NAME, null, values); @@ -374,7 +375,7 @@ public final class ActionBatch { final ContentValues values = MetadataDbHelper.makeContentValues(0, MetadataDbHelper.TYPE_BULK, MetadataDbHelper.STATUS_INSTALLED, mWordList.mId, mWordList.mLocale, mWordList.mDescription, - "", mWordList.mRemoteFilename, mWordList.mLastUpdate, + "", mWordList.mRemoteFilename, mWordList.mLastUpdate, mWordList.mRawChecksum, mWordList.mChecksum, mWordList.mFileSize, mWordList.mVersion, mWordList.mFormatVersion); PrivateLog.log("Insert 'preinstalled' record for " + mWordList.mDescription @@ -416,8 +417,9 @@ public final class ActionBatch { oldValues.getAsInteger(MetadataDbHelper.STATUS_COLUMN), mWordList.mId, mWordList.mLocale, mWordList.mDescription, oldValues.getAsString(MetadataDbHelper.LOCAL_FILENAME_COLUMN), - mWordList.mRemoteFilename, mWordList.mLastUpdate, mWordList.mChecksum, - mWordList.mFileSize, mWordList.mVersion, mWordList.mFormatVersion); + mWordList.mRemoteFilename, mWordList.mLastUpdate, mWordList.mRawChecksum, + mWordList.mChecksum, mWordList.mFileSize, mWordList.mVersion, + mWordList.mFormatVersion); PrivateLog.log("Updating record for " + mWordList.mDescription + " and locale " + mWordList.mLocale); db.update(MetadataDbHelper.METADATA_TABLE_NAME, values, diff --git a/java/src/com/android/inputmethod/dictionarypack/DictionaryProvider.java b/java/src/com/android/inputmethod/dictionarypack/DictionaryProvider.java index 80def701d..c35995b24 100644 --- a/java/src/com/android/inputmethod/dictionarypack/DictionaryProvider.java +++ b/java/src/com/android/inputmethod/dictionarypack/DictionaryProvider.java @@ -89,10 +89,13 @@ public final class DictionaryProvider extends ContentProvider { private static final class WordListInfo { public final String mId; public final String mLocale; + public final String mRawChecksum; public final int mMatchLevel; - public WordListInfo(final String id, final String locale, final int matchLevel) { + public WordListInfo(final String id, final String locale, final String rawChecksum, + final int matchLevel) { mId = id; mLocale = locale; + mRawChecksum = rawChecksum; mMatchLevel = matchLevel; } } @@ -106,7 +109,8 @@ public final class DictionaryProvider extends ContentProvider { private static final class ResourcePathCursor extends AbstractCursor { // Column names for the cursor returned by this content provider. - static private final String[] columnNames = { "id", "locale" }; + static private final String[] columnNames = { MetadataDbHelper.WORDLISTID_COLUMN, + MetadataDbHelper.LOCALE_COLUMN, MetadataDbHelper.RAW_CHECKSUM_COLUMN }; // The list of word lists served by this provider that match the client request. final WordListInfo[] mWordLists; @@ -141,6 +145,7 @@ public final class DictionaryProvider extends ContentProvider { switch (column) { case 0: return mWordLists[mPos].mId; case 1: return mWordLists[mPos].mLocale; + case 2: return mWordLists[mPos].mRawChecksum; default : return null; } } @@ -357,6 +362,8 @@ public final class DictionaryProvider extends ContentProvider { final int localeIndex = results.getColumnIndex(MetadataDbHelper.LOCALE_COLUMN); final int localFileNameIndex = results.getColumnIndex(MetadataDbHelper.LOCAL_FILENAME_COLUMN); + final int rawChecksumIndex = + results.getColumnIndex(MetadataDbHelper.RAW_CHECKSUM_COLUMN); final int statusIndex = results.getColumnIndex(MetadataDbHelper.STATUS_COLUMN); if (results.moveToFirst()) { do { @@ -379,6 +386,7 @@ public final class DictionaryProvider extends ContentProvider { } final String wordListLocale = results.getString(localeIndex); final String wordListLocalFilename = results.getString(localFileNameIndex); + final String wordListRawChecksum = results.getString(rawChecksumIndex); final int wordListStatus = results.getInt(statusIndex); // Test the requested locale against this wordlist locale. The requested locale // has to either match exactly or be more specific than the dictionary - a @@ -412,8 +420,8 @@ public final class DictionaryProvider extends ContentProvider { final WordListInfo currentBestMatch = dicts.get(wordListCategory); if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) { - dicts.put(wordListCategory, - new WordListInfo(wordListId, wordListLocale, matchLevel)); + dicts.put(wordListCategory, new WordListInfo(wordListId, wordListLocale, + wordListRawChecksum, matchLevel)); } } while (results.moveToNext()); } diff --git a/java/src/com/android/inputmethod/dictionarypack/MD5Calculator.java b/java/src/com/android/inputmethod/dictionarypack/MD5Calculator.java index e47e86e4b..ccd054c84 100644 --- a/java/src/com/android/inputmethod/dictionarypack/MD5Calculator.java +++ b/java/src/com/android/inputmethod/dictionarypack/MD5Calculator.java @@ -20,7 +20,7 @@ import java.io.InputStream; import java.io.IOException; import java.security.MessageDigest; -final class MD5Calculator { +public final class MD5Calculator { private MD5Calculator() {} // This helper class is not instantiable public static String checksum(final InputStream in) throws IOException { diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java index 4a8fa51ee..668eb925b 100644 --- a/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java +++ b/java/src/com/android/inputmethod/dictionarypack/MetadataDbHelper.java @@ -20,6 +20,7 @@ import android.content.ContentValues; import android.content.Context; import android.database.Cursor; import android.database.sqlite.SQLiteDatabase; +import android.database.sqlite.SQLiteException; import android.database.sqlite.SQLiteOpenHelper; import android.text.TextUtils; import android.util.Log; @@ -46,7 +47,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper { // used to identify the versions for upgrades. This should never change going forward. private static final int METADATA_DATABASE_VERSION_WITH_CLIENTID = 6; // The current database version. - private static final int CURRENT_METADATA_DATABASE_VERSION = 7; + private static final int CURRENT_METADATA_DATABASE_VERSION = 8; private final static long NOT_A_DOWNLOAD_ID = -1; @@ -66,7 +67,8 @@ public class MetadataDbHelper extends SQLiteOpenHelper { public static final String VERSION_COLUMN = "version"; public static final String FORMATVERSION_COLUMN = "formatversion"; public static final String FLAGS_COLUMN = "flags"; - public static final int COLUMN_COUNT = 13; + public static final String RAW_CHECKSUM_COLUMN = "rawChecksum"; + public static final int COLUMN_COUNT = 14; private static final String CLIENT_CLIENT_ID_COLUMN = "clientid"; private static final String CLIENT_METADATA_URI_COLUMN = "uri"; @@ -119,8 +121,9 @@ public class MetadataDbHelper extends SQLiteOpenHelper { + CHECKSUM_COLUMN + " TEXT, " + FILESIZE_COLUMN + " INTEGER, " + VERSION_COLUMN + " INTEGER," - + FORMATVERSION_COLUMN + " INTEGER," - + FLAGS_COLUMN + " INTEGER," + + FORMATVERSION_COLUMN + " INTEGER, " + + FLAGS_COLUMN + " INTEGER, " + + RAW_CHECKSUM_COLUMN + " TEXT," + "PRIMARY KEY (" + WORDLISTID_COLUMN + "," + VERSION_COLUMN + "));"; private static final String METADATA_CREATE_CLIENT_TABLE = "CREATE TABLE IF NOT EXISTS " + CLIENT_TABLE_NAME + " (" @@ -136,7 +139,8 @@ public class MetadataDbHelper extends SQLiteOpenHelper { static final String[] METADATA_TABLE_COLUMNS = { PENDINGID_COLUMN, TYPE_COLUMN, STATUS_COLUMN, WORDLISTID_COLUMN, LOCALE_COLUMN, DESCRIPTION_COLUMN, LOCAL_FILENAME_COLUMN, REMOTE_FILENAME_COLUMN, DATE_COLUMN, CHECKSUM_COLUMN, - FILESIZE_COLUMN, VERSION_COLUMN, FORMATVERSION_COLUMN, FLAGS_COLUMN }; + FILESIZE_COLUMN, VERSION_COLUMN, FORMATVERSION_COLUMN, FLAGS_COLUMN, + RAW_CHECKSUM_COLUMN }; // List of all client table columns. static final String[] CLIENT_TABLE_COLUMNS = { CLIENT_CLIENT_ID_COLUMN, CLIENT_METADATA_URI_COLUMN, CLIENT_PENDINGID_COLUMN, FLAGS_COLUMN }; @@ -215,6 +219,17 @@ public class MetadataDbHelper extends SQLiteOpenHelper { createClientTable(db); } + private void addRawChecksumColumnUnlessPresent(final SQLiteDatabase db, final String clientId) { + try { + db.execSQL("SELECT " + RAW_CHECKSUM_COLUMN + " FROM " + + METADATA_TABLE_NAME + " LIMIT 0;"); + } catch (SQLiteException e) { + Log.i(TAG, "No " + RAW_CHECKSUM_COLUMN + " column : creating it"); + db.execSQL("ALTER TABLE " + METADATA_TABLE_NAME + " ADD COLUMN " + + RAW_CHECKSUM_COLUMN + " TEXT;"); + } + } + /** * Upgrade the database. Upgrade from version 3 is supported. * Version 3 has a DB named METADATA_DATABASE_NAME_STEM containing a table METADATA_TABLE_NAME. @@ -260,6 +275,12 @@ public class MetadataDbHelper extends SQLiteOpenHelper { db.execSQL("DROP TABLE IF EXISTS " + CLIENT_TABLE_NAME); onCreate(db); } + // A rawChecksum column that did not exist in the previous versions was added that + // corresponds to the md5 checksum of the file after decompression/decryption. This is to + // strengthen the system against corrupted dictionary files. + // The most secure way to upgrade a database is to just test for the column presence, and + // add it if it's not there. + addRawChecksumColumnUnlessPresent(db, mClientId); } /** @@ -431,7 +452,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper { public static ContentValues makeContentValues(final int pendingId, final int type, final int status, final String wordlistId, final String locale, final String description, final String filename, final String url, final long date, - final String checksum, final long filesize, final int version, + final String rawChecksum, final String checksum, final long filesize, final int version, final int formatVersion) { final ContentValues result = new ContentValues(COLUMN_COUNT); result.put(PENDINGID_COLUMN, pendingId); @@ -443,6 +464,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper { result.put(LOCAL_FILENAME_COLUMN, filename); result.put(REMOTE_FILENAME_COLUMN, url); result.put(DATE_COLUMN, date); + result.put(RAW_CHECKSUM_COLUMN, rawChecksum); result.put(CHECKSUM_COLUMN, checksum); result.put(FILESIZE_COLUMN, filesize); result.put(VERSION_COLUMN, version); @@ -478,6 +500,8 @@ public class MetadataDbHelper extends SQLiteOpenHelper { if (null == result.get(REMOTE_FILENAME_COLUMN)) result.put(REMOTE_FILENAME_COLUMN, ""); // 0 for the update date : 1970/1/1. Unless specified. if (null == result.get(DATE_COLUMN)) result.put(DATE_COLUMN, 0); + // Raw checksum unknown unless specified + if (null == result.get(RAW_CHECKSUM_COLUMN)) result.put(RAW_CHECKSUM_COLUMN, ""); // Checksum unknown unless specified if (null == result.get(CHECKSUM_COLUMN)) result.put(CHECKSUM_COLUMN, ""); // No filesize unless specified @@ -525,6 +549,7 @@ public class MetadataDbHelper extends SQLiteOpenHelper { putStringResult(result, cursor, LOCAL_FILENAME_COLUMN); putStringResult(result, cursor, REMOTE_FILENAME_COLUMN); putIntResult(result, cursor, DATE_COLUMN); + putStringResult(result, cursor, RAW_CHECKSUM_COLUMN); putStringResult(result, cursor, CHECKSUM_COLUMN); putIntResult(result, cursor, FILESIZE_COLUMN); putIntResult(result, cursor, VERSION_COLUMN); diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataHandler.java b/java/src/com/android/inputmethod/dictionarypack/MetadataHandler.java index 5c2289911..63e419871 100644 --- a/java/src/com/android/inputmethod/dictionarypack/MetadataHandler.java +++ b/java/src/com/android/inputmethod/dictionarypack/MetadataHandler.java @@ -52,6 +52,8 @@ public class MetadataHandler { final int idIndex = results.getColumnIndex(MetadataDbHelper.WORDLISTID_COLUMN); final int updateIndex = results.getColumnIndex(MetadataDbHelper.DATE_COLUMN); final int fileSizeIndex = results.getColumnIndex(MetadataDbHelper.FILESIZE_COLUMN); + final int rawChecksumIndex = + results.getColumnIndex(MetadataDbHelper.RAW_CHECKSUM_COLUMN); final int checksumIndex = results.getColumnIndex(MetadataDbHelper.CHECKSUM_COLUMN); final int localFilenameIndex = results.getColumnIndex(MetadataDbHelper.LOCAL_FILENAME_COLUMN); @@ -66,6 +68,7 @@ public class MetadataHandler { results.getString(descriptionColumn), results.getLong(updateIndex), results.getLong(fileSizeIndex), + results.getString(rawChecksumIndex), results.getString(checksumIndex), results.getString(localFilenameIndex), results.getString(remoteFilenameIndex), diff --git a/java/src/com/android/inputmethod/dictionarypack/MetadataParser.java b/java/src/com/android/inputmethod/dictionarypack/MetadataParser.java index 27670fddf..a88173e8e 100644 --- a/java/src/com/android/inputmethod/dictionarypack/MetadataParser.java +++ b/java/src/com/android/inputmethod/dictionarypack/MetadataParser.java @@ -37,6 +37,7 @@ public class MetadataParser { private static final String DESCRIPTION_FIELD_NAME = MetadataDbHelper.DESCRIPTION_COLUMN; private static final String UPDATE_FIELD_NAME = "update"; private static final String FILESIZE_FIELD_NAME = MetadataDbHelper.FILESIZE_COLUMN; + private static final String RAW_CHECKSUM_FIELD_NAME = MetadataDbHelper.RAW_CHECKSUM_COLUMN; private static final String CHECKSUM_FIELD_NAME = MetadataDbHelper.CHECKSUM_COLUMN; private static final String REMOTE_FILENAME_FIELD_NAME = MetadataDbHelper.REMOTE_FILENAME_COLUMN; @@ -80,6 +81,7 @@ public class MetadataParser { arguments.get(DESCRIPTION_FIELD_NAME), Long.parseLong(arguments.get(UPDATE_FIELD_NAME)), Long.parseLong(arguments.get(FILESIZE_FIELD_NAME)), + arguments.get(RAW_CHECKSUM_FIELD_NAME), arguments.get(CHECKSUM_FIELD_NAME), null, arguments.get(REMOTE_FILENAME_FIELD_NAME), diff --git a/java/src/com/android/inputmethod/dictionarypack/WordListMetadata.java b/java/src/com/android/inputmethod/dictionarypack/WordListMetadata.java index 69bff9597..9e510a68b 100644 --- a/java/src/com/android/inputmethod/dictionarypack/WordListMetadata.java +++ b/java/src/com/android/inputmethod/dictionarypack/WordListMetadata.java @@ -30,6 +30,7 @@ public class WordListMetadata { public final String mDescription; public final long mLastUpdate; public final long mFileSize; + public final String mRawChecksum; public final String mChecksum; public final String mLocalFilename; public final String mRemoteFilename; @@ -50,13 +51,15 @@ public class WordListMetadata { public WordListMetadata(final String id, final int type, final String description, final long lastUpdate, final long fileSize, - final String checksum, final String localFilename, final String remoteFilename, - final int version, final int formatVersion, final int flags, final String locale) { + final String rawChecksum, final String checksum, final String localFilename, + final String remoteFilename, final int version, final int formatVersion, + final int flags, final String locale) { mId = id; mType = type; mDescription = description; mLastUpdate = lastUpdate; // In milliseconds mFileSize = fileSize; + mRawChecksum = rawChecksum; mChecksum = checksum; mLocalFilename = localFilename; mRemoteFilename = remoteFilename; @@ -77,6 +80,7 @@ public class WordListMetadata { final String description = values.getAsString(MetadataDbHelper.DESCRIPTION_COLUMN); final Long lastUpdate = values.getAsLong(MetadataDbHelper.DATE_COLUMN); final Long fileSize = values.getAsLong(MetadataDbHelper.FILESIZE_COLUMN); + final String rawChecksum = values.getAsString(MetadataDbHelper.RAW_CHECKSUM_COLUMN); final String checksum = values.getAsString(MetadataDbHelper.CHECKSUM_COLUMN); final String localFilename = values.getAsString(MetadataDbHelper.LOCAL_FILENAME_COLUMN); final String remoteFilename = values.getAsString(MetadataDbHelper.REMOTE_FILENAME_COLUMN); @@ -98,8 +102,8 @@ public class WordListMetadata { || null == locale) { throw new IllegalArgumentException(); } - return new WordListMetadata(id, type, description, lastUpdate, fileSize, checksum, - localFilename, remoteFilename, version, formatVersion, flags, locale); + return new WordListMetadata(id, type, description, lastUpdate, fileSize, rawChecksum, + checksum, localFilename, remoteFilename, version, formatVersion, flags, locale); } @Override @@ -110,6 +114,7 @@ public class WordListMetadata { sb.append("\nDescription : ").append(mDescription); sb.append("\nLastUpdate : ").append(mLastUpdate); sb.append("\nFileSize : ").append(mFileSize); + sb.append("\nRawChecksum : ").append(mRawChecksum); sb.append("\nChecksum : ").append(mChecksum); sb.append("\nLocalFilename : ").append(mLocalFilename); sb.append("\nRemoteFilename : ").append(mRemoteFilename); diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java index e428b1d54..72757e086 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java @@ -28,6 +28,7 @@ import android.text.TextUtils; import android.util.Log; import com.android.inputmethod.dictionarypack.DictionaryPackConstants; +import com.android.inputmethod.dictionarypack.MD5Calculator; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils.DictionaryInfo; @@ -38,6 +39,7 @@ import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -167,8 +169,9 @@ public final class BinaryDictionaryFileDumper { do { final String wordListId = cursor.getString(0); final String wordListLocale = cursor.getString(1); + final String wordListRawChecksum = cursor.getString(2); if (TextUtils.isEmpty(wordListId)) continue; - list.add(new WordListInfo(wordListId, wordListLocale)); + list.add(new WordListInfo(wordListId, wordListLocale, wordListRawChecksum)); } while (cursor.moveToNext()); return list; } catch (RemoteException e) { @@ -217,7 +220,8 @@ public final class BinaryDictionaryFileDumper { * and creating it (and its containing directory) if necessary. */ private static void cacheWordList(final String wordlistId, final String locale, - final ContentProviderClient providerClient, final Context context) { + final String rawChecksum, final ContentProviderClient providerClient, + final Context context) { final int COMPRESSED_CRYPTED_COMPRESSED = 0; final int CRYPTED_COMPRESSED = 1; final int COMPRESSED_CRYPTED = 2; @@ -299,6 +303,13 @@ public final class BinaryDictionaryFileDumper { checkMagicAndCopyFileTo(bufferedInputStream, bufferedOutputStream); bufferedOutputStream.flush(); bufferedOutputStream.close(); + final String actualRawChecksum = MD5Calculator.checksum( + new BufferedInputStream(new FileInputStream(outputFile))); + Log.i(TAG, "Computed checksum for downloaded dictionary. Expected = " + rawChecksum + + " ; actual = " + actualRawChecksum); + if (!TextUtils.isEmpty(rawChecksum) && !rawChecksum.equals(actualRawChecksum)) { + throw new IOException("Could not decode the file correctly : checksum differs"); + } final File finalFile = new File(finalFileName); finalFile.delete(); if (!outputFile.renameTo(finalFile)) { @@ -408,7 +419,7 @@ public final class BinaryDictionaryFileDumper { final List<WordListInfo> idList = getWordListWordListInfos(locale, context, hasDefaultWordList); for (WordListInfo id : idList) { - cacheWordList(id.mId, id.mLocale, providerClient, context); + cacheWordList(id.mId, id.mLocale, id.mRawChecksum, providerClient, context); } } finally { providerClient.release(); diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 19c777a3e..ab7e66a09 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -84,7 +84,6 @@ import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.CoordinateUtils; import com.android.inputmethod.latin.utils.DialogUtils; import com.android.inputmethod.latin.utils.DistracterFilter; -import com.android.inputmethod.latin.utils.DistracterFilterUtils; import com.android.inputmethod.latin.utils.ImportantNoticeUtils; import com.android.inputmethod.latin.utils.IntentUtils; import com.android.inputmethod.latin.utils.JniUtils; @@ -1748,7 +1747,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen @UsedForTesting /* package for test */ DistracterFilter createDistracterFilter() { - return DistracterFilterUtils.createDistracterFilter(this /* Context */, mKeyboardSwitcher); + return new DistracterFilter(this /* Context */, + mRichImm.getMyEnabledInputMethodSubtypeList( + true /* allowsImplicitlySelectedSubtypes */)); } public void dumpDictionaryForDebug(final String dictName) { diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index e3759a586..43daee4d2 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -18,7 +18,6 @@ package com.android.inputmethod.latin; import android.text.TextUtils; -import com.android.inputmethod.event.Event; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; @@ -112,7 +111,10 @@ public final class Suggest { additionalFeaturesOptions, SESSION_TYPING, rawSuggestions); final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); - final boolean isAllUpperCase = wordComposer.isAllUpperCase(); + // If resumed, then we don't want to upcase everything: resuming on a fully-capitalized + // words is rarely done to switch to another fully-capitalized word, but usually to a + // normal, non-capitalized suggestion. + final boolean isAllUpperCase = wordComposer.isAllUpperCase() && !wordComposer.isResumed(); final String firstSuggestion; final String whitelistedWord; if (suggestionResults.isEmpty()) { diff --git a/java/src/com/android/inputmethod/latin/WordListInfo.java b/java/src/com/android/inputmethod/latin/WordListInfo.java index 5ac806a0c..268fe9818 100644 --- a/java/src/com/android/inputmethod/latin/WordListInfo.java +++ b/java/src/com/android/inputmethod/latin/WordListInfo.java @@ -22,8 +22,10 @@ package com.android.inputmethod.latin; public final class WordListInfo { public final String mId; public final String mLocale; - public WordListInfo(final String id, final String locale) { + public final String mRawChecksum; + public WordListInfo(final String id, final String locale, final String rawChecksum) { mId = id; mLocale = locale; + mRawChecksum = rawChecksum; } } diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java index faab76944..7536ff94c 100644 --- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java +++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java @@ -805,10 +805,11 @@ public final class InputLogic { final int codePoint = inputTransaction.mEvent.mCodePoint; final SettingsValues settingsValues = inputTransaction.mSettingsValues; boolean didAutoCorrect = false; + final boolean wasComposingWord = mWordComposer.isComposingWord(); // We avoid sending spaces in languages without spaces if we were composing. final boolean shouldAvoidSendingCode = Constants.CODE_SPACE == codePoint && !settingsValues.mSpacingAndPunctuations.mCurrentLanguageHasSpaces - && mWordComposer.isComposingWord(); + && wasComposingWord; if (mWordComposer.isCursorFrontOrMiddleOfComposingWord()) { // If we are in the middle of a recorrection, we need to commit the recorrection // first so that we can insert the separator at the current cursor position. @@ -852,7 +853,7 @@ public final class InputLogic { promotePhantomSpace(settingsValues); } if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { - ResearchLogger.latinIME_handleSeparator(codePoint, mWordComposer.isComposingWord()); + ResearchLogger.latinIME_handleSeparator(codePoint, wasComposingWord); } if (!shouldAvoidSendingCode) { @@ -868,7 +869,9 @@ public final class InputLogic { } startDoubleSpacePeriodCountdown(inputTransaction); - inputTransaction.setRequiresUpdateSuggestions(); + if (wasComposingWord) { + inputTransaction.setRequiresUpdateSuggestions(); + } } else { if (swapWeakSpace) { swapSwapperAndSpace(inputTransaction); diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 9ea7e217e..f1057da0b 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -16,13 +16,23 @@ package com.android.inputmethod.latin.utils; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.concurrent.TimeUnit; import android.content.Context; +import android.content.res.Resources; +import android.text.InputType; import android.util.Log; +import android.view.inputmethod.EditorInfo; +import android.view.inputmethod.InputMethodSubtype; import com.android.inputmethod.keyboard.Keyboard; +import com.android.inputmethod.keyboard.KeyboardId; +import com.android.inputmethod.keyboard.KeyboardLayoutSet; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.Suggest; @@ -41,8 +51,10 @@ public class DistracterFilter { private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120; private final Context mContext; + private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap; + private final Map<Locale, Keyboard> mLocaleToKeyboardMap; private final Suggest mSuggest; - private final Keyboard mKeyboard; + private Keyboard mKeyboard; // If the score of the top suggestion exceeds this value, the tested word (e.g., // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to @@ -51,17 +63,34 @@ public class DistracterFilter { // the dictionary. private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; + // Create empty distracter filter. + public DistracterFilter() { + this(null, new ArrayList<InputMethodSubtype>()); + } + /** * Create a DistracterFilter instance. * * @param context the context. - * @param keyboard the keyboard that is currently being used. This information is needed - * when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions. + * @param enabledSubtypes the enabled subtypes. */ - public DistracterFilter(final Context context, final Keyboard keyboard) { + public DistracterFilter(final Context context, final List<InputMethodSubtype> enabledSubtypes) { mContext = context; + mLocaleToSubtypeMap = new HashMap<>(); + if (enabledSubtypes != null) { + for (final InputMethodSubtype subtype : enabledSubtypes) { + final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype); + if (mLocaleToSubtypeMap.containsKey(locale)) { + // Multiple subtypes are enabled for one locale. + // TODO: Investigate what we should do for this case. + continue; + } + mLocaleToSubtypeMap.put(locale, subtype); + } + } + mLocaleToKeyboardMap = new HashMap<>(); mSuggest = new Suggest(); - mKeyboard = keyboard; + mKeyboard = null; } private static boolean suggestionExceedsDistracterThreshold( @@ -78,6 +107,30 @@ public class DistracterFilter { return false; } + private void loadKeyboardForLocale(final Locale newLocale) { + final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale); + if (cachedKeyboard != null) { + mKeyboard = cachedKeyboard; + return; + } + final InputMethodSubtype subtype = mLocaleToSubtypeMap.get(newLocale); + if (subtype == null) { + return; + } + final EditorInfo editorInfo = new EditorInfo(); + editorInfo.inputType = InputType.TYPE_CLASS_TEXT; + final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder( + mContext, editorInfo); + final Resources res = mContext.getResources(); + final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res); + final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res); + builder.setKeyboardGeometry(keyboardWidth, keyboardHeight); + builder.setSubtype(subtype); + builder.setIsSpellChecker(false /* isSpellChecker */); + final KeyboardLayoutSet layoutSet = builder.build(); + mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET); + } + private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException { mSuggest.mDictionaryFacilitator.resetDictionaries(mContext, newlocale, false /* useContactsDict */, false /* usePersonalizedDicts */, @@ -92,15 +145,21 @@ public class DistracterFilter { * @param prevWordsInfo the information of previous words. * @param testedWord the word that will be tested to see whether it is a distracter to words * in dictionaries. - * @param locale the locale of words. + * @param locale the locale of word. * @return true if testedWord is a distracter, otherwise false. */ public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, final String testedWord, final Locale locale) { - if (mKeyboard == null || locale == null) { + if (locale == null) { return false; } if (!locale.equals(mSuggest.mDictionaryFacilitator.getLocale())) { + if (!mLocaleToSubtypeMap.containsKey(locale)) { + Log.e(TAG, "Locale " + locale + " is not enabled."); + // TODO: Investigate what we should do for disabled locales. + return false; + } + loadKeyboardForLocale(locale); // Reset dictionaries for the locale. try { loadDictionariesForLocale(locale); @@ -109,11 +168,12 @@ public class DistracterFilter { return false; } } - + if (mKeyboard == null) { + return false; + } final WordComposer composer = new WordComposer(); final int[] codePoints = StringUtils.toCodePointArray(testedWord); - final int[] coordinates; - coordinates = mKeyboard.getCoordinates(codePoints); + final int[] coordinates = mKeyboard.getCoordinates(codePoints); composer.setComposingWord(codePoints, coordinates, prevWordsInfo); final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord); diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterUtils.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterUtils.java deleted file mode 100644 index 8a711a24e..000000000 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterUtils.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.utils; - -import android.content.Context; - -import com.android.inputmethod.keyboard.Keyboard; -import com.android.inputmethod.keyboard.KeyboardSwitcher; -import com.android.inputmethod.keyboard.MainKeyboardView; - -public class DistracterFilterUtils { - private DistracterFilterUtils() { - // This utility class is not publicly instantiable. - } - - public static final DistracterFilter createDistracterFilter(final Context context, - final KeyboardSwitcher keyboardSwitcher) { - final MainKeyboardView mainKeyboardView = keyboardSwitcher.getMainKeyboardView(); - // TODO: Create Keyboard when mainKeyboardView is null. - // TODO: Figure out the most reasonable keyboard for the filter. Refer to the - // spellchecker's logic. - final Keyboard keyboard = (mainKeyboardView != null) ? - mainKeyboardView.getKeyboard() : null; - final DistracterFilter distracterFilter = new DistracterFilter(context, keyboard); - return distracterFilter; - } -} diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 28aaf2d1a..6223f86f4 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -343,7 +343,7 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the word has inputted. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); - dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty); + dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -363,7 +363,9 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, timestamp, 0 /* level */, 1 /* count */); - dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -378,8 +380,9 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz jsize word1Length = env->GetArrayLength(word1); int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints, - word1Length); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length); } // Returns how many language model params are processed. @@ -449,7 +452,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j // Use 1 for count to indicate the word has inputted. const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); - dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty); + dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty); if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); const std::vector<int> bigramTargetCodePoints( @@ -457,7 +460,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability, timestamp, 0 /* level */, 1 /* count */); - dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { return i + 1; @@ -541,7 +546,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength, + if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength, wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; @@ -561,8 +566,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } + const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength, + false /* isStartOfSentence */); for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { - if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength, + if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, &bigramProperty)) { LogUtils::logToJava(env, "Cannot add bigram to the new dict."); return false; diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index e69d2c46b..ef03d2b6d 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -203,12 +203,12 @@ class DicNode { return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1; } - // Used to get n-gram probability in DicNodeUtils + // Used to get n-gram probability in DicNodeUtils. int getPtNodePos() const { return mDicNodeProperties.getPtNodePos(); } - // Used to get n-gram probability in DicNodeUtils + // Used to get n-gram probability in DicNodeUtils. n is 1-indexed. int getNthPrevWordTerminalPtNodePos(const int n) const { if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index c860d82af..fe3167a61 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -74,28 +74,28 @@ int Dictionary::getProbability(const int *word, int length) const { return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } -int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, - int length1) const { +int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, + int length) const { TimeKeeper::setCurrentTime(); - return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1); + return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length); } -void Dictionary::addUnigramWord(const int *const word, const int length, +void Dictionary::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty); + mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); } -void Dictionary::addBigramWords(const int *const word0, const int length0, +void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty); + mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); } -void Dictionary::removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) { +void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); + mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length); } void Dictionary::flush(const char *const filePath) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index b63c61fbb..817d9f7fc 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -73,16 +73,16 @@ class Dictionary { int getProbability(const int *word, int length) const; int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const; + const int *word, int length) const; - void addUnigramWord(const int *const codePoints, const int codePointCount, + void addUnigramEntry(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); - void addBigramWords(const int *const word0, const int length0, + void addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - void removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1); + void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length); void flush(const char *const filePath); diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/bigram_property.h index 8d3429b5b..343af143c 100644 --- a/native/jni/src/suggest/core/dictionary/property/bigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/bigram_property.h @@ -23,6 +23,7 @@ namespace latinime { +// TODO: Change to NgramProperty. class BigramProperty { public: BigramProperty(const std::vector<int> *const targetCodePoints, diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index ce5a49f83..3fd815f98 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -29,6 +29,7 @@ class DicNodeVector; class DictionaryBigramsStructurePolicy; class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; +class PrevWordsInfo; class UnigramProperty; /* @@ -69,16 +70,16 @@ class DictionaryStructureWithBufferPolicy { virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; // Returns whether the update was success or not. - virtual bool addUnigramWord(const int *const word, const int length, + virtual bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) = 0; // Returns whether the update was success or not. - virtual bool addBigramWords(const int *const word0, const int length0, + virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) = 0; // Returns whether the update was success or not. - virtual bool removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) = 0; + virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) = 0; virtual void flush(const char *const filePath) = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index dc2b66a2c..f1e411f38 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -36,7 +36,7 @@ void DicTraverseSession::init(const Dictionary *const dictionary, ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; prevWordsInfo->getPrevWordsTerminalPtNodePos( - getDictionaryStructurePolicy(), mPrevWordsPtNodePos); + getDictionaryStructurePolicy(), mPrevWordsPtNodePos, true /* tryLowerCaseSearch */); } void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 70a99ef38..e4de1f4cc 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -41,13 +41,23 @@ class PrevWordsInfo { mIsBeginningOfSentence[0] = isBeginningOfSentence; } + bool isValid() const { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) { + return false; + } + } + return true; + } + void getPrevWordsTerminalPtNodePos( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, - int *const outPrevWordsTerminalPtNodePos) const { + int *const outPrevWordsTerminalPtNodePos, + const bool tryLowerCaseSearch) const { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, mPrevWordCodePoints[i], mPrevWordCodePointCount[i], - mIsBeginningOfSentence[i]); + mIsBeginningOfSentence[i], tryLowerCaseSearch); } } @@ -66,19 +76,37 @@ class PrevWordsInfo { dictStructurePolicy->getBigramsStructurePolicy(), pos); } + // n is 1-indexed. + const int *getNthPrevWordCodePoints(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return nullptr; + } + return mPrevWordCodePoints[n - 1]; + } + + // n is 1-indexed. + int getNthPrevWordCodePointCount(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return 0; + } + return mPrevWordCodePointCount[n - 1]; + } + private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); static int getTerminalPtNodePosOfWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, - const bool isBeginningOfSentence) { + const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { if (!dictStructurePolicy || !wordCodePoints) { return NOT_A_DICT_POS; } const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); - if (wordPtNodePos != NOT_A_DICT_POS) { + if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) { + // Return the position when when the word was found or doesn't try lower case + // search. return wordPtNodePos; } // Check bigrams for lower-cased previous word if original was not found. Useful for diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp index dde1af299..97e1120a3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp @@ -31,6 +31,7 @@ #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -163,10 +164,10 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons ptNodeParams.getTerminalId()); } -bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, +bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -218,10 +219,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len } } -bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { + const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1); + const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -257,8 +260,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le } } -bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, const int length1) { + const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1); + const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h index 2f8ad539c..95813881d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h @@ -108,14 +108,14 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutPolicy; } - bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1); + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length); void flush(const char *const filePath); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 59f1f29e9..f93d2894c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -145,7 +145,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str char dictPath[dictDirPathBufSize]; if (!FileUtils::getFilePathWithoutSuffix(headerFilePath, DictConstants::HEADER_FILE_EXTENSION, dictDirPathBufSize, dictPath)) { - AKLOGE("Dictionary file name is not valid as a ver4 dictionary. path: %s", path); + AKLOGE("Dictionary file name is not valid as a ver4 dictionary. header path: %s", + headerFilePath); ASSERT(false); return nullptr; } @@ -153,7 +154,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer), formatVersion); if (!dictBuffers || !dictBuffers->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s", - path); + dictPath); ASSERT(false); return nullptr; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 54d1e0f6d..6240d46aa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -81,24 +81,24 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutListPolicy; } - bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1) { + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 7da9e3072..439e90e44 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -23,6 +23,7 @@ #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -155,10 +156,10 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons ptNodeParams.getTerminalId()); } -bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, +bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -210,10 +211,10 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len } } -bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -221,15 +222,20 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH - || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. " - "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size()); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + AKLOGE("The word is too long to insert the ngram to the dictionary. " + "length: %d", bigramProperty->getTargetCodePoints()->size()); + return false; + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalPtNodePositionOfWord( @@ -239,7 +245,8 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le return false; } bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) { + if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, + &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -249,10 +256,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le } } -bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) { +bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -260,22 +267,26 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to remove the bigram to from the " - "dictionary. length0: %d, length1: %d", length0, length1); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (length > MAX_WORD_LENGTH) { + AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length); + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSerch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + const int wordPos = getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_DICT_POS) { + if (wordPos == NOT_A_DICT_POS) { return false; } - if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { mBigramCount--; return true; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index b78576484..008f2e423 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -90,13 +90,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutPolicy; } - bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, const int length1); void flush(const char *const filePath); |