From 9ffb94fa1318f354692fab7abf4775fa14397a96 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Fri, 16 Mar 2012 18:01:27 +0900 Subject: Rename the user history dictionary. UserBigramDictionary -> UserHistoryDictionary. Also update all methods, strings and comments, except those needed for backward compatibility (which only include the name of the database file). Change-Id: I0bccea29880dc566b90100575b83baaa947b03ae --- .../com/android/inputmethod/latin/LatinIME.java | 29 +- .../src/com/android/inputmethod/latin/Suggest.java | 24 +- .../inputmethod/latin/UserBigramDictionary.java | 469 --------------------- .../inputmethod/latin/UserHistoryDictionary.java | 457 ++++++++++++++++++++ 4 files changed, 480 insertions(+), 499 deletions(-) delete mode 100644 java/src/com/android/inputmethod/latin/UserBigramDictionary.java create mode 100644 java/src/com/android/inputmethod/latin/UserHistoryDictionary.java (limited to 'java/src') diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index d5cd35db6..99a4d54d8 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -202,7 +202,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar private boolean mShouldSwitchToLastSubtype = true; private UserDictionary mUserDictionary; - private UserBigramDictionary mUserBigramDictionary; + private UserHistoryDictionary mUserHistoryDictionary; private boolean mIsUserDictionaryAvailable; private LastComposedWord mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD; @@ -526,11 +526,9 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar resetContactsDictionary(oldContactsDictionary); - // TODO: rename UserBigramDictionary into UserHistoryDictionary - mUserBigramDictionary - = new UserBigramDictionary(this, this, localeStr, Suggest.DIC_USER_BIGRAM); - mSuggest.setUserUnigramDictionary(mUserBigramDictionary); - mSuggest.setUserBigramDictionary(mUserBigramDictionary); + mUserHistoryDictionary + = new UserHistoryDictionary(this, this, localeStr, Suggest.DIC_USER_HISTORY); + mSuggest.setUserHistoryDictionary(mUserHistoryDictionary); LocaleUtils.setSystemLocale(res, savedLocale); } @@ -772,7 +770,7 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar KeyboardView inputView = mKeyboardSwitcher.getKeyboardView(); if (inputView != null) inputView.closing(); - if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites(); + if (mUserHistoryDictionary != null) mUserHistoryDictionary.flushPendingWrites(); } private void onFinishInputViewInternal(boolean finishingInput) { @@ -1990,9 +1988,6 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar setSuggestionStripShown(isSuggestionsStripVisible()); } - /** - * Adds to the UserBigramDictionary and/or UserUnigramDictionary - */ private void addToUserHistoryDictionary(final CharSequence suggestion) { if (suggestion == null || suggestion.length() < 1) return; @@ -2004,16 +1999,16 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar return; } - if (mUserBigramDictionary != null) { - mUserBigramDictionary.addUnigram(suggestion.toString()); + if (mUserHistoryDictionary != null) { final InputConnection ic = getCurrentInputConnection(); + final CharSequence prevWord; if (null != ic) { - final CharSequence prevWord = - EditingUtils.getPreviousWord(ic, mSettingsValues.mWordSeparators); - if (null != prevWord) { - mUserBigramDictionary.addBigramPair(prevWord.toString(), suggestion.toString()); - } + prevWord = EditingUtils.getPreviousWord(ic, mSettingsValues.mWordSeparators); + } else { + prevWord = null; } + mUserHistoryDictionary.addToUserHistory(null == prevWord ? null : prevWord.toString(), + suggestion.toString()); } } diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 08f0e425b..9ae2506f4 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -62,9 +62,8 @@ public class Suggest implements Dictionary.WordCallback { public static final int DIC_USER_TYPED = 0; public static final int DIC_MAIN = 1; public static final int DIC_USER = 2; - public static final int DIC_USER_UNIGRAM = 3; + public static final int DIC_USER_HISTORY = 3; public static final int DIC_CONTACTS = 4; - public static final int DIC_USER_BIGRAM = 5; public static final int DIC_WHITELIST = 6; // If you add a type of dictionary, increment DIC_TYPE_LAST_ID // TODO: this value seems unused. Remove it? @@ -73,10 +72,10 @@ public class Suggest implements Dictionary.WordCallback { public static final String DICT_KEY_CONTACTS = "contacts"; // User dictionary, the system-managed one. public static final String DICT_KEY_USER = "user"; - // User unigram dictionary, internal to LatinIME - public static final String DICT_KEY_USER_UNIGRAM = "user_unigram"; - // User bigram dictionary, internal to LatinIME - public static final String DICT_KEY_USER_BIGRAM = "user_bigram"; + // User history dictionary for the unigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_UNIGRAM = "history_unigram"; + // User history dictionary for the bigram map, internal to LatinIME + public static final String DICT_KEY_USER_HISTORY_BIGRAM = "history_bigram"; public static final String DICT_KEY_WHITELIST ="whitelist"; private static final boolean DBG = LatinImeLogger.sDBG; @@ -203,12 +202,11 @@ public class Suggest implements Dictionary.WordCallback { addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); } - public void setUserUnigramDictionary(Dictionary userUnigramDictionary) { - addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary); - } - - public void setUserBigramDictionary(Dictionary userBigramDictionary) { - addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary); + public void setUserHistoryDictionary(Dictionary userHistoryDictionary) { + addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_HISTORY_UNIGRAM, + userHistoryDictionary); + addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_HISTORY_BIGRAM, + userHistoryDictionary); } public void setAutoCorrectionThreshold(double threshold) { @@ -347,7 +345,7 @@ public class Suggest implements Dictionary.WordCallback { // At second character typed, search the unigrams (scores being affected by bigrams) for (final String key : mUnigramDictionaries.keySet()) { // Skip UserUnigramDictionary and WhitelistDictionary to lookup - if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) + if (key.equals(DICT_KEY_USER_HISTORY_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) continue; final Dictionary dictionary = mUnigramDictionaries.get(key); dictionary.getWords(wordComposerForLookup, this, proximityInfo); diff --git a/java/src/com/android/inputmethod/latin/UserBigramDictionary.java b/java/src/com/android/inputmethod/latin/UserBigramDictionary.java deleted file mode 100644 index 91f84bed1..000000000 --- a/java/src/com/android/inputmethod/latin/UserBigramDictionary.java +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.latin; - -import android.content.ContentValues; -import android.content.Context; -import android.database.Cursor; -import android.database.sqlite.SQLiteDatabase; -import android.database.sqlite.SQLiteOpenHelper; -import android.database.sqlite.SQLiteQueryBuilder; -import android.os.AsyncTask; -import android.provider.BaseColumns; -import android.text.TextUtils; -import android.util.Log; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; - -/** - * Stores all the pairs user types in databases. Prune the database if the size - * gets too big. Unlike AutoDictionary, it even stores the pairs that are already - * in the dictionary. - */ -public class UserBigramDictionary extends ExpandableDictionary { - private static final String TAG = "UserBigramDictionary"; - - /** Any pair being typed or picked */ - private static final int FREQUENCY_FOR_TYPED = 2; - - /** Maximum frequency for all pairs */ - private static final int FREQUENCY_MAX = 127; - - /** Maximum number of pairs. Pruning will start when databases goes above this number. */ - private static int sMaxUserBigrams = 10000; - - /** - * When it hits maximum bigram pair, it will delete until you are left with - * only (sMaxUserBigrams - sDeleteUserBigrams) pairs. - * Do not keep this number small to avoid deleting too often. - */ - private static int sDeleteUserBigrams = 1000; - - /** - * Database version should increase if the database structure changes - */ - private static final int DATABASE_VERSION = 1; - - private static final String DATABASE_NAME = "userbigram_dict.db"; - - /** Name of the words table in the database */ - private static final String MAIN_TABLE_NAME = "main"; - // TODO: Consume less space by using a unique id for locale instead of the whole - // 2-5 character string. (Same TODO from AutoDictionary) - private static final String MAIN_COLUMN_ID = BaseColumns._ID; - private static final String MAIN_COLUMN_WORD1 = "word1"; - private static final String MAIN_COLUMN_WORD2 = "word2"; - private static final String MAIN_COLUMN_LOCALE = "locale"; - - /** Name of the frequency table in the database */ - private static final String FREQ_TABLE_NAME = "frequency"; - private static final String FREQ_COLUMN_ID = BaseColumns._ID; - private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; - private static final String FREQ_COLUMN_FREQUENCY = "freq"; - - private final LatinIME mIme; - - /** Locale for which this auto dictionary is storing words */ - private String mLocale; - - private HashSet mPendingWrites = new HashSet(); - private final Object mPendingWritesLock = new Object(); - private static volatile boolean sUpdatingDB = false; - - private final static HashMap sDictProjectionMap; - - static { - sDictProjectionMap = new HashMap(); - sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); - sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); - sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); - sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); - - sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); - sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); - sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); - } - - private static DatabaseHelper sOpenHelper = null; - - private static class Bigram { - public final String mWord1; - public final String mWord2; - public final int mFrequency; - - Bigram(String word1, String word2, int frequency) { - this.mWord1 = word1; - this.mWord2 = word2; - this.mFrequency = frequency; - } - - @Override - public boolean equals(Object bigram) { - if (!(bigram instanceof Bigram)) { - return false; - } - final Bigram bigram2 = (Bigram) bigram; - final boolean eq1 = - mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1); - if (!eq1) { - return false; - } - return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2); - } - - @Override - public int hashCode() { - return (mWord1 + " " + mWord2).hashCode(); - } - } - - public void setDatabaseMax(int maxUserBigram) { - sMaxUserBigrams = maxUserBigram; - } - - public void setDatabaseDelete(int deleteUserBigram) { - sDeleteUserBigrams = deleteUserBigram; - } - - public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { - super(context, dicTypeId); - mIme = ime; - mLocale = locale; - if (sOpenHelper == null) { - sOpenHelper = new DatabaseHelper(getContext()); - } - if (mLocale != null && mLocale.length() > 1) { - loadDictionary(); - } - } - - @Override - public void close() { - flushPendingWrites(); - // Don't close the database as locale changes will require it to be reopened anyway - // Also, the database is written to somewhat frequently, so it needs to be kept alive - // throughout the life of the process. - // mOpenHelper.close(); - super.close(); - } - - /** - * Return whether the passed charsequence is in the dictionary. - */ - @Override - public boolean isValidWord(final CharSequence word) { - // TODO: figure out what is the correct thing to do here. - return false; - } - - /** - * Add a single word without context. - * - * This is a temporary method to match the interface to UserUnigramDictionary. In the end - * this should be merged with addBigramPair. - */ - public void addUnigram(final String newWord) { - addBigramPair(null, newWord); - } - - /** - * Pair will be added to the user history dictionary. - * - * The first word may be null. That means we don't know the context, in other words, - * it's only a unigram. The first word may also be an empty string : this means start - * context, as in beginning of a sentence for example. - * The second word may not be null (a NullPointerException would be thrown). - */ - public int addBigramPair(final String word1, String word2) { - // remove caps if second word is autocapitalized - if (mIme != null && mIme.isAutoCapitalized()) { - word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); - } - // Do not insert a word as a bigram of itself - if (word2.equals(word1)) { - return 0; - } - - int freq; - if (null == word1) { - freq = FREQUENCY_FOR_TYPED; - super.addWord(word2, FREQUENCY_FOR_TYPED); - } else { - freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); - } - if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; - synchronized (mPendingWritesLock) { - if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { - mPendingWrites.add(new Bigram(word1, word2, freq)); - } else { - Bigram bi = new Bigram(word1, word2, freq); - mPendingWrites.remove(bi); - mPendingWrites.add(bi); - } - } - - return freq; - } - - /** - * Schedules a background thread to write any pending words to the database. - */ - public void flushPendingWrites() { - synchronized (mPendingWritesLock) { - // Nothing pending? Return - if (mPendingWrites.isEmpty()) return; - // Create a background thread to write the pending entries - new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale).execute(); - // Create a new map for writing new entries into while the old one is written to db - mPendingWrites = new HashSet(); - } - } - - /** Used for testing purpose **/ - void waitUntilUpdateDBDone() { - synchronized (mPendingWritesLock) { - while (sUpdatingDB) { - try { - Thread.sleep(100); - } catch (InterruptedException e) { - } - } - return; - } - } - - @Override - public void loadDictionaryAsync() { - // Load the words that correspond to the current input locale - final Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); - if (null == cursor) return; - try { - if (cursor.moveToFirst()) { - int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); - int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); - int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); - while (!cursor.isAfterLast()) { - String word1 = cursor.getString(word1Index); - String word2 = cursor.getString(word2Index); - int frequency = cursor.getInt(frequencyIndex); - // Safeguard against adding really long words. Stack may overflow due - // to recursive lookup - if (null == word1) { - super.addWord(word2, frequency); - } else if (word1.length() < MAX_WORD_LENGTH - && word2.length() < MAX_WORD_LENGTH) { - super.setBigram(word1, word2, frequency); - } - cursor.moveToNext(); - } - } - } finally { - cursor.close(); - } - } - - /** - * Query the database - */ - private static Cursor query(String selection, String[] selectionArgs) { - SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); - - // main INNER JOIN frequency ON (main._id=freq.pair_id) - qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" - + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." - + FREQ_COLUMN_PAIR_ID +")"); - - qb.setProjectionMap(sDictProjectionMap); - - // Get the database and run the query - try { - SQLiteDatabase db = sOpenHelper.getReadableDatabase(); - Cursor c = qb.query(db, - new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, - selection, selectionArgs, null, null, null); - return c; - } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { - // Can't open the database : presumably we can't access storage. That may happen - // when the device is wedged; do a best effort to still start the keyboard. - return null; - } - } - - /** - * This class helps open, create, and upgrade the database file. - */ - private static class DatabaseHelper extends SQLiteOpenHelper { - - DatabaseHelper(Context context) { - super(context, DATABASE_NAME, null, DATABASE_VERSION); - } - - @Override - public void onCreate(SQLiteDatabase db) { - db.execSQL("PRAGMA foreign_keys = ON;"); - db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" - + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," - + MAIN_COLUMN_WORD1 + " TEXT," - + MAIN_COLUMN_WORD2 + " TEXT," - + MAIN_COLUMN_LOCALE + " TEXT" - + ");"); - db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" - + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," - + FREQ_COLUMN_PAIR_ID + " INTEGER," - + FREQ_COLUMN_FREQUENCY + " INTEGER," - + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME - + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" - + ");"); - } - - @Override - public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { - Log.w(TAG, "Upgrading database from version " + oldVersion + " to " - + newVersion + ", which will destroy all old data"); - db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); - db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); - onCreate(db); - } - } - - /** - * Async task to write pending words to the database so that it stays in sync with - * the in-memory trie. - */ - private static class UpdateDbTask extends AsyncTask { - private final HashSet mMap; - private final DatabaseHelper mDbHelper; - private final String mLocale; - - public UpdateDbTask(DatabaseHelper openHelper, HashSet pendingWrites, - String locale) { - mMap = pendingWrites; - mLocale = locale; - mDbHelper = openHelper; - } - - /** Prune any old data if the database is getting too big. */ - private static void checkPruneData(SQLiteDatabase db) { - db.execSQL("PRAGMA foreign_keys = ON;"); - Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, - null, null, null, null, null); - try { - int totalRowCount = c.getCount(); - // prune out old data if we have too much data - if (totalRowCount > sMaxUserBigrams) { - int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams; - int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); - c.moveToFirst(); - int count = 0; - while (count < numDeleteRows && !c.isAfterLast()) { - String pairId = c.getString(pairIdColumnId); - // Deleting from MAIN table will delete the frequencies - // due to FOREIGN KEY .. ON DELETE CASCADE - db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", - new String[] { pairId }); - c.moveToNext(); - count++; - } - } - } finally { - c.close(); - } - } - - @Override - protected void onPreExecute() { - sUpdatingDB = true; - } - - @Override - protected Void doInBackground(Void... v) { - SQLiteDatabase db = null; - try { - db = mDbHelper.getWritableDatabase(); - } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { - // If we can't open the db, don't do anything. Exit through the next test - // for non-nullity of the db variable. - } - if (null == db) { - // Not much we can do. Just exit. - sUpdatingDB = false; - return null; - } - db.execSQL("PRAGMA foreign_keys = ON;"); - // Write all the entries to the db - Iterator iterator = mMap.iterator(); - while (iterator.hasNext()) { - // TODO: this process of making a text search for each pair each time - // is terribly inefficient. Optimize this. - Bigram bi = iterator.next(); - - // find pair id - final Cursor c; - if (null != bi.mWord1) { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " - + MAIN_COLUMN_LOCALE + "=?", - new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, null); - } else { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND " - + MAIN_COLUMN_LOCALE + "=?", - new String[] { bi.mWord2, mLocale }, null, null, null); - } - - int pairId; - if (c.moveToFirst()) { - // existing pair - pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); - db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", - new String[] { Integer.toString(pairId) }); - } else { - // new pair - Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, - getContentValues(bi.mWord1, bi.mWord2, mLocale)); - pairId = pairIdLong.intValue(); - } - c.close(); - - // insert new frequency - db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.mFrequency)); - } - checkPruneData(db); - sUpdatingDB = false; - - return null; - } - - private static ContentValues getContentValues(String word1, String word2, String locale) { - ContentValues values = new ContentValues(3); - values.put(MAIN_COLUMN_WORD1, word1); - values.put(MAIN_COLUMN_WORD2, word2); - values.put(MAIN_COLUMN_LOCALE, locale); - return values; - } - - private static ContentValues getFrequencyContentValues(int pairId, int frequency) { - ContentValues values = new ContentValues(2); - values.put(FREQ_COLUMN_PAIR_ID, pairId); - values.put(FREQ_COLUMN_FREQUENCY, frequency); - return values; - } - } - -} diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java new file mode 100644 index 000000000..4e798460c --- /dev/null +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java @@ -0,0 +1,457 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.content.ContentValues; +import android.content.Context; +import android.database.Cursor; +import android.database.sqlite.SQLiteDatabase; +import android.database.sqlite.SQLiteOpenHelper; +import android.database.sqlite.SQLiteQueryBuilder; +import android.os.AsyncTask; +import android.provider.BaseColumns; +import android.util.Log; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; + +/** + * Locally gathers stats about the words user types and various other signals like auto-correction + * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. + */ +public class UserHistoryDictionary extends ExpandableDictionary { + private static final String TAG = "UserHistoryDictionary"; + + /** Any pair being typed or picked */ + private static final int FREQUENCY_FOR_TYPED = 2; + + /** Maximum frequency for all pairs */ + private static final int FREQUENCY_MAX = 127; + + /** Maximum number of pairs. Pruning will start when databases goes above this number. */ + private static int sMaxHistoryBigrams = 10000; + + /** + * When it hits maximum bigram pair, it will delete until you are left with + * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. + * Do not keep this number small to avoid deleting too often. + */ + private static int sDeleteHistoryBigrams = 1000; + + /** + * Database version should increase if the database structure changes + */ + private static final int DATABASE_VERSION = 1; + + private static final String DATABASE_NAME = "userbigram_dict.db"; + + /** Name of the words table in the database */ + private static final String MAIN_TABLE_NAME = "main"; + // TODO: Consume less space by using a unique id for locale instead of the whole + // 2-5 character string. + private static final String MAIN_COLUMN_ID = BaseColumns._ID; + private static final String MAIN_COLUMN_WORD1 = "word1"; + private static final String MAIN_COLUMN_WORD2 = "word2"; + private static final String MAIN_COLUMN_LOCALE = "locale"; + + /** Name of the frequency table in the database */ + private static final String FREQ_TABLE_NAME = "frequency"; + private static final String FREQ_COLUMN_ID = BaseColumns._ID; + private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; + private static final String FREQ_COLUMN_FREQUENCY = "freq"; + + private final LatinIME mIme; + + /** Locale for which this auto dictionary is storing words */ + private String mLocale; + + private HashSet mPendingWrites = new HashSet(); + private final Object mPendingWritesLock = new Object(); + private static volatile boolean sUpdatingDB = false; + + private final static HashMap sDictProjectionMap; + + static { + sDictProjectionMap = new HashMap(); + sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); + sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); + sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); + sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); + + sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); + sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); + sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); + } + + private static DatabaseHelper sOpenHelper = null; + + private static class Bigram { + public final String mWord1; + public final String mWord2; + public final int mFrequency; + + Bigram(String word1, String word2, int frequency) { + this.mWord1 = word1; + this.mWord2 = word2; + this.mFrequency = frequency; + } + + @Override + public boolean equals(Object bigram) { + if (!(bigram instanceof Bigram)) { + return false; + } + final Bigram bigram2 = (Bigram) bigram; + final boolean eq1 = + mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1); + if (!eq1) { + return false; + } + return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2); + } + + @Override + public int hashCode() { + return (mWord1 + " " + mWord2).hashCode(); + } + } + + public void setDatabaseMax(int maxHistoryBigram) { + sMaxHistoryBigrams = maxHistoryBigram; + } + + public void setDatabaseDelete(int deleteHistoryBigram) { + sDeleteHistoryBigrams = deleteHistoryBigram; + } + + public UserHistoryDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { + super(context, dicTypeId); + mIme = ime; + mLocale = locale; + if (sOpenHelper == null) { + sOpenHelper = new DatabaseHelper(getContext()); + } + if (mLocale != null && mLocale.length() > 1) { + loadDictionary(); + } + } + + @Override + public void close() { + flushPendingWrites(); + // Don't close the database as locale changes will require it to be reopened anyway + // Also, the database is written to somewhat frequently, so it needs to be kept alive + // throughout the life of the process. + // mOpenHelper.close(); + super.close(); + } + + /** + * Return whether the passed charsequence is in the dictionary. + */ + @Override + public boolean isValidWord(final CharSequence word) { + // TODO: figure out what is the correct thing to do here. + return false; + } + + /** + * Pair will be added to the user history dictionary. + * + * The first word may be null. That means we don't know the context, in other words, + * it's only a unigram. The first word may also be an empty string : this means start + * context, as in beginning of a sentence for example. + * The second word may not be null (a NullPointerException would be thrown). + */ + public int addToUserHistory(final String word1, String word2) { + // remove caps if second word is autocapitalized + if (mIme != null && mIme.isAutoCapitalized()) { + word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); + } + super.addWord(word2, FREQUENCY_FOR_TYPED); + // Do not insert a word as a bigram of itself + if (word2.equals(word1)) { + return 0; + } + + int freq; + if (null == word1) { + freq = FREQUENCY_FOR_TYPED; + } else { + freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); + } + if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; + synchronized (mPendingWritesLock) { + if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { + mPendingWrites.add(new Bigram(word1, word2, freq)); + } else { + Bigram bi = new Bigram(word1, word2, freq); + mPendingWrites.remove(bi); + mPendingWrites.add(bi); + } + } + + return freq; + } + + /** + * Schedules a background thread to write any pending words to the database. + */ + public void flushPendingWrites() { + synchronized (mPendingWritesLock) { + // Nothing pending? Return + if (mPendingWrites.isEmpty()) return; + // Create a background thread to write the pending entries + new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale).execute(); + // Create a new map for writing new entries into while the old one is written to db + mPendingWrites = new HashSet(); + } + } + + /** Used for testing purpose **/ + void waitUntilUpdateDBDone() { + synchronized (mPendingWritesLock) { + while (sUpdatingDB) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + return; + } + } + + @Override + public void loadDictionaryAsync() { + // Load the words that correspond to the current input locale + final Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); + if (null == cursor) return; + try { + if (cursor.moveToFirst()) { + int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); + int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); + int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); + while (!cursor.isAfterLast()) { + String word1 = cursor.getString(word1Index); + String word2 = cursor.getString(word2Index); + int frequency = cursor.getInt(frequencyIndex); + // Safeguard against adding really long words. Stack may overflow due + // to recursive lookup + if (null == word1) { + super.addWord(word2, frequency); + } else if (word1.length() < MAX_WORD_LENGTH + && word2.length() < MAX_WORD_LENGTH) { + super.setBigram(word1, word2, frequency); + } + cursor.moveToNext(); + } + } + } finally { + cursor.close(); + } + } + + /** + * Query the database + */ + private static Cursor query(String selection, String[] selectionArgs) { + SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); + + // main INNER JOIN frequency ON (main._id=freq.pair_id) + qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" + + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." + + FREQ_COLUMN_PAIR_ID +")"); + + qb.setProjectionMap(sDictProjectionMap); + + // Get the database and run the query + try { + SQLiteDatabase db = sOpenHelper.getReadableDatabase(); + Cursor c = qb.query(db, + new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, + selection, selectionArgs, null, null, null); + return c; + } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { + // Can't open the database : presumably we can't access storage. That may happen + // when the device is wedged; do a best effort to still start the keyboard. + return null; + } + } + + /** + * This class helps open, create, and upgrade the database file. + */ + private static class DatabaseHelper extends SQLiteOpenHelper { + + DatabaseHelper(Context context) { + super(context, DATABASE_NAME, null, DATABASE_VERSION); + } + + @Override + public void onCreate(SQLiteDatabase db) { + db.execSQL("PRAGMA foreign_keys = ON;"); + db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" + + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," + + MAIN_COLUMN_WORD1 + " TEXT," + + MAIN_COLUMN_WORD2 + " TEXT," + + MAIN_COLUMN_LOCALE + " TEXT" + + ");"); + db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" + + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," + + FREQ_COLUMN_PAIR_ID + " INTEGER," + + FREQ_COLUMN_FREQUENCY + " INTEGER," + + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME + + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" + + ");"); + } + + @Override + public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { + Log.w(TAG, "Upgrading database from version " + oldVersion + " to " + + newVersion + ", which will destroy all old data"); + db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); + db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); + onCreate(db); + } + } + + /** + * Async task to write pending words to the database so that it stays in sync with + * the in-memory trie. + */ + private static class UpdateDbTask extends AsyncTask { + private final HashSet mMap; + private final DatabaseHelper mDbHelper; + private final String mLocale; + + public UpdateDbTask(DatabaseHelper openHelper, HashSet pendingWrites, + String locale) { + mMap = pendingWrites; + mLocale = locale; + mDbHelper = openHelper; + } + + /** Prune any old data if the database is getting too big. */ + private static void checkPruneData(SQLiteDatabase db) { + db.execSQL("PRAGMA foreign_keys = ON;"); + Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, + null, null, null, null, null); + try { + int totalRowCount = c.getCount(); + // prune out old data if we have too much data + if (totalRowCount > sMaxHistoryBigrams) { + int numDeleteRows = (totalRowCount - sMaxHistoryBigrams) + sDeleteHistoryBigrams; + int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); + c.moveToFirst(); + int count = 0; + while (count < numDeleteRows && !c.isAfterLast()) { + String pairId = c.getString(pairIdColumnId); + // Deleting from MAIN table will delete the frequencies + // due to FOREIGN KEY .. ON DELETE CASCADE + db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", + new String[] { pairId }); + c.moveToNext(); + count++; + } + } + } finally { + c.close(); + } + } + + @Override + protected void onPreExecute() { + sUpdatingDB = true; + } + + @Override + protected Void doInBackground(Void... v) { + SQLiteDatabase db = null; + try { + db = mDbHelper.getWritableDatabase(); + } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { + // If we can't open the db, don't do anything. Exit through the next test + // for non-nullity of the db variable. + } + if (null == db) { + // Not much we can do. Just exit. + sUpdatingDB = false; + return null; + } + db.execSQL("PRAGMA foreign_keys = ON;"); + // Write all the entries to the db + Iterator iterator = mMap.iterator(); + while (iterator.hasNext()) { + // TODO: this process of making a text search for each pair each time + // is terribly inefficient. Optimize this. + Bigram bi = iterator.next(); + + // find pair id + final Cursor c; + if (null != bi.mWord1) { + c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, + MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " + + MAIN_COLUMN_LOCALE + "=?", + new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, null); + } else { + c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, + MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND " + + MAIN_COLUMN_LOCALE + "=?", + new String[] { bi.mWord2, mLocale }, null, null, null); + } + + int pairId; + if (c.moveToFirst()) { + // existing pair + pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); + db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", + new String[] { Integer.toString(pairId) }); + } else { + // new pair + Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, + getContentValues(bi.mWord1, bi.mWord2, mLocale)); + pairId = pairIdLong.intValue(); + } + c.close(); + + // insert new frequency + db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.mFrequency)); + } + checkPruneData(db); + sUpdatingDB = false; + + return null; + } + + private static ContentValues getContentValues(String word1, String word2, String locale) { + ContentValues values = new ContentValues(3); + values.put(MAIN_COLUMN_WORD1, word1); + values.put(MAIN_COLUMN_WORD2, word2); + values.put(MAIN_COLUMN_LOCALE, locale); + return values; + } + + private static ContentValues getFrequencyContentValues(int pairId, int frequency) { + ContentValues values = new ContentValues(2); + values.put(FREQ_COLUMN_PAIR_ID, pairId); + values.put(FREQ_COLUMN_FREQUENCY, frequency); + return values; + } + } + +} -- cgit v1.2.3-83-g751a