aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java11
-rw-r--r--java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java137
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java79
-rw-r--r--native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp15
-rw-r--r--native/jni/src/bigram_dictionary.cpp22
-rw-r--r--native/jni/src/bigram_dictionary.h1
-rw-r--r--native/jni/src/dictionary.cpp5
-rw-r--r--native/jni/src/dictionary.h1
8 files changed, 241 insertions, 30 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index a644ec0d9..cc20f4294 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -17,6 +17,7 @@
package com.android.inputmethod.latin;
import android.content.Context;
+import android.text.TextUtils;
import com.android.inputmethod.keyboard.ProximityInfo;
@@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary {
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
private native void closeNative(long dict);
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
+ private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
boolean useFullEditDistance, char[] outputChars, int[] scores);
@@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary {
return isValidWordNative(mNativeDict, chars, chars.length);
}
+ // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
+ // calls when checking for changes in an entire dictionary.
+ public boolean isValidBigram(CharSequence word1, CharSequence word2) {
+ if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
+ int[] chars1 = StringUtils.toCodePointArray(word1.toString());
+ int[] chars2 = StringUtils.toCodePointArray(word2.toString());
+ return isValidBigramNative(mNativeDict, chars1, chars2);
+ }
+
@Override
public synchronized void close() {
closeInternal();
diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
index 65f97e987..22787c218 100644
--- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java
@@ -18,6 +18,7 @@ import android.content.ContentResolver;
import android.content.Context;
import android.database.ContentObserver;
import android.database.Cursor;
+import android.os.SystemClock;
import android.provider.BaseColumns;
import android.provider.ContactsContract.Contacts;
import android.text.TextUtils;
@@ -30,18 +31,27 @@ import java.util.Locale;
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
+ private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
private static final String NAME = "contacts";
+ private static boolean DEBUG = false;
+
/**
* Frequency for contacts information into the dictionary
*/
private static final int FREQUENCY_FOR_CONTACTS = 40;
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
+ /** The maximum number of contacts that this dictionary supports. */
+ private static final int MAX_CONTACT_COUNT = 10000;
+
private static final int INDEX_NAME = 1;
+ /** The number of contacts in the most recent dictionary rebuild. */
+ static private int sContactCountAtLastRebuild = 0;
+
private ContentObserver mObserver;
/**
@@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
if (cursor != null) {
try {
if (cursor.moveToFirst()) {
+ sContactCountAtLastRebuild = getContactCount();
addWords(cursor);
}
} finally {
@@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
private void addWords(Cursor cursor) {
clearFusionDictionary();
- while (!cursor.isAfterLast()) {
+ int count = 0;
+ while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
String name = cursor.getString(INDEX_NAME);
- if (name != null && -1 == name.indexOf('@')) {
+ if (isValidName(name)) {
addName(name);
+ ++count;
}
cursor.moveToNext();
}
}
+ private int getContactCount() {
+ // TODO: consider switching to a rawQuery("select count(*)...") on the database if
+ // performance is a bottleneck.
+ final Cursor cursor = mContext.getContentResolver().query(
+ Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
+ if (cursor != null) {
+ return cursor.getCount();
+ }
+ return 0;
+ }
+
/**
* Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
* bigrams depending on locale.
@@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
// TODO: Better tokenization for non-Latin writing systems
for (int i = 0; i < len; i++) {
if (Character.isLetter(name.codePointAt(i))) {
- int j;
- for (j = i + 1; j < len; j++) {
- final int codePoint = name.codePointAt(j);
- if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE
- || Character.isLetter(codePoint))) {
- break;
- }
- }
- String word = name.substring(i, j);
- i = j - 1;
+ int end = getWordEndPosition(name, len, i);
+ String word = name.substring(i, end);
+ i = end - 1;
// Don't add single letter words, possibly confuses
// capitalization of i.
final int wordLen = word.codePointCount(0, word.length());
@@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
}
}
}
+
+ /**
+ * Returns the index of the last letter in the word, starting from position startIndex.
+ */
+ private static int getWordEndPosition(String string, int len, int startIndex) {
+ int end;
+ int cp = 0;
+ for (end = startIndex + 1; end < len; end += Character.charCount(cp)) {
+ cp = string.codePointAt(end);
+ if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE
+ || Character.isLetter(cp))) {
+ break;
+ }
+ }
+ return end;
+ }
+
+ @Override
+ protected boolean hasContentChanged() {
+ final long startTime = SystemClock.uptimeMillis();
+ final int contactCount = getContactCount();
+ if (contactCount > MAX_CONTACT_COUNT) {
+ // If there are too many contacts then return false. In this rare case it is impossible
+ // to include all of them anyways and the cost of rebuilding the dictionary is too high.
+ // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
+ return false;
+ }
+ if (contactCount != sContactCountAtLastRebuild) {
+ return true;
+ }
+ // Check all contacts since it's not possible to find out which names have changed.
+ // This is needed because it's possible to receive extraneous onChange events even when no
+ // name has changed.
+ Cursor cursor = mContext.getContentResolver().query(
+ Contacts.CONTENT_URI, PROJECTION, null, null, null);
+ if (cursor != null) {
+ try {
+ if (cursor.moveToFirst()) {
+ while (!cursor.isAfterLast()) {
+ String name = cursor.getString(INDEX_NAME);
+ if (isValidName(name) && !isNameInDictionary(name)) {
+ if (DEBUG) {
+ Log.d(TAG, "Contact name missing: " + name + " (runtime = "
+ + (SystemClock.uptimeMillis() - startTime) + " ms)");
+ }
+ return true;
+ }
+ cursor.moveToNext();
+ }
+ }
+ } finally {
+ cursor.close();
+ }
+ }
+ if (DEBUG) {
+ Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
+ + " ms)");
+ }
+ return false;
+ }
+
+ private static boolean isValidName(String name) {
+ if (name != null && -1 == name.indexOf('@')) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Checks if the words in a name are in the current binary dictionary.
+ */
+ private boolean isNameInDictionary(String name) {
+ int len = name.codePointCount(0, name.length());
+ String prevWord = null;
+ for (int i = 0; i < len; i++) {
+ if (Character.isLetter(name.codePointAt(i))) {
+ int end = getWordEndPosition(name, len, i);
+ String word = name.substring(i, end);
+ i = end - 1;
+ final int wordLen = word.codePointCount(0, word.length());
+ if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
+ if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
+ if (!super.isValidBigramLocked(prevWord, word)) {
+ return false;
+ }
+ } else {
+ if (!super.isValidWordLocked(word)) {
+ return false;
+ }
+ }
+ prevWord = word;
+ }
+ }
+ }
+ return true;
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 3d89226c0..22d8f24f1 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -96,6 +96,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
protected abstract void loadDictionaryAsync();
/**
+ * Indicates that the source dictionary content has changed and a rebuild of the binary file is
+ * required. If it returns false, the next reload will only read the current binary dictionary
+ * from file. Note that the shared binary dictionary is locked when this is called.
+ */
+ protected abstract boolean hasContentChanged();
+
+ /**
* Gets the shared dictionary controller for the given filename.
*/
private static synchronized DictionaryController getSharedDictionaryController(
@@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* the native side.
*/
public void clearFusionDictionary() {
- mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions(
- new HashMap<String, String>(), false, false));
+ mFusionDictionary = new FusionDictionary(new Node(),
+ new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
+ false));
}
/**
@@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
protected boolean isValidWordInner(final CharSequence word) {
if (mLocalDictionaryController.tryLock()) {
try {
- if (mBinaryDictionary != null) {
- return mBinaryDictionary.isValidWord(word);
- }
+ return isValidWordLocked(word);
} finally {
mLocalDictionaryController.unlock();
}
@@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
return false;
}
+ protected boolean isValidWordLocked(final CharSequence word) {
+ if (mBinaryDictionary == null) return false;
+ return mBinaryDictionary.isValidWord(word);
+ }
+
+ protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) {
+ if (mBinaryDictionary == null) return false;
+ return mBinaryDictionary.isValidBigram(word1, word2);
+ }
+
+ protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) {
+ if (mLocalDictionaryController.tryLock()) {
+ try {
+ return isValidBigramLocked(word1, word2);
+ } finally {
+ mLocalDictionaryController.unlock();
+ }
+ }
+ return false;
+ }
+
+ protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) {
+ if (mBinaryDictionary == null) return false;
+ return mBinaryDictionary.isValidBigram(word1, word2);
+ }
+
/**
* Load the current binary dictionary from internal storage in a background thread. If no binary
* dictionary exists, this method will generate one.
@@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
}
/**
- * Sets whether or not the dictionary is out of date and requires a reload.
+ * Marks that the dictionary is out of date and requires a reload.
+ *
+ * @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild
+ * of the binary file is required. If not true, the next reload process will only read
+ * the current binary dictionary from file.
*/
- protected void setRequiresReload(final boolean reload) {
- final long time = reload ? SystemClock.uptimeMillis() : 0;
- mSharedDictionaryController.mLastUpdateRequestTime = time;
+ protected void setRequiresReload(final boolean requiresRebuild) {
+ final long time = SystemClock.uptimeMillis();
mLocalDictionaryController.mLastUpdateRequestTime = time;
+ mSharedDictionaryController.mLastUpdateRequestTime = time;
if (DEBUG) {
Log.d(TAG, "Reload request: request=" + time + " update="
+ mSharedDictionaryController.mLastUpdateTime);
@@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
// If the shared dictionary file does not exist or is out of date, the first
// instance that acquires the lock will generate a new one.
- mSharedDictionaryController.mLastUpdateTime = time;
- mLocalDictionaryController.mLastUpdateTime = time;
- generateBinaryDictionary();
- loadBinaryDictionary();
- } else if (mLocalDictionaryController.isOutOfDate()) {
- // Otherwise, if only the local dictionary for this instance is out of date, load
- // the shared dictionary from file.
- mLocalDictionaryController.mLastUpdateTime = time;
+ if (hasContentChanged()) {
+ // If the source content has changed, rebuild the binary dictionary.
+ mSharedDictionaryController.mLastUpdateTime = time;
+ generateBinaryDictionary();
+ loadBinaryDictionary();
+ } else {
+ // If not, the reload request was unnecessary so revert LastUpdateRequestTime
+ // to LastUpdateTime.
+ mSharedDictionaryController.mLastUpdateRequestTime =
+ mSharedDictionaryController.mLastUpdateTime;
+ }
+ } else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime
+ < mSharedDictionaryController.mLastUpdateTime) {
+ // Otherwise, if the local dictionary is older than the shared dictionary, load the
+ // shared dictionary.
loadBinaryDictionary();
}
+ mLocalDictionaryController.mLastUpdateTime = time;
} finally {
mSharedDictionaryController.unlock();
}
}
+ // TODO: cache the file's existence so that we avoid doing a disk access each time.
private boolean dictionaryFileExists() {
final File file = new File(mContext.getFilesDir(), mFilename);
return file.exists();
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index de9dbf9fd..b8f4ec77a 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -182,6 +182,20 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
return result;
}
+static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
+ jintArray wordArray1, jintArray wordArray2) {
+ Dictionary *dictionary = (Dictionary*)dict;
+ if (!dictionary) return (jboolean) false;
+ jint *word1 = env->GetIntArrayElements(wordArray1, 0);
+ jint *word2 = env->GetIntArrayElements(wordArray2, 0);
+ jsize length1 = word1 ? env->GetArrayLength(wordArray1) : 0;
+ jsize length2 = word2 ? env->GetArrayLength(wordArray2) : 0;
+ jboolean result = dictionary->isValidBigram(word1, length1, word2, length2);
+ env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT);
+ env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
+ return result;
+}
+
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
jchar *beforeChars = env->GetCharArrayElements(before, 0);
@@ -239,6 +253,7 @@ static JNINativeMethod sMethods[] = {
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
(void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+ {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
{"calcNormalizedScoreNative", "([CI[CII)D",
(void*)latinime_BinaryDictionary_calcNormalizedScore},
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 07031086c..7ed4dc439 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -128,7 +128,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
++bigramCount;
}
}
- } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+ } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
return bigramCount;
}
@@ -189,5 +189,25 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
return false;
}
+bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
+ int length2) {
+ const uint8_t* const root = DICT;
+ int pos = getBigramListPositionForWord(word1, length1);
+ // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
+ if (0 == pos) return false;
+ int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
+ if (NOT_VALID_WORD == nextWordPos) return false;
+ int bigramFlags;
+ do {
+ bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+ const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+ &pos);
+ if (bigramPos == nextWordPos) {
+ return true;
+ }
+ } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
+ return false;
+}
+
// TODO: Move functions related to bigram to here
} // namespace latinime
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 7328d5828..b8763a515 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -33,6 +33,7 @@ class BigramDictionary {
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter);
+ bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
~BigramDictionary();
private:
bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 9dc207223..8ea7c49fa 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -58,4 +58,9 @@ bool Dictionary::isValidWord(const int32_t *word, int length) {
return mUnigramDictionary->isValidWord(word, length);
}
+bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
+ int length2) {
+ return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
+}
+
} // namespace latinime
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index bce86d1ad..87891ee4d 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -53,6 +53,7 @@ class Dictionary {
}
bool isValidWord(const int32_t *word, int length);
+ bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
void *getDict() { return (void *)mDict; }
int getDictSize() { return mDictSize; }
int getMmapFd() { return mMmapFd; }