diff options
author | 2014-06-24 12:37:07 +0900 | |
---|---|---|
committer | 2014-06-24 12:37:07 +0900 | |
commit | 88fa47a27d45f6460971d0d223aa558e121b3478 (patch) | |
tree | 66ec3c729f26a82efdbfc84f6ce838f9b315b5fe /java/src/com | |
parent | f7322b166b88f72b19509d8416700d4ec8ea7753 (diff) | |
download | latinime-88fa47a27d45f6460971d0d223aa558e121b3478.tar.gz latinime-88fa47a27d45f6460971d0d223aa558e121b3478.tar.xz latinime-88fa47a27d45f6460971d0d223aa558e121b3478.zip |
Support migration/dump of Beginning-of-Sentence entries.
Bug: 14119293
Change-Id: Ie975138f819794d5c34a7a547be5a6117050e084
Diffstat (limited to 'java/src/com')
3 files changed, 27 insertions, 16 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 42105e2c3..335e52fef 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -64,11 +64,12 @@ public final class BinaryDictionary extends Dictionary { public static final int NOT_A_VALID_TIMESTAMP = -1; // Format to get unigram flags from native side via getWordPropertyNative(). - private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4; + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; // Format to get probability and historical info from native side via getWordPropertyNative(). public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; @@ -176,10 +177,12 @@ public final class BinaryDictionary extends Dictionary { private static native int getBigramProbabilityNative(long dict, int[] word0, boolean isBeginningOfSentence, int[] word1); private static native void getWordPropertyNative(long dict, int[] word, - int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, - ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, - ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); - private static native int getNextWordNative(long dict, int token, int[] outCodePoints); + boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, + int[] outProbabilityInfo, ArrayList<int[]> outBigramTargets, + ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outShortcutTargets, + ArrayList<Integer> outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints, + boolean[] outIsBeginningOfSentence); private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, @@ -358,8 +361,8 @@ public final class BinaryDictionary extends Dictionary { prevWordsInfo.mIsBeginningOfSentence, codePoints1); } - public WordProperty getWordProperty(final String word) { - if (TextUtils.isEmpty(word)) { + public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { + if (word == null) { return null; } final int[] codePoints = StringUtils.toCodePointArray(word); @@ -371,14 +374,15 @@ public final class BinaryDictionary extends Dictionary { final ArrayList<int[]> outBigramProbabilityInfo = new ArrayList<>(); final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); - getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo, - outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, - outShortcutProbabilities); + getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, + outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, + outShortcutTargets, outShortcutProbabilities); return new WordProperty(codePoints, outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], - outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo, + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); } @@ -399,9 +403,12 @@ public final class BinaryDictionary extends Dictionary { */ public GetNextWordPropertyResult getNextWordProperty(final int token) { final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; - final int nextToken = getNextWordNative(mNativeDict, token, codePoints); + final boolean[] isBeginningOfSentence = new boolean[1]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints, + isBeginningOfSentence); final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); - return new GetNextWordPropertyResult(getWordProperty(word), nextToken); + return new GetNextWordPropertyResult( + getWordProperty(word, isBeginningOfSentence[0]), nextToken); } // Add a unigram entry to binary dictionary with unigram attributes in native code. diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index 31cb59756..cd78e2235 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -70,8 +70,8 @@ public final class WordProperty implements Comparable<WordProperty> { // Construct word property using information from native code. // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. public WordProperty(final int[] codePoints, final boolean isNotAWord, - final boolean isBlacklisted, final boolean hasBigram, - final boolean hasShortcuts, final int[] probabilityInfo, + final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts, + final boolean isBeginningOfSentence, final int[] probabilityInfo, final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo, final ArrayList<int[]> shortcutTargets, final ArrayList<Integer> shortcutProbabilities) { @@ -79,7 +79,7 @@ public final class WordProperty implements Comparable<WordProperty> { mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mShortcutTargets = new ArrayList<>(); mBigrams = new ArrayList<>(); - mIsBeginningOfSentence = false; + mIsBeginningOfSentence = isBeginningOfSentence; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklisted; mHasShortcuts = hasShortcuts; diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java index c66007537..34f59e8bc 100644 --- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -31,6 +31,7 @@ public class CombinedFormatUtils { public static final String HISTORICAL_INFO_TAG = "historicalInfo"; public static final String HISTORICAL_INFO_SEPARATOR = ":"; public static final String WORD_TAG = "word"; + public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence"; public static final String NOT_A_WORD_TAG = "not_a_word"; public static final String BLACKLISTED_TAG = "blacklisted"; @@ -56,6 +57,9 @@ public class CombinedFormatUtils { builder.append(" " + WORD_TAG + "=" + wordProperty.mWord); builder.append(","); builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo)); + if (wordProperty.mIsBeginningOfSentence) { + builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true"); + } if (wordProperty.mIsNotAWord) { builder.append("," + NOT_A_WORD_TAG + "=true"); } |