aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-10-22 18:15:53 +0900
committerKeisuke Kuroyanagi <ksk@google.com>2014-10-22 18:15:53 +0900
commitb5ef884fbb6bfd08ce793604cdf7f0941e958a84 (patch)
tree4105ac7240773a2421a5ee44128a5fe2862ad430 /java/src/com/android/inputmethod
parent1249395563d43c818e12038231ec89dcbcdc5cd0 (diff)
downloadlatinime-b5ef884fbb6bfd08ce793604cdf7f0941e958a84.tar.gz
latinime-b5ef884fbb6bfd08ce793604cdf7f0941e958a84.tar.xz
latinime-b5ef884fbb6bfd08ce793604cdf7f0941e958a84.zip
Support dumping ngram entries.
Bug: 14425059 Change-Id: Ib03a0c3d166ed6f1e60c67127b28006d55143b6b
Diffstat (limited to 'java/src/com/android/inputmethod')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/WordProperty.java19
-rw-r--r--java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java18
2 files changed, 27 insertions, 10 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index b129c3e40..e7808e46e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -87,7 +87,7 @@ public final class WordProperty implements Comparable<WordProperty> {
final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
final boolean isBeginningOfSentence, final int[] probabilityInfo,
final ArrayList<int[][]> ngramPrevWordsArray,
- final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
+ final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
final ArrayList<int[]> shortcutTargets,
final ArrayList<Integer> shortcutProbabilities) {
@@ -102,16 +102,22 @@ public final class WordProperty implements Comparable<WordProperty> {
mHasNgrams = hasBigram;
final int relatedNgramCount = ngramTargets.size();
- final WordInfo currentWordInfo =
- mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
- : new WordInfo(mWord);
- final NgramContext ngramContext = new NgramContext(currentWordInfo);
for (int i = 0; i < relatedNgramCount; i++) {
final String ngramTargetString =
StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
- // TODO: Support n-gram.
+ final int[][] prevWords = ngramPrevWordsArray.get(i);
+ final boolean[] isBeginningOfSentenceArray =
+ ngramPrevWordIsBeginningOfSentenceArray.get(i);
+ final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
+ for (int j = 0; j < prevWords.length; j++) {
+ wordInfoArray[j] = isBeginningOfSentenceArray[j]
+ ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
+ : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
+ prevWords[j]));
+ }
+ final NgramContext ngramContext = new NgramContext(wordInfoArray);
ngrams.add(new NgramProperty(ngramTarget, ngramContext));
}
mNgrams = ngrams.isEmpty() ? null : ngrams;
@@ -126,6 +132,7 @@ public final class WordProperty implements Comparable<WordProperty> {
}
// TODO: Remove
+ @UsedForTesting
public ArrayList<WeightedString> getBigrams() {
if (null == mNgrams) {
return null;
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 248246232..4e0f5f583 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -17,6 +17,7 @@
package com.android.inputmethod.latin.utils;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.NgramProperty;
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
@@ -26,6 +27,8 @@ import java.util.HashMap;
public class CombinedFormatUtils {
public static final String DICTIONARY_TAG = "dictionary";
public static final String BIGRAM_TAG = "bigram";
+ public static final String NGRAM_TAG = "ngram";
+ public static final String NGRAM_PREV_WORD_TAG = "prev_word";
public static final String SHORTCUT_TAG = "shortcut";
public static final String PROBABILITY_TAG = "f";
public static final String HISTORICAL_INFO_TAG = "historicalInfo";
@@ -76,12 +79,19 @@ public class CombinedFormatUtils {
}
}
if (wordProperty.mHasNgrams) {
- // TODO: Support ngram.
- for (final WeightedString bigram : wordProperty.getBigrams()) {
- builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
+ for (final NgramProperty ngramProperty : wordProperty.mNgrams) {
+ builder.append(" " + NGRAM_TAG + "=" + ngramProperty.mTargetWord.mWord);
builder.append(",");
- builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
+ builder.append(formatProbabilityInfo(ngramProperty.mTargetWord.mProbabilityInfo));
builder.append("\n");
+ for (int i = 0; i < ngramProperty.mNgramContext.getPrevWordCount(); i++) {
+ builder.append(" " + NGRAM_PREV_WORD_TAG + "[" + i + "]="
+ + ngramProperty.mNgramContext.getNthPrevWord(i + 1));
+ if (ngramProperty.mNgramContext.isNthPrevWordBeginningOfSontence(i + 1)) {
+ builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
+ }
+ builder.append("\n");
+ }
}
}
return builder.toString();