aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-10-01 11:21:08 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-10-01 11:21:08 +0900
commit c6a6f6a9905ab98516d944ac85933d016e4147fb (patch)
tree d191a3f3e93e5972e35293d264900b5222147110
parent 79bb37d499ed6fcabe981153d5ff0b5b69509933 (diff)
download latinime-c6a6f6a9905ab98516d944ac85933d016e4147fb.tar.gz
latinime-c6a6f6a9905ab98516d944ac85933d016e4147fb.tar.xz
latinime-c6a6f6a9905ab98516d944ac85933d016e4147fb.zip
Introduce NgramProperty in Java side.
Bug: 14425059
Change-Id: I8b3458ad22730b3dccbe0caea2c5930f5276dc82
-rw-r--r-- java/src/com/android/inputmethod/latin/NgramContext.java 10
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/NgramProperty.java 26
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/WordProperty.java 57
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java 7
-rw-r--r-- tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java 18
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java 4
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java 4
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java 5
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java 5
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java 2
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java 4
-rw-r--r-- tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java 6
12 files changed, 105 insertions, 43 deletions
diff --git a/java/src/com/android/inputmethod/latin/NgramContext.java b/java/src/com/android/inputmethod/latin/NgramContext.java
index c35c6e2c8..6d438584f 100644
--- a/java/src/com/android/inputmethod/latin/NgramContext.java
+++ b/java/src/com/android/inputmethod/latin/NgramContext.java
@@ -169,8 +169,14 @@ public class NgramContext {
@Override
public int hashCode() {
- // Just for having equals().
- return mPrevWordsInfo[0].hashCode();
+ // XOR together the hashes of the leading words; a null or empty entry is presumed to end
+ // the context. NOTE(review): confirm this matches the entries that equals() compares.
+ int hashValue = 0;
+ for (final WordInfo wordInfo : mPrevWordsInfo) {
+ if (wordInfo == null || WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) break;
+ hashValue ^= wordInfo.hashCode();
+ }
+ return hashValue;
}
@Override
diff --git a/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java
new file mode 100644
index 000000000..99e0e273f
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java
@@ -0,0 +1,39 @@
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.NgramContext;
+
+/**
+ * Holds one n-gram entry: the target word together with the context of previous words
+ * that it is associated with.
+ */
+public class NgramProperty {
+    public final WeightedString mTargetWord;
+    public final NgramContext mNgramContext;
+
+    public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) {
+        mNgramContext = ngramContext;
+        mTargetWord = targetWord;
+    }
+
+    /** Combines the hashes of both fields with XOR. */
+    @Override
+    public int hashCode() {
+        final int targetHash = mTargetWord.hashCode();
+        final int contextHash = mNgramContext.hashCode();
+        return targetHash ^ contextHash;
+    }
+
+    /** Two instances are equal when both the target word and the context are equal. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o instanceof NgramProperty) {
+            final NgramProperty other = (NgramProperty)o;
+            return mTargetWord.equals(other.mTargetWord)
+                    && mNgramContext.equals(other.mNgramContext);
+        }
+        return false;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index cd78e2235..46705f9db 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.NgramContext;
+import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;
@@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> {
public final String mWord;
public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets;
- public final ArrayList<WeightedString> mBigrams;
+ public final ArrayList<NgramProperty> mNgrams;
// TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts;
- public final boolean mHasBigrams;
+ public final boolean mHasNgrams;
private int mHashCode = 0;
+ // TODO: Support n-gram.
@UsedForTesting
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets,
@@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = word;
mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets;
- mBigrams = bigrams;
+ mNgrams = new ArrayList<>();
+ final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
+ if (bigrams != null) {
+ for (final WeightedString bigramTarget : bigrams) {
+ mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
+ }
+ }
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
- mHasBigrams = bigrams != null && !bigrams.isEmpty();
+ mHasNgrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
}
@@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = new ArrayList<>();
- mBigrams = new ArrayList<>();
+ mNgrams = new ArrayList<>();
mIsBeginningOfSentence = isBeginningOfSentence;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts;
- mHasBigrams = hasBigram;
-
- final int bigramTargetCount = bigramTargets.size();
- for (int i = 0; i < bigramTargetCount; i++) {
- final String bigramTargetString =
+ mHasNgrams = hasBigram;
+
+ final int relatedNgramCount = bigramTargets.size();
+ final WordInfo currentWordInfo =
+ mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
+ final NgramContext ngramContext = new NgramContext(currentWordInfo);
+ for (int i = 0; i < relatedNgramCount; i++) {
+ final String ngramTargetString =
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
- mBigrams.add(new WeightedString(bigramTargetString,
- createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
+ final WeightedString ngramTarget = new WeightedString(ngramTargetString,
+ createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
+ // TODO: Support n-gram.
+ mNgrams.add(new NgramProperty(ngramTarget, ngramContext));
}
final int shortcutTargetCount = shortcutTargets.size();
@@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> {
}
}
+ // TODO: Remove
+ // Collects the target words of the n-grams whose context reports exactly one previous word.
+ public ArrayList<WeightedString> getBigrams() {
+ final ArrayList<WeightedString> bigramTargets = new ArrayList<>();
+ for (final NgramProperty ngramProperty : mNgrams) {
+ if (ngramProperty.mNgramContext.getPrevWordCount() != 1) continue;
+ bigramTargets.add(ngramProperty.mTargetWord);
+ }
+ return bigramTargets;
+ }
+
public int getProbability() {
return mProbabilityInfo.mProbability;
}
@@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> {
return Arrays.hashCode(new Object[] {
word.mWord,
word.mProbabilityInfo,
- word.mShortcutTargets.hashCode(),
- word.mBigrams.hashCode(),
+ word.mShortcutTargets,
+ word.mNgrams,
word.mIsNotAWord,
word.mIsBlacklistEntry
});
@@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> {
if (!(o instanceof WordProperty)) return false;
WordProperty w = (WordProperty)o;
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
- && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
+ && mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams)
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
- && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
+ && mHasNgrams == w.mHasNgrams && mHasShortcuts == w.mHasShortcuts;
}
@Override
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 34f59e8bc..7e8e55990 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -67,7 +67,7 @@ public class CombinedFormatUtils {
builder.append("," + BLACKLISTED_TAG + "=true");
}
builder.append("\n");
- if (wordProperty.mShortcutTargets != null) {
+ if (wordProperty.mHasShortcuts) {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
builder.append(",");
@@ -75,8 +75,9 @@ public class CombinedFormatUtils {
builder.append("\n");
}
}
- if (wordProperty.mBigrams != null) {
- for (final WeightedString bigram : wordProperty.mBigrams) {
+ if (wordProperty.mHasNgrams) {
+ // TODO: Support ngram.
+ for (final WeightedString bigram : wordProperty.getBigrams()) {
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
builder.append(",");
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index e6f00b668..9c7792cf2 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
- assertEquals(false, wordProperty.mHasBigrams);
+ assertEquals(false, wordProperty.mHasNgrams);
assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
assertTrue(wordProperty.mShortcutTargets.isEmpty());
@@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final HashSet<String> bigramWord1s = bigrams.get(word0);
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
false /* isBeginningOfSentence */);
- assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
- for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
- final String word1 = wordProperty.mBigrams.get(j).mWord;
+ assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
+ // TODO: Support ngram.
+ for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+ final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
- assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+ assertEquals(bigramProbability, bigramTarget.getProbability());
}
}
}
@@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mProbabilityInfo.mProbability);
wordSet.remove(word0);
final HashSet<String> bigramWord1s = bigrams.get(word0);
- for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
- final String word1 = wordProperty.mBigrams.get(j).mWord;
+ // TODO: Support ngram.
+ for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+ final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
final Pair<String, String> bigram = new Pair<>(word0, word1);
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
- assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+ assertEquals(bigramProbability, bigramTarget.getProbability());
}
bigramSet.remove(bigram);
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index 406046a74..f8b68e0ce 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertTrue(shortcutList.isEmpty());
}
- for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
- final String word1 = wordProperty.mBigrams.get(j).mWord;
+ for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+ final String word1 = bigramTarget.mWord;
final Pair<String, String> bigram = new Pair<>(word0, word1);
assertTrue(bigramSet.contains(bigram));
bigramSet.remove(bigram);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index 65b84d5f7..18f4bcf5f 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
}
// Insert bigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {
- if (wordProperty.mBigrams == null) {
+ if (!wordProperty.mHasNgrams) {
continue;
}
final String word0 = wordProperty.mWord;
- for (final WeightedString bigram : wordProperty.mBigrams) {
+ for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
}
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 5e8417ed6..0da915a75 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
}
// Insert bigrams into the fusion dictionary.
+ // TODO: Support ngrams.
for (final WordProperty wordProperty : wordProperties) {
- if (wordProperty.mBigrams == null) {
+ if (!wordProperty.mHasNgrams) {
continue;
}
final String word0 = wordProperty.mWord;
- for (final WeightedString bigram : wordProperty.mBigrams) {
+ for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
}
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 74da93766..401ffde6d 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
for (final WordProperty word0Property : dict) {
- if (null == word0Property.mBigrams) continue;
- for (final WeightedString word1 : word0Property.mBigrams) {
+ if (!word0Property.mHasNgrams) continue;
+ // TODO: Support ngram.
+ for (final WeightedString word1 : word0Property.getBigrams()) {
final NgramContext ngramContext =
new NgramContext(new NgramContext.WordInfo(word0Property.mWord));
if (!binaryDict.addNgramEntry(ngramContext, word1.mWord,
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 94d1ae8bb..c6818ce0c 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command {
hasDifferences = true;
}
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
- "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+ "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
"Shortcut", word0Property.mShortcutTargets,
word1PtNode.getShortcutTargets());
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 9b2567fd3..2850e1ff6 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -45,8 +45,8 @@ public class Info extends Dicttool.Command {
int whitelistCount = 0;
for (final WordProperty wordProperty : dict) {
++wordCount;
- if (null != wordProperty.mBigrams) {
- bigramCount += wordProperty.mBigrams.size();
+ if (wordProperty.mHasNgrams) {
+ bigramCount += wordProperty.mNgrams.size();
}
if (null != wordProperty.mShortcutTargets) {
shortcutCount += wordProperty.mShortcutTargets.size();
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index bdec44761..cd3ce70eb 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -353,7 +353,7 @@ public class XmlDictInputOutput {
+ "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ "\">");
- if (null != wordProperty.mShortcutTargets) {
+ if (wordProperty.mHasShortcuts) {
destination.write("\n");
for (WeightedString target : wordProperty.mShortcutTargets) {
destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
@@ -362,9 +362,9 @@ public class XmlDictInputOutput {
}
destination.write(" ");
}
- if (null != wordProperty.mBigrams) {
+ if (wordProperty.mHasNgrams) {
destination.write("\n");
- for (WeightedString bigram : wordProperty.mBigrams) {
+ for (WeightedString bigram : wordProperty.getBigrams()) {
destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n");