aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-02-06 15:13:33 +0900
committerKeisuke Kuroyanagi <ksk@google.com>2014-02-06 15:13:33 +0900
commit5f5feeba13f6f1a907d90365d8037a361d0ff5da (patch)
treee3c827e288873d7157d50224bdb0064dbc54de49
parentdf1d3e733e2b000c776e74b54d3c62f0d433b013 (diff)
downloadlatinime-5f5feeba13f6f1a907d90365d8037a361d0ff5da.tar.gz
latinime-5f5feeba13f6f1a907d90365d8037a361d0ff5da.tar.xz
latinime-5f5feeba13f6f1a907d90365d8037a361d0ff5da.zip
Consolidate WordProperty and Word.
Bug: 11281877 Bug: 12810574 Change-Id: I9dc99188f80f25a8780c1860dab46e4aa80a23e5
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java2
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java2
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java16
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java20
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java32
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java24
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Word.java100
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/WordProperty.java189
-rw-r--r--java/src/com/android/inputmethod/latin/utils/WordProperty.java127
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java43
-rw-r--r--tools/dicttool/Android.mk3
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java37
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java52
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java14
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java33
-rw-r--r--tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java6
16 files changed, 341 insertions, 359 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 80a27e23f..3a5fe439b 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -26,12 +26,12 @@ import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
-import com.android.inputmethod.latin.utils.WordProperty;
import java.io.File;
import java.util.ArrayList;
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index f0dc7720d..54730e645 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -23,13 +23,13 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.FormatSpec;
+import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.utils.AsyncResultHolder;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
-import com.android.inputmethod.latin.utils.WordProperty;
import java.io.File;
import java.util.ArrayList;
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 9f2345962..782ada3f4 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -606,19 +606,21 @@ public final class BinaryDictDecoderUtils {
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
if (null != dict) {
- for (final Word w : dict) {
- if (w.mIsBlacklistEntry) {
- newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
+ for (final WordProperty wordProperty : dict) {
+ if (wordProperty.mIsBlacklistEntry) {
+ newDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
+ wordProperty.mIsNotAWord);
} else {
- newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
+ newDict.add(wordProperty.mWord, wordProperty.getProbability(),
+ wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
}
}
- for (final Word w : dict) {
+ for (final WordProperty wordProperty : dict) {
// By construction a binary dictionary may not have bigrams pointing to
// words that are not also registered as unigrams so we don't have to avoid
// them explicitly here.
- for (final WeightedString bigram : w.mBigrams) {
- newDict.setBigram(w.mWord, bigram.mWord, bigram.getProbability());
+ for (final WeightedString bigram : wordProperty.mBigrams) {
+ newDict.setBigram(wordProperty.mWord, bigram.mWord, bigram.getProbability());
}
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index ef23acb71..e9561afd3 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -31,7 +31,7 @@ import java.util.LinkedList;
* A dictionary that can fusion heads and tails of words for more compression.
*/
@UsedForTesting
-public final class FusionDictionary implements Iterable<Word> {
+public final class FusionDictionary implements Iterable<WordProperty> {
private static final boolean DBG = MakedictLog.DBG;
private static int CHARACTER_NOT_FOUND_INDEX = -1;
@@ -76,8 +76,12 @@ public final class FusionDictionary implements Iterable<Word> {
public ProbabilityInfo mProbabilityInfo;
public WeightedString(final String word, final int probability) {
+ this(word, new ProbabilityInfo(probability));
+ }
+
+ public WeightedString(final String word, final ProbabilityInfo probabilityInfo) {
mWord = word;
- mProbabilityInfo = new ProbabilityInfo(probability);
+ mProbabilityInfo = probabilityInfo;
}
public int getProbability() {
@@ -90,9 +94,7 @@ public final class FusionDictionary implements Iterable<Word> {
@Override
public int hashCode() {
- return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo.mProbability,
- mProbabilityInfo.mTimestamp, mProbabilityInfo.mLevel,
- mProbabilityInfo.mCount });
+ return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo});
}
@Override
@@ -704,7 +706,7 @@ public final class FusionDictionary implements Iterable<Word> {
*
* This is purely for convenience.
*/
- public static final class DictionaryIterator implements Iterator<Word> {
+ public static final class DictionaryIterator implements Iterator<WordProperty> {
private static final class Position {
public Iterator<PtNode> pos;
public int length;
@@ -734,7 +736,7 @@ public final class FusionDictionary implements Iterable<Word> {
}
@Override
- public Word next() {
+ public WordProperty next() {
Position currentPos = mPositions.getLast();
mCurrentString.setLength(currentPos.length);
@@ -751,7 +753,7 @@ public final class FusionDictionary implements Iterable<Word> {
mPositions.addLast(currentPos);
}
if (currentPtNode.mFrequency >= 0) {
- return new Word(mCurrentString.toString(), currentPtNode.mFrequency,
+ return new WordProperty(mCurrentString.toString(), currentPtNode.mFrequency,
currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry);
}
@@ -777,7 +779,7 @@ public final class FusionDictionary implements Iterable<Word> {
* and say : for (WordProperty w : x) {}
*/
@Override
- public Iterator<Word> iterator() {
+ public Iterator<WordProperty> iterator() {
return new DictionaryIterator(mRootNodeArray.mData);
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
index 79f924cc6..d6ce88d72 100644
--- a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
+++ b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
@@ -19,6 +19,8 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import java.util.Arrays;
+
public final class ProbabilityInfo {
public final int mProbability;
// mTimestamp, mLevel and mCount are historical info. These values depend on the
@@ -45,19 +47,29 @@ public final class ProbabilityInfo {
}
@Override
+ public int hashCode() {
+ if (hasHistoricalInfo()) {
+ return Arrays.hashCode(new Object[] { mProbability, mTimestamp, mLevel, mCount });
+ } else {
+ return Arrays.hashCode(new Object[] { mProbability });
+ }
+ }
+
+ @Override
public String toString() {
- return mTimestamp + ":" + mLevel + ":" + mCount;
+ return "f=" + mProbability + (hasHistoricalInfo() ?
+ ",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
}
@Override
public boolean equals(Object o) {
- if (o == this) return true;
- if (!(o instanceof ProbabilityInfo)) return false;
- final ProbabilityInfo p = (ProbabilityInfo)o;
- if (!hasHistoricalInfo() && !p.hasHistoricalInfo()) {
- return mProbability == p.mProbability;
- }
- return mProbability == p.mProbability && mTimestamp == p.mTimestamp && mLevel == p.mLevel
- && mCount == p.mCount;
- }
+ if (o == this) return true;
+ if (!(o instanceof ProbabilityInfo)) return false;
+ final ProbabilityInfo p = (ProbabilityInfo)o;
+ if (!hasHistoricalInfo() && !p.hasHistoricalInfo()) {
+ return mProbability == p.mProbability;
+ }
+ return mProbability == p.mProbability && mTimestamp == p.mTimestamp && mLevel == p.mLevel
+ && mCount == p.mCount;
+ }
} \ No newline at end of file
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index a5a613810..64edb01b2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -69,27 +69,29 @@ public class Ver4DictEncoder implements DictEncoder {
// Somehow createEmptyDictFile returned true, but the file was not created correctly
throw new IOException("Cannot create dictionary file");
}
- for (final Word word : dict) {
+ for (final WordProperty wordProperty : dict) {
// TODO: switch to addMultipleDictionaryEntries when they support shortcuts
- if (null == word.mShortcutTargets || word.mShortcutTargets.isEmpty()) {
- binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
+ binaryDict.addUnigramWord(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */,
- word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
+ wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
+ 0 /* timestamp */);
} else {
- for (final WeightedString shortcutTarget : word.mShortcutTargets) {
- binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ binaryDict.addUnigramWord(wordProperty.mWord, wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(),
- word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
+ wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
+ 0 /* timestamp */);
}
}
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDict.flushWithGC();
}
}
- for (final Word word0 : dict) {
- if (null == word0.mBigrams) continue;
- for (final WeightedString word1 : word0.mBigrams) {
- binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.getProbability(),
+ for (final WordProperty word0Property : dict) {
+ if (null == word0Property.mBigrams) continue;
+ for (final WeightedString word1 : word0Property.mBigrams) {
+ binaryDict.addBigramWords(word0Property.mWord, word1.mWord, word1.getProbability(),
0 /* timestamp */);
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDict.flushWithGC();
diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java
deleted file mode 100644
index 0eabb7bf3..000000000
--- a/java/src/com/android/inputmethod/latin/makedict/Word.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-/**
- * Utility class for a word with a frequency.
- *
- * This is chiefly used to iterate a dictionary.
- */
-public final class Word implements Comparable<Word> {
- public final String mWord;
- public final int mFrequency;
- public final ArrayList<WeightedString> mShortcutTargets;
- public final ArrayList<WeightedString> mBigrams;
- public final boolean mIsNotAWord;
- public final boolean mIsBlacklistEntry;
-
- private int mHashCode = 0;
-
- public Word(final String word, final int frequency,
- final ArrayList<WeightedString> shortcutTargets,
- final ArrayList<WeightedString> bigrams,
- final boolean isNotAWord, final boolean isBlacklistEntry) {
- mWord = word;
- mFrequency = frequency;
- mShortcutTargets = shortcutTargets;
- mBigrams = bigrams;
- mIsNotAWord = isNotAWord;
- mIsBlacklistEntry = isBlacklistEntry;
- }
-
- private static int computeHashCode(Word word) {
- return Arrays.hashCode(new Object[] {
- word.mWord,
- word.mFrequency,
- word.mShortcutTargets.hashCode(),
- word.mBigrams.hashCode(),
- word.mIsNotAWord,
- word.mIsBlacklistEntry
- });
- }
-
- /**
- * Three-way comparison.
- *
- * A Word x is greater than a word y if x has a higher frequency. If they have the same
- * frequency, they are sorted in lexicographic order.
- */
- @Override
- public int compareTo(Word w) {
- if (mFrequency < w.mFrequency) return 1;
- if (mFrequency > w.mFrequency) return -1;
- return mWord.compareTo(w.mWord);
- }
-
- /**
- * Equality test.
- *
- * Words are equal if they have the same frequency, the same spellings, and the same
- * attributes.
- */
- @Override
- public boolean equals(Object o) {
- if (o == this) return true;
- if (!(o instanceof Word)) return false;
- Word w = (Word)o;
- return mFrequency == w.mFrequency && mWord.equals(w.mWord)
- && mShortcutTargets.equals(w.mShortcutTargets)
- && mBigrams.equals(w.mBigrams)
- && mIsNotAWord == w.mIsNotAWord
- && mIsBlacklistEntry == w.mIsBlacklistEntry;
- }
-
- @Override
- public int hashCode() {
- if (mHashCode == 0) {
- mHashCode = computeHashCode(this);
- }
- return mHashCode;
- }
-}
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
new file mode 100644
index 000000000..e764ae3d6
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CollectionUtils;
+import com.android.inputmethod.latin.utils.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Utility class for a word with a probability and its attributes (shortcut targets, bigrams,
+ * not-a-word and blacklist flags).
+ *
+ * This is chiefly used to iterate a dictionary. The shortcut and bigram lists may be null when
+ * the instance is built from in-memory data; null is treated as "none".
+ */
+public final class WordProperty implements Comparable<WordProperty> {
+    public final String mWord;
+    public final ProbabilityInfo mProbabilityInfo;
+    // May be null when built through the (word, probability, ...) constructor.
+    public final ArrayList<WeightedString> mShortcutTargets;
+    // May be null when built through the (word, probability, ...) constructor.
+    public final ArrayList<WeightedString> mBigrams;
+    public final boolean mIsNotAWord;
+    public final boolean mIsBlacklistEntry;
+    public final boolean mHasShortcuts;
+    public final boolean mHasBigrams;
+
+    // Lazily computed hash code; 0 means "not computed yet".
+    private int mHashCode = 0;
+
+    /**
+     * Constructs a word property from in-memory data.
+     *
+     * @param word the spelling of the word.
+     * @param probability the unigram probability; no historical info is attached.
+     * @param shortcutTargets the shortcut targets, stored as is. May be null or empty.
+     * @param bigrams the bigrams, stored as is. May be null or empty.
+     * @param isNotAWord whether the entry is flagged as not being a real word.
+     * @param isBlacklistEntry whether the entry is blacklisted.
+     */
+    public WordProperty(final String word, final int probability,
+            final ArrayList<WeightedString> shortcutTargets,
+            final ArrayList<WeightedString> bigrams,
+            final boolean isNotAWord, final boolean isBlacklistEntry) {
+        mWord = word;
+        mProbabilityInfo = new ProbabilityInfo(probability);
+        mShortcutTargets = shortcutTargets;
+        mBigrams = bigrams;
+        mIsNotAWord = isNotAWord;
+        mIsBlacklistEntry = isBlacklistEntry;
+        // Consumers null-check these lists, so a null list must be accepted here and simply
+        // means the word has no bigrams / no shortcuts.
+        mHasBigrams = null != bigrams && !bigrams.isEmpty();
+        mHasShortcuts = null != shortcutTargets && !shortcutTargets.isEmpty();
+    }
+
+    // Builds a ProbabilityInfo from the int array layout described by the
+    // BinaryDictionary.FORMAT_WORD_PROPERTY_*_INDEX constants.
+    private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
+        return new ProbabilityInfo(
+                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
+                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
+                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
+                probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
+    }
+
+    // Construct word property using information from native code.
+    // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
+    // bigramTargets and bigramProbabilityInfo are parallel lists, as are shortcutTargets and
+    // shortcutProbabilities: element i of one describes element i of the other.
+    public WordProperty(final int[] codePoints, final boolean isNotAWord,
+            final boolean isBlacklisted, final boolean hasBigram,
+            final boolean hasShortcuts, final int[] probabilityInfo,
+            final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
+            final ArrayList<int[]> shortcutTargets,
+            final ArrayList<Integer> shortcutProbabilities) {
+        mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
+        mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
+        mShortcutTargets = CollectionUtils.newArrayList();
+        mBigrams = CollectionUtils.newArrayList();
+        mIsNotAWord = isNotAWord;
+        mIsBlacklistEntry = isBlacklisted;
+        mHasShortcuts = hasShortcuts;
+        mHasBigrams = hasBigram;
+
+        final int bigramTargetCount = bigramTargets.size();
+        for (int i = 0; i < bigramTargetCount; i++) {
+            final String bigramTargetString =
+                    StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
+            mBigrams.add(new WeightedString(bigramTargetString,
+                    createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
+        }
+
+        final int shortcutTargetCount = shortcutTargets.size();
+        for (int i = 0; i < shortcutTargetCount; i++) {
+            final String shortcutTargetString =
+                    StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
+            mShortcutTargets.add(
+                    new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
+        }
+    }
+
+    /**
+     * @return the unigram probability stored in {@link #mProbabilityInfo}.
+     */
+    public int getProbability() {
+        return mProbabilityInfo.mProbability;
+    }
+
+    // The lists are passed as elements rather than as their hashCode() so that a null list is
+    // tolerated; for non-null lists the resulting value is identical.
+    private static int computeHashCode(WordProperty word) {
+        return Arrays.hashCode(new Object[] {
+                word.mWord,
+                word.mProbabilityInfo,
+                word.mShortcutTargets,
+                word.mBigrams,
+                word.mIsNotAWord,
+                word.mIsBlacklistEntry
+        });
+    }
+
+    // Null-safe equality: two nulls are equal, a null never equals a non-null.
+    private static boolean equalsOrBothNull(final Object a, final Object b) {
+        return null == a ? null == b : a.equals(b);
+    }
+
+    /**
+     * Three-way comparison.
+     *
+     * Words with a higher probability sort first (the comparison returns a negative value for
+     * them). Words with the same probability are sorted in lexicographic order.
+     */
+    @Override
+    public int compareTo(final WordProperty w) {
+        if (getProbability() < w.getProbability()) return 1;
+        if (getProbability() > w.getProbability()) return -1;
+        return mWord.compareTo(w.mWord);
+    }
+
+    /**
+     * Equality test.
+     *
+     * Words are equal if they have the same probability info, the same spellings, and the same
+     * attributes (shortcut targets, bigrams and flags).
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if (!(o instanceof WordProperty)) return false;
+        WordProperty w = (WordProperty)o;
+        return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
+                && equalsOrBothNull(mShortcutTargets, w.mShortcutTargets)
+                && equalsOrBothNull(mBigrams, w.mBigrams)
+                && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
+                // Each flag is compared with its own counterpart.
+                && mHasBigrams == w.mHasBigrams && mHasShortcuts == w.mHasShortcuts;
+    }
+
+    @Override
+    public int hashCode() {
+        if (mHashCode == 0) {
+            mHashCode = computeHashCode(this);
+        }
+        return mHashCode;
+    }
+
+    /**
+     * @return false when the probability is BinaryDictionary.NOT_A_PROBABILITY, true otherwise.
+     */
+    @UsedForTesting
+    public boolean isValid() {
+        return getProbability() != BinaryDictionary.NOT_A_PROBABILITY;
+    }
+
+    /**
+     * Renders the word on one line, followed by one line per bigram and one per shortcut.
+     */
+    @Override
+    public String toString() {
+        // TODO: Move this logic to CombinedInputOutput.
+        // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
+        final StringBuilder builder = new StringBuilder();
+        builder.append(" word=").append(mWord);
+        builder.append(",");
+        builder.append(mProbabilityInfo.toString());
+        if (mIsNotAWord) {
+            builder.append(",");
+            builder.append("not_a_word=true");
+        }
+        if (mIsBlacklistEntry) {
+            builder.append(",");
+            builder.append("blacklisted=true");
+        }
+        builder.append("\n");
+        if (null != mBigrams) {
+            for (final WeightedString bigram : mBigrams) {
+                builder.append(" bigram=").append(bigram.mWord);
+                builder.append(",");
+                builder.append(bigram.mProbabilityInfo.toString());
+                builder.append("\n");
+            }
+        }
+        if (null != mShortcutTargets) {
+            for (final WeightedString shortcut : mShortcutTargets) {
+                builder.append(" shortcut=").append(shortcut.mWord);
+                builder.append(",");
+                builder.append(shortcut.mProbabilityInfo.toString());
+                builder.append("\n");
+            }
+        }
+        return builder.toString();
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
deleted file mode 100644
index 37d1102e3..000000000
--- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package com.android.inputmethod.latin.utils;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.BinaryDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.ProbabilityInfo;
-
-import java.util.ArrayList;
-
-// This has information that belong to a unigram. This class has some detailed attributes such as
-// historical information but they have to be checked only for testing purpose.
-@UsedForTesting
-public class WordProperty {
- public final String mCodePoints;
- public final boolean mIsNotAWord;
- public final boolean mIsBlacklisted;
- public final boolean mHasBigrams;
- public final boolean mHasShortcuts;
- public final ProbabilityInfo mProbabilityInfo;
- public final ArrayList<WeightedString> mBigramTargets = CollectionUtils.newArrayList();
- public final ArrayList<ProbabilityInfo> mBigramProbabilityInfo = CollectionUtils.newArrayList();
- public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
-
- private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
- return new ProbabilityInfo(
- probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
- probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
- probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
- probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
- }
-
- // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
- public WordProperty(final int[] codePoints, final boolean isNotAWord,
- final boolean isBlacklisted, final boolean hasBigram,
- final boolean hasShortcuts, final int[] probabilityInfo,
- final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
- final ArrayList<int[]> shortcutTargets,
- final ArrayList<Integer> shortcutProbabilities) {
- mCodePoints = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
- mIsNotAWord = isNotAWord;
- mIsBlacklisted = isBlacklisted;
- mHasBigrams = hasBigram;
- mHasShortcuts = hasShortcuts;
- mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
-
- final int bigramTargetCount = bigramTargets.size();
- for (int i = 0; i < bigramTargetCount; i++) {
- final String bigramTargetString =
- StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
- final ProbabilityInfo bigramProbability =
- createProbabilityInfoFromArray(bigramProbabilityInfo.get(i));
- mBigramTargets.add(
- new WeightedString(bigramTargetString, bigramProbability.mProbability));
- mBigramProbabilityInfo.add(bigramProbability);
- }
-
- final int shortcutTargetCount = shortcutTargets.size();
- for (int i = 0; i < shortcutTargetCount; i++) {
- final String shortcutTargetString =
- StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
- mShortcutTargets.add(
- new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
- }
- }
-
- @UsedForTesting
- public boolean isValid() {
- return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
- }
-
- @Override
- public String toString() {
- // TODO: Move this logic to CombinedInputOutput.
- final StringBuffer builder = new StringBuffer();
- builder.append(" word=" + mCodePoints);
- builder.append(",");
- builder.append("f=" + mProbabilityInfo.mProbability);
- if (mIsNotAWord) {
- builder.append(",");
- builder.append("not_a_word=true");
- }
- if (mIsBlacklisted) {
- builder.append(",");
- builder.append("blacklisted=true");
- }
- if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
- builder.append(",");
- builder.append("historicalInfo=" + mProbabilityInfo);
- }
- builder.append("\n");
- for (int i = 0; i < mBigramTargets.size(); i++) {
- builder.append(" bigram=" + mBigramTargets.get(i).mWord);
- builder.append(",");
- builder.append("f=" + mBigramTargets.get(i).getProbability());
- if (mBigramProbabilityInfo.get(i).mTimestamp
- != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
- builder.append(",");
- builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
- }
- builder.append("\n");
- }
- for (int i = 0; i < mShortcutTargets.size(); i++) {
- builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
- builder.append(",");
- builder.append("f=" + mShortcutTargets.get(i).getProbability());
- builder.append("\n");
- }
- return builder.toString();
- }
-} \ No newline at end of file
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 3e42f3423..324bad6c4 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -24,9 +24,9 @@ import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
-import com.android.inputmethod.latin.utils.WordProperty;
import java.io.File;
import java.io.IOException;
@@ -916,15 +916,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
words.add(word);
wordProbabilities.put(word, unigramProbability);
- final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
- assertEquals(word, unigramProperty.mCodePoints);
- assertTrue(unigramProperty.isValid());
- assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
- assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
- assertEquals(false, unigramProperty.mHasBigrams);
- assertEquals(false, unigramProperty.mHasShortcuts);
- assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability);
- assertTrue(unigramProperty.mShortcutTargets.isEmpty());
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
+ assertEquals(word, wordProperty.mWord);
+ assertTrue(wordProperty.isValid());
+ assertEquals(isNotAWord, wordProperty.mIsNotAWord);
+ assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
+ assertEquals(false, wordProperty.mHasBigrams);
+ assertEquals(false, wordProperty.mHasShortcuts);
+ assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
+ assertTrue(wordProperty.mShortcutTargets.isEmpty());
}
for (int i = 0; i < BIGRAM_COUNT; i++) {
@@ -955,18 +955,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
continue;
}
final HashSet<String> bigramWord1s = bigrams.get(word0);
- final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0);
- assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size());
- assertEquals(unigramProperty.mBigramTargets.size(),
- unigramProperty.mBigramProbabilityInfo.size());
- for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
- final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
+ final WordProperty wordProperty = binaryDictionary.getWordProperty(word0);
+ assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
+ for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
+ final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
- final int probability = unigramProperty.mBigramTargets.get(j).getProbability();
+ final int probability = wordProperty.mBigrams.get(j).getProbability();
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
probability);
- assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
- probability);
+ assertEquals(wordProperty.mBigrams.get(j).getProbability(), probability);
}
}
}
@@ -1045,15 +1042,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final BinaryDictionary.GetNextWordPropertyResult result =
binaryDictionary.getNextWordProperty(token);
final WordProperty wordProperty = result.mWordProperty;
- final String word0 = wordProperty.mCodePoints;
+ final String word0 = wordProperty.mWord;
assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
wordProperty.mProbabilityInfo.mProbability);
wordSet.remove(word0);
final HashSet<String> bigramWord1s = bigrams.get(word0);
- for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
- final String word1 = wordProperty.mBigramTargets.get(j).mWord;
+ for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
+ final String word1 = wordProperty.mBigrams.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
- final int probability = wordProperty.mBigramTargets.get(j).getProbability();
+ final int probability = wordProperty.mBigrams.get(j).getProbability();
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
bigramSet.remove(bigram);
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index adfb920dd..5d1a30ebd 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -48,8 +48,7 @@ USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/LocaleUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ResizableIntArray.java \
- $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java \
- $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/WordProperty.java
+ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
$(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin/makedict/
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index b9840607a..eae9d9fc1 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -21,7 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.io.BufferedReader;
import java.io.File;
@@ -45,7 +45,7 @@ public class CombinedInputOutput {
private static final String DICTIONARY_TAG = "dictionary";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_TAG = "f";
+ private static final String PROBABILITY_TAG = "f";
private static final String WORD_TAG = "word";
private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist";
@@ -138,7 +138,7 @@ public class CombinedInputOutput {
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (WORD_TAG.equals(params[0])) {
word = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (PROBABILITY_TAG.equals(params[0])) {
freq = Integer.parseInt(params[1]);
} else if (NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]);
@@ -152,7 +152,7 @@ public class CombinedInputOutput {
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]);
@@ -171,7 +171,7 @@ public class CombinedInputOutput {
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
if (BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1];
- } else if (FREQUENCY_TAG.equals(params[0])) {
+ } else if (PROBABILITY_TAG.equals(params[0])) {
bigramFreq = Integer.parseInt(params[1]);
}
}
@@ -200,9 +200,10 @@ public class CombinedInputOutput {
*/
public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word); // This for ordering by frequency, then by asciibetic order
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (WordProperty wordProperty: dict) {
+ // This for ordering by frequency, then by asciibetic order
+ wordPropertiesInDict.add(wordProperty);
}
final HashMap<String, String> options = dict.mOptions.mAttributes;
destination.write(DICTIONARY_TAG + "=");
@@ -215,20 +216,20 @@ public class CombinedInputOutput {
destination.write("," + key + "=" + value);
}
destination.write("\n");
- for (Word word : set) {
- destination.write(" " + WORD_TAG + "=" + word.mWord + ","
- + FREQUENCY_TAG + "=" + word.mFrequency
- + (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
- if (null != word.mShortcutTargets) {
- for (WeightedString target : word.mShortcutTargets) {
+ for (WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
+ + PROBABILITY_TAG + "=" + wordProperty.getProbability()
+ + (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
+ if (null != wordProperty.mShortcutTargets) {
+ for (WeightedString target : wordProperty.mShortcutTargets) {
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
- + FREQUENCY_TAG + "=" + target.getProbability() + "\n");
+ + PROBABILITY_TAG + "=" + target.getProbability() + "\n");
}
}
- if (null != word.mBigrams) {
- for (WeightedString bigram : word.mBigrams) {
+ if (null != wordProperty.mBigrams) {
+ for (WeightedString bigram : wordProperty.mBigrams) {
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
- + FREQUENCY_TAG + "=" + bigram.getProbability() + "\n");
+ + PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
}
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index c9f6bd508..9947608ea 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -108,42 +108,46 @@ public class Diff extends Dicttool.Command {
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
boolean hasDifferences = false;
- for (final Word word0 : dict0) {
- final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
- word0.mWord);
- if (null == word1) {
+ for (final WordProperty word0Property : dict0) {
+ final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
+ word0Property.mWord);
+ if (null == word1PtNode) {
// This word is not in dict1
- System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
+ System.out.println("Deleted: " + word0Property.mWord + " "
+ + word0Property.getProbability());
hasDifferences = true;
} else {
// We found the word. Compare frequencies, shortcuts, bigrams
- if (word0.mFrequency != word1.getFrequency()) {
- System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
- + " -> " + word1.getFrequency());
+ if (word0Property.getProbability() != word1PtNode.getFrequency()) {
+ System.out.println("Probability changed: " + word0Property.mWord + " "
+ + word0Property.getProbability() + " -> " + word1PtNode.getFrequency());
hasDifferences = true;
}
- if (word0.mIsNotAWord != word1.getIsNotAWord()) {
- System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
- + " -> " + word1.getIsNotAWord());
+ if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
+ System.out.println("Not a word: " + word0Property.mWord + " "
+ + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
hasDifferences = true;
}
- if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
- System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
- + " -> " + word1.getIsBlacklistEntry());
+ if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
+ System.out.println("Blacklist: " + word0Property.mWord + " "
+ + word0Property.mIsBlacklistEntry + " -> "
+ + word1PtNode.getIsBlacklistEntry());
hasDifferences = true;
}
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Bigram", word0.mBigrams, word1.getBigrams());
- hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
- "Shortcut", word0.mShortcutTargets, word1.getShortcutTargets());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+ hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
+ "Shortcut", word0Property.mShortcutTargets,
+ word1PtNode.getShortcutTargets());
}
}
- for (final Word word1 : dict1) {
- final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
- word1.mWord);
- if (null == word0) {
+ for (final WordProperty word1Property : dict1) {
+ final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
+ word1Property.mWord);
+ if (null == word0PtNode) {
// This word is not in dict0
- System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
+ System.out.println("Added: " + word1Property.mWord + " "
+ + word1Property.getProbability());
hasDifferences = true;
}
}
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 8f17fcd94..c1eb0f8e7 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.util.Arrays;
import java.util.ArrayList;
@@ -43,14 +43,14 @@ public class Info extends Dicttool.Command {
int bigramCount = 0;
int shortcutCount = 0;
int whitelistCount = 0;
- for (final Word w : dict) {
+ for (final WordProperty wordProperty : dict) {
++wordCount;
- if (null != w.mBigrams) {
- bigramCount += w.mBigrams.size();
+ if (null != wordProperty.mBigrams) {
+ bigramCount += wordProperty.mBigrams.size();
}
- if (null != w.mShortcutTargets) {
- shortcutCount += w.mShortcutTargets.size();
- for (WeightedString shortcutTarget : w.mShortcutTargets) {
+ if (null != wordProperty.mShortcutTargets) {
+ shortcutCount += wordProperty.mShortcutTargets.size();
+ for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
== shortcutTarget.getProbability()) {
++whitelistCount;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index cdc487b16..c6c60b8e2 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import java.io.BufferedReader;
import java.io.File;
@@ -52,7 +52,7 @@ public class XmlDictInputOutput {
private static final String WORD_TAG = "w";
private static final String BIGRAM_TAG = "bigram";
private static final String SHORTCUT_TAG = "shortcut";
- private static final String FREQUENCY_ATTR = "f";
+ private static final String PROBABILITY_ATTR = "f";
private static final String WORD_ATTR = "word";
private static final String NOT_A_WORD_ATTR = "not_a_word";
@@ -107,7 +107,7 @@ public class XmlDictInputOutput {
mWord = "";
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex);
- if (FREQUENCY_ATTR.equals(attrName)) {
+ if (PROBABILITY_ATTR.equals(attrName)) {
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
}
}
@@ -348,9 +348,9 @@ public class XmlDictInputOutput {
*/
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
throws IOException {
- final TreeSet<Word> set = new TreeSet<Word>();
- for (Word word : dict) {
- set.add(word);
+ final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
+ for (WordProperty wordProperty : dict) {
+ wordPropertiesInDict.add(wordProperty);
}
// TODO: use an XMLSerializer if this gets big
destination.write("<wordlist format=\"2\"");
@@ -361,23 +361,24 @@ public class XmlDictInputOutput {
}
destination.write(">\n");
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
- for (Word word : set) {
- destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
- + FREQUENCY_ATTR + "=\"" + word.mFrequency
- + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
- if (null != word.mShortcutTargets) {
+ for (WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
+ + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ + "\">");
+ if (null != wordProperty.mShortcutTargets) {
destination.write("\n");
- for (WeightedString target : word.mShortcutTargets) {
- destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
+ for (WeightedString target : wordProperty.mShortcutTargets) {
+ destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ ">\n");
}
destination.write(" ");
}
- if (null != word.mBigrams) {
+ if (null != wordProperty.mBigrams) {
destination.write("\n");
- for (WeightedString bigram : word.mBigrams) {
- destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
+ for (WeightedString bigram : wordProperty.mBigrams) {
+ destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n");
}
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
index 76dadc25c..191546433 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java
@@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.Word;
+import com.android.inputmethod.latin.makedict.WordProperty;
import junit.framework.TestCase;
@@ -87,8 +87,8 @@ public class FusionDictionaryTest extends TestCase {
}
private void dumpDict(final FusionDictionary dict) {
- for (Word w : dict) {
- System.out.println("Word " + dumpWord(w.mWord));
+ for (WordProperty wordProperty : dict) {
+ System.out.println("Word " + dumpWord(wordProperty.mWord));
}
}