aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/utils
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/utils')
-rw-r--r--java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java6
-rw-r--r--java/src/com/android/inputmethod/latin/utils/StringUtils.java20
-rw-r--r--java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java181
-rw-r--r--java/src/com/android/inputmethod/latin/utils/WordProperty.java89
4 files changed, 76 insertions, 220 deletions
diff --git a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java
index 306735779..a15556511 100644
--- a/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/DictionaryInfoUtils.java
@@ -29,7 +29,7 @@ import com.android.inputmethod.latin.BinaryDictionaryGetter;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.R;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import java.io.File;
@@ -282,7 +282,7 @@ public class DictionaryInfoUtils {
BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
}
- public static FileHeader getDictionaryFileHeaderOrNull(final File file) {
+ public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) {
return BinaryDictIOUtils.getDictionaryFileHeaderOrNull(file, 0, file.length());
}
@@ -294,7 +294,7 @@ public class DictionaryInfoUtils {
*/
private static DictionaryInfo createDictionaryInfoFromFileAddress(
final AssetFileAddress fileAddress) {
- final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull(
+ final DictionaryHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull(
new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength);
if (header == null) {
return null;
diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
index c632a71a9..e7932b5a6 100644
--- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
@@ -46,7 +46,7 @@ public final class StringUtils {
public static String newSingleCodePointString(int codePoint) {
if (Character.charCount(codePoint) == 1) {
- // Optimization: avoid creating an temporary array for characters that are
+ // Optimization: avoid creating a temporary array for characters that are
// represented by a single char value
return String.valueOf((char) codePoint);
}
@@ -205,6 +205,24 @@ public final class StringUtils {
return codePoints;
}
+ /**
+ * Construct a String from a code point array
+ *
+ * @param codePoints a code point array that is null terminated when its logical length is
+ * shorter than the array length.
+ * @return a string constructed from the code point array.
+ */
+ public static String getStringFromNullTerminatedCodePointArray(final int[] codePoints) {
+ int stringLength = codePoints.length;
+ for (int i = 0; i < codePoints.length; i++) {
+ if (codePoints[i] == 0) {
+ stringLength = i;
+ break;
+ }
+ }
+ return new String(codePoints, 0 /* offset */, stringLength);
+ }
+
// This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE.
public static int getCapitalizationType(final String text) {
// If the first char is not uppercase, then the word is either all lower case or
diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
deleted file mode 100644
index 7af03da59..000000000
--- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.utils;
-
-import android.util.Log;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
-import com.android.inputmethod.latin.makedict.DictDecoder;
-import com.android.inputmethod.latin.makedict.DictEncoder;
-import com.android.inputmethod.latin.makedict.FormatSpec;
-import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.PendingAttribute;
-import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Reads and writes Binary files for a UserHistoryDictionary.
- *
- * All the methods in this class are static.
- */
-public final class UserHistoryDictIOUtils {
- private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
- private static final boolean DEBUG = false;
-
- public interface OnAddWordListener {
- /**
- * Callback to be notified when a word is added to the dictionary.
- * @param word The added word.
- * @param shortcutTarget A shortcut target for this word, or null if none.
- * @param frequency The frequency for this word.
- * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
- * Unspecified if shortcutTarget is null - do not rely on its value.
- */
- public void setUnigram(final String word, final String shortcutTarget, final int frequency,
- final int shortcutFreq);
- public void setBigram(final String word1, final String word2, final int frequency);
- }
-
- @UsedForTesting
- public interface BigramDictionaryInterface {
- public int getFrequency(final String word1, final String word2);
- }
-
- /**
- * Writes dictionary to file.
- */
- @UsedForTesting
- public static void writeDictionary(final DictEncoder dictEncoder,
- final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
- final FormatOptions formatOptions, final HashMap<String, String> options) {
- final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options);
- fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY,
- FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
- fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY,
- String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
- try {
- dictEncoder.writeDictionary(fusionDict, formatOptions);
- Log.d(TAG, "end writing");
- } catch (IOException e) {
- Log.e(TAG, "IO exception while writing file", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Unsupported format", e);
- }
- }
-
- /**
- * Constructs a new FusionDictionary from BigramDictionaryInterface.
- */
- @UsedForTesting
- static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
- final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
- final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
- new FusionDictionary.DictionaryOptions(options));
- int profTotal = 0;
- for (final String word1 : bigrams.keySet()) {
- final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
- for (final String word2 : word1Bigrams.keySet()) {
- final int freq = dict.getFrequency(word1, word2);
- if (freq == -1) {
- // don't add this bigram.
- continue;
- }
- if (DEBUG) {
- if (word1 == null) {
- Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
- } else {
- Log.d(TAG, "add bigram: " + word1
- + "," + word2 + "," + Integer.toString(freq));
- }
- profTotal++;
- }
- if (word1 == null) { // unigram
- fusionDict.add(word2, freq, null, false /* isNotAWord */);
- } else { // bigram
- if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
- fusionDict.add(word1, 2, null, false /* isNotAWord */);
- }
- fusionDict.setBigram(word1, word2, freq);
- }
- bigrams.updateBigram(word1, word2, (byte)freq);
- }
- }
- if (DEBUG) {
- Log.d(TAG, "add " + profTotal + "words");
- }
- return fusionDict;
- }
-
- /**
- * Reads dictionary from file.
- */
- public static void readDictionaryBinary(final DictDecoder dictDecoder,
- final OnAddWordListener dict) {
- final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
- final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
- final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
- try {
- dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
- } catch (IOException e) {
- Log.e(TAG, "IO exception while reading file", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Unsupported format", e);
- } catch (ArrayIndexOutOfBoundsException e) {
- Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
- }
- addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
- }
-
- /**
- * Adds all unigrams and bigrams in maps to OnAddWordListener.
- */
- @UsedForTesting
- static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
- final TreeMap<Integer, Integer> frequencies,
- final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
- final OnAddWordListener to) {
- for (Entry<Integer, String> entry : unigrams.entrySet()) {
- final String word1 = entry.getValue();
- final int unigramFrequency = frequencies.get(entry.getKey());
- to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
- final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
- if (attrList != null) {
- for (final PendingAttribute attr : attrList) {
- final String word2 = unigrams.get(attr.mAddress);
- if (word1 == null || word2 == null) {
- Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
- continue;
- }
- to.setBigram(word1, word2,
- BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
- attr.mFrequency));
- }
- }
- }
-
- }
-}
diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
index ba9b114b0..da56b213f 100644
--- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java
@@ -20,6 +20,7 @@ package com.android.inputmethod.latin.utils;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import java.util.ArrayList;
@@ -37,32 +38,12 @@ public class WordProperty {
public final ArrayList<ProbabilityInfo> mBigramProbabilityInfo = CollectionUtils.newArrayList();
public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
- // TODO: Use this kind of Probability class for dictionary read/write code under the makedict
- // package.
- public static final class ProbabilityInfo {
- public final int mProbability;
- // wTimestamp, mLevel and mCount are historical info. These values are depend on the
- // implementation in native code; thus, we must not use them and have any assumptions about
- // them except for tests.
- public final int mTimestamp;
- public final int mLevel;
- public final int mCount;
-
- public ProbabilityInfo(final int[] probabilityInfo) {
- mProbability = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX];
- mTimestamp = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX];
- mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
- mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
- }
- }
-
- private static int getCodePointCount(final int[] codePoints) {
- for (int i = 0; i < codePoints.length; i++) {
- if (codePoints[i] == 0) {
- return i;
- }
- }
- return codePoints.length;
+ private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
+ return new ProbabilityInfo(
+ probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
+ probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
+ probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
+ probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
}
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
@@ -72,20 +53,19 @@ public class WordProperty {
final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
final ArrayList<int[]> shortcutTargets,
final ArrayList<Integer> shortcutProbabilities) {
- mCodePoints = new String(codePoints, 0 /* offset */, getCodePointCount(codePoints));
+ mCodePoints = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
mIsNotAWord = isNotAWord;
mIsBlacklisted = isBlacklisted;
mHasBigrams = hasBigram;
mHasShortcuts = hasShortcuts;
- mProbabilityInfo = new ProbabilityInfo(probabilityInfo);
+ mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
final int bigramTargetCount = bigramTargets.size();
for (int i = 0; i < bigramTargetCount; i++) {
- final int[] bigramTargetCodePointArray = bigramTargets.get(i);
- final String bigramTargetString = new String(bigramTargetCodePointArray,
- 0 /* offset */, getCodePointCount(bigramTargetCodePointArray));
+ final String bigramTargetString =
+ StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
final ProbabilityInfo bigramProbability =
- new ProbabilityInfo(bigramProbabilityInfo.get(i));
+ createProbabilityInfoFromArray(bigramProbabilityInfo.get(i));
mBigramTargets.add(
new WeightedString(bigramTargetString, bigramProbability.mProbability));
mBigramProbabilityInfo.add(bigramProbability);
@@ -93,9 +73,8 @@ public class WordProperty {
final int shortcutTargetCount = shortcutTargets.size();
for (int i = 0; i < shortcutTargetCount; i++) {
- final int[] shortcutTargetCodePointArray = shortcutTargets.get(i);
- final String shortcutTargetString = new String(shortcutTargetCodePointArray,
- 0 /* offset */, getCodePointCount(shortcutTargetCodePointArray));
+ final String shortcutTargetString =
+ StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
mShortcutTargets.add(
new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
}
@@ -105,4 +84,44 @@ public class WordProperty {
public boolean isValid() {
return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
}
+
+ @Override
+ public String toString() {
+ // TODO: Move this logic to CombinedInputOutput.
+ final StringBuffer builder = new StringBuffer();
+ builder.append(" word=" + mCodePoints);
+ builder.append(",");
+ builder.append("f=" + mProbabilityInfo.mProbability);
+ if (mIsNotAWord) {
+ builder.append(",");
+ builder.append("not_a_word=true");
+ }
+ if (mIsBlacklisted) {
+ builder.append(",");
+ builder.append("blacklisted=true");
+ }
+ if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
+ builder.append(",");
+ builder.append("historicalInfo=" + mProbabilityInfo);
+ }
+ builder.append("\n");
+ for (int i = 0; i < mBigramTargets.size(); i++) {
+ builder.append(" bigram=" + mBigramTargets.get(i).mWord);
+ builder.append(",");
+ builder.append("f=" + mBigramTargets.get(i).mFrequency);
+ if (mBigramProbabilityInfo.get(i).mTimestamp
+ != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
+ builder.append(",");
+ builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
+ }
+ builder.append("\n");
+ }
+ for (int i = 0; i < mShortcutTargets.size(); i++) {
+ builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
+ builder.append(",");
+ builder.append("f=" + mShortcutTargets.get(i).mFrequency);
+ builder.append("\n");
+ }
+ return builder.toString();
+ }
} \ No newline at end of file