aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java5
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/WordProperty.java33
-rw-r--r--java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java99
-rw-r--r--tools/dicttool/Android.mk1
-rw-r--r--tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java82
5 files changed, 140 insertions, 80 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
index d6ce88d72..dafbc04b9 100644
--- a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
+++ b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java
@@ -17,7 +17,7 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.BinaryDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.util.Arrays;
@@ -57,8 +57,7 @@ public final class ProbabilityInfo {
@Override
public String toString() {
- return "f=" + mProbability + (hasHistoricalInfo() ?
- ",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
+ return CombinedFormatUtils.formatProbabilityInfo(this);
}
@Override
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index e764ae3d6..b93a0a525 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -20,6 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.CollectionUtils;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import java.util.ArrayList;
@@ -52,8 +53,8 @@ public final class WordProperty implements Comparable<WordProperty> {
mBigrams = bigrams;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
- mHasBigrams = !bigrams.isEmpty();
- mHasShortcuts = !shortcutTargets.isEmpty();
+ mHasBigrams = bigrams != null && !bigrams.isEmpty();
+ mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
}
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
@@ -158,32 +159,6 @@ public final class WordProperty implements Comparable<WordProperty> {
@Override
public String toString() {
- // TODO: Move this logic to CombinedInputOutput.
- final StringBuffer builder = new StringBuffer();
- builder.append(" word=" + mWord);
- builder.append(",");
- builder.append(mProbabilityInfo.toString());
- if (mIsNotAWord) {
- builder.append(",");
- builder.append("not_a_word=true");
- }
- if (mIsBlacklistEntry) {
- builder.append(",");
- builder.append("blacklisted=true");
- }
- builder.append("\n");
- for (int i = 0; i < mBigrams.size(); i++) {
- builder.append(" bigram=" + mBigrams.get(i).mWord);
- builder.append(",");
- builder.append(mBigrams.get(i).mProbabilityInfo.toString());
- builder.append("\n");
- }
- for (int i = 0; i < mShortcutTargets.size(); i++) {
- builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
- builder.append(",");
- builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
- builder.append("\n");
- }
- return builder.toString();
+ return CombinedFormatUtils.formatWordProperty(this);
}
}
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
new file mode 100644
index 000000000..1348d5e77
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin.utils;
+
+import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.ProbabilityInfo;
+import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.util.HashMap;
+
+public class CombinedFormatUtils {
+ public static final String DICTIONARY_TAG = "dictionary";
+ public static final String BIGRAM_TAG = "bigram";
+ public static final String SHORTCUT_TAG = "shortcut";
+ public static final String PROBABILITY_TAG = "f";
+ public static final String HISTORICAL_INFO_TAG = "historicalInfo";
+ public static final String HISTORICAL_INFO_SEPARATOR = ":";
+ public static final String WORD_TAG = "word";
+ public static final String NOT_A_WORD_TAG = "not_a_word";
+ public static final String BLACKLISTED_TAG = "blacklisted";
+
+ public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
+ final StringBuilder builder = new StringBuilder();
+ builder.append(DICTIONARY_TAG + "=");
+ if (attributeMap.containsKey(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY)) {
+ builder.append(attributeMap.get(DictionaryHeader.DICTIONARY_DESCRIPTION_KEY));
+ }
+ for (final String key : attributeMap.keySet()) {
+ if (key == DictionaryHeader.DICTIONARY_DESCRIPTION_KEY) {
+ continue;
+ }
+ final String value = attributeMap.get(key);
+ builder.append("," + key + "=" + value);
+ }
+ builder.append("\n");
+ return builder.toString();
+ }
+
+ public static String formatWordProperty(final WordProperty wordProperty) {
+ final StringBuilder builder = new StringBuilder();
+ builder.append(" " + WORD_TAG + "=" + wordProperty.mWord);
+ builder.append(",");
+ builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
+ if (wordProperty.mIsNotAWord) {
+ builder.append("," + NOT_A_WORD_TAG + "=true");
+ }
+ if (wordProperty.mIsBlacklistEntry) {
+ builder.append("," + BLACKLISTED_TAG + "=true");
+ }
+ builder.append("\n");
+ if (wordProperty.mShortcutTargets != null) {
+ for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
+ builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
+ builder.append(",");
+ builder.append(formatProbabilityInfo(shortcutTarget.mProbabilityInfo));
+ builder.append("\n");
+ }
+ }
+ if (wordProperty.mBigrams != null) {
+ for (final WeightedString bigram : wordProperty.mBigrams) {
+ builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
+ builder.append(",");
+ builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
+ builder.append("\n");
+ }
+ }
+ return builder.toString();
+ }
+
+ public static String formatProbabilityInfo(final ProbabilityInfo probabilityInfo) {
+ final StringBuilder builder = new StringBuilder();
+ builder.append(PROBABILITY_TAG + "=" + probabilityInfo.mProbability);
+ if (probabilityInfo.hasHistoricalInfo()) {
+ builder.append(",");
+ builder.append(HISTORICAL_INFO_TAG + "=");
+ builder.append(probabilityInfo.mTimestamp);
+ builder.append(HISTORICAL_INFO_SEPARATOR);
+ builder.append(probabilityInfo.mLevel);
+ builder.append(HISTORICAL_INFO_SEPARATOR);
+ builder.append(probabilityInfo.mCount);
+ }
+ return builder.toString();
+ }
+}
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index 5d1a30ebd..b83ce57e3 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -43,6 +43,7 @@ USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/settings/NativeSuggestOptions.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
+ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CombinedFormatUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CoordinateUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/FileUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index eae9d9fc1..8d2f5fbbf 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
+import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import java.io.BufferedReader;
import java.io.File;
@@ -41,16 +42,10 @@ import java.util.TreeSet;
* All functions in this class are static.
*/
public class CombinedInputOutput {
-
- private static final String DICTIONARY_TAG = "dictionary";
- private static final String BIGRAM_TAG = "bigram";
- private static final String SHORTCUT_TAG = "shortcut";
- private static final String PROBABILITY_TAG = "f";
- private static final String WORD_TAG = "word";
- private static final String NOT_A_WORD_TAG = "not_a_word";
private static final String WHITELIST_TAG = "whitelist";
private static final String OPTIONS_TAG = "options";
private static final String COMMENT_LINE_STARTER = "#";
+ private static final int HISTORICAL_INFO_ELEMENT_COUNT = 3;
/**
* Basic test to find out whether the file is in the combined format or not.
@@ -68,7 +63,8 @@ public class CombinedInputOutput {
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
firstLine = reader.readLine();
}
- return firstLine.matches("^" + DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
+ return firstLine.matches(
+ "^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
@@ -123,7 +119,7 @@ public class CombinedInputOutput {
while (null != (line = reader.readLine())) {
if (line.startsWith(COMMENT_LINE_STARTER)) continue;
final String args[] = line.trim().split(",");
- if (args[0].matches(WORD_TAG + "=.*")) {
+ if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
if (null != word) {
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
@@ -136,23 +132,30 @@ public class CombinedInputOutput {
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (WORD_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
word = params[1];
- } else if (PROBABILITY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
freq = Integer.parseInt(params[1]);
- } else if (NOT_A_WORD_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ // TODO: Use parsed historical info.
+ } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
isNotAWord = "true".equals(params[1]);
}
}
- } else if (args[0].matches(SHORTCUT_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
String shortcut = null;
int shortcutFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (SHORTCUT_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.SHORTCUT_TAG.equals(params[0])) {
shortcut = params[1];
- } else if (PROBABILITY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
shortcutFreq = WHITELIST_TAG.equals(params[1])
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
: Integer.parseInt(params[1]);
@@ -163,16 +166,23 @@ public class CombinedInputOutput {
} else {
throw new RuntimeException("Wrong format : " + line);
}
- } else if (args[0].matches(BIGRAM_TAG + "=.*")) {
+ } else if (args[0].matches(CombinedFormatUtils.BIGRAM_TAG + "=.*")) {
String secondWordOfBigram = null;
int bigramFreq = 0;
for (String param : args) {
final String params[] = param.split("=", 2);
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
- if (BIGRAM_TAG.equals(params[0])) {
+ if (CombinedFormatUtils.BIGRAM_TAG.equals(params[0])) {
secondWordOfBigram = params[1];
- } else if (PROBABILITY_TAG.equals(params[0])) {
+ } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
bigramFreq = Integer.parseInt(params[1]);
+ } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
+ final String[] historicalInfoParams =
+ params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+ if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+ throw new RuntimeException("Wrong format (historical info) : " + line);
+ }
+ // TODO: Use parsed historical info.
}
}
if (null != secondWordOfBigram) {
@@ -198,40 +208,16 @@ public class CombinedInputOutput {
* @param destination a destination stream to write to.
* @param dict the dictionary to write.
*/
- public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
- throws IOException {
+ public static void writeDictionaryCombined(
+ final Writer destination, final FusionDictionary dict) throws IOException {
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
- for (WordProperty wordProperty: dict) {
+ for (final WordProperty wordProperty : dict) {
// This for ordering by frequency, then by asciibetic order
wordPropertiesInDict.add(wordProperty);
}
- final HashMap<String, String> options = dict.mOptions.mAttributes;
- destination.write(DICTIONARY_TAG + "=");
- if (options.containsKey(DICTIONARY_TAG)) {
- destination.write(options.get(DICTIONARY_TAG));
- options.remove(DICTIONARY_TAG);
- }
- for (final String key : dict.mOptions.mAttributes.keySet()) {
- final String value = dict.mOptions.mAttributes.get(key);
- destination.write("," + key + "=" + value);
- }
- destination.write("\n");
- for (WordProperty wordProperty : wordPropertiesInDict) {
- destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
- + PROBABILITY_TAG + "=" + wordProperty.getProbability()
- + (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
- if (null != wordProperty.mShortcutTargets) {
- for (WeightedString target : wordProperty.mShortcutTargets) {
- destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
- + PROBABILITY_TAG + "=" + target.getProbability() + "\n");
- }
- }
- if (null != wordProperty.mBigrams) {
- for (WeightedString bigram : wordProperty.mBigrams) {
- destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
- + PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
- }
- }
+ destination.write(CombinedFormatUtils.formatAttributeMap(dict.mOptions.mAttributes));
+ for (final WordProperty wordProperty : wordPropertiesInDict) {
+ destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
}
destination.close();
}