25 files changed, 132 insertions, 552 deletions
diff --git a/Android.mk b/Android.mk
index aa869112c..17eeba8f0 100644
--- a/Android.mk
+++ b/Android.mk
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-subdirs := native java tests tools
+subdirs := common native java tests tools
 include $(call all-named-subdir-makefiles, $(subdirs))
diff --git a/common/Android.mk b/common/Android.mk
new file mode 100644
index 000000000..99aed4c5d
--- /dev/null
+++ b/common/Android.mk
@@ -0,0 +1,26 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+LOCAL_MODULE := latinime-common
+LOCAL_SRC_FILES := $(call all-java-files-under, src)
+LOCAL_SDK_VERSION := 21
+include $(BUILD_STATIC_JAVA_LIBRARY)
+
+# Also build a host-side library (latinime-common-host) from the same src/ tree.
+include $(CLEAR_VARS)
+LOCAL_MODULE := latinime-common-host
+LOCAL_SRC_FILES := $(call all-java-files-under, src)
+include $(BUILD_HOST_JAVA_LIBRARY)
diff --git a/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java b/common/src/com/android/inputmethod/latin/common/CodePointUtils.java
index a270ee774..38aba7bd2 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/CodePointUtils.java
+++ b/common/src/com/android/inputmethod/latin/common/CodePointUtils.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.inputmethod.latin.makedict;
+package com.android.inputmethod.latin.common;
 
 import java.util.Random;
 
diff --git a/java/Android.mk b/java/Android.mk
index 0d12c45fe..a2c5697d3 100644
--- a/java/Android.mk
+++ b/java/Android.mk
@@ -25,7 +25,8 @@ LOCAL_CERTIFICATE := shared
 
 LOCAL_JNI_SHARED_LIBRARIES := libjni_latinime
 
-LOCAL_STATIC_JAVA_LIBRARIES := android-common inputmethod-common android-support-v4 jsr305
+LOCAL_STATIC_JAVA_LIBRARIES := \
+        android-common inputmethod-common android-support-v4 jsr305 latinime-common
 
 # Do not compress dictionary files to mmap dict data runtime
 LOCAL_AAPT_FLAGS := -0 .dict
diff --git a/java/res/values-ca/strings.xml b/java/res/values-ca/strings.xml
index da93a58cf..17615e538 100644
--- a/java/res/values-ca/strings.xml
+++ b/java/res/values-ca/strings.xml
@@ -78,7 +78,7 @@
     <string name="voice_input_disabled_summary" msgid="8141750303464726129">"No hi ha cap mètode d\'introducció activat. Comprova la configuració d\'Idioma i introducció de text."</string>
     <string name="configure_input_method" msgid="373356270290742459">"Configura mètodes d\'entrada"</string>
     <string name="language_selection_title" msgid="3666971864764478269">"Idiomes"</string>
-    <string name="help_and_feedback" msgid="5328219371839879161">"Ajuda i opinió"</string>
+    <string name="help_and_feedback" msgid="5328219371839879161">"Ajuda i suggeriments"</string>
     <string name="select_language" msgid="5709487854987078367">"Idiomes"</string>
     <string name="hint_add_to_dictionary" msgid="573678656946085380">"Torna a tocar per desar"</string>
     <string name="hint_add_to_dictionary_without_word" msgid="3040385779511255101">"Toca aquí per desar."</string>
diff --git a/java/res/values-fi/strings.xml b/java/res/values-fi/strings.xml
index 3739cdf79..b8fcd8131 100644
--- a/java/res/values-fi/strings.xml
+++ b/java/res/values-fi/strings.xml
@@ -74,7 +74,7 @@
     <string name="gesture_floating_preview_text_summary" msgid="4472696213996203533">"Näytä ehdotettu sana piirron aikana"</string>
     <string name="gesture_space_aware" msgid="2078291600664682496">"Ilmausele"</string>
     <string name="gesture_space_aware_summary" msgid="4371385818348528538">"Lisää välilyöntejä eleiden aikana liukumalla välilyöntinäppäim."</string>
-    <string name="voice_input" msgid="3583258583521397548">"Äänisyöteavain"</string>
+    <string name="voice_input" msgid="3583258583521397548">"Äänisyötenäppäin"</string>
     <string name="voice_input_disabled_summary" msgid="8141750303464726129">"Äänen syöttötapoja ei ole otettu käyttöön. Tarkista Kieli ja syöttötapa -asetukset."</string>
     <string name="configure_input_method" msgid="373356270290742459">"Määritä syöttötavat"</string>
     <string name="language_selection_title" msgid="3666971864764478269">"Kielet"</string>
diff --git a/java/res/values-vi/strings.xml b/java/res/values-vi/strings.xml
index 3c803a366..1f9590ef1 100644
--- a/java/res/values-vi/strings.xml
+++ b/java/res/values-vi/strings.xml
@@ -25,7 +25,7 @@
     <string name="use_contacts_for_spellchecking_option_summary" msgid="8754413382543307713">"Trình kiểm tra chính tả sử dụng các mục nhập từ danh sách liên hệ của bạn"</string>
     <string name="vibrate_on_keypress" msgid="5258079494276955460">"Rung khi nhấn phím"</string>
     <string name="sound_on_keypress" msgid="6093592297198243644">"Âm thanh khi nhấn phím"</string>
-    <string name="popup_on_keypress" msgid="123894815723512944">"Cửa sổ bật lên khi nhấn phím"</string>
+    <string name="popup_on_keypress" msgid="123894815723512944">"Bật lên khi nhấn phím"</string>
     <string name="settings_screen_preferences" msgid="2696713156722014624">"Tùy chọn"</string>
     <string name="settings_screen_accounts" msgid="7570397912370223287">"Tài khoản và bảo mật"</string>
     <string name="settings_screen_appearance" msgid="9153102634339912029">"Giao diện và bố cục"</string>
@@ -48,8 +48,8 @@
     <string name="use_contacts_dict_summary" msgid="6599983334507879959">"Sử dụng tên từ Danh bạ cho các đề xuất và chỉnh sửa"</string>
     <string name="use_personalized_dicts" msgid="5167396352105467626">"Đề xuất được cá nhân hóa"</string>
     <string name="enable_metrics_logging" msgid="5506372337118822837">"Cải thiện <xliff:g id="APPLICATION_NAME">%s</xliff:g>"</string>
-    <string name="use_double_space_period" msgid="8781529969425082860">"Dấu cách đôi"</string>
-    <string name="use_double_space_period_summary" msgid="6532892187247952799">"Nhấn đúp vào phím cách sẽ chèn thêm một dấu sau dấu cách"</string>
+    <string name="use_double_space_period" msgid="8781529969425082860">"Nhấn đúp phím cách chèn dấu chấm câu"</string>
+    <string name="use_double_space_period_summary" msgid="6532892187247952799">"Nhấn đúp phím cách sẽ chèn thêm một dấu chấm câu, theo sau là dấu cách"</string>
     <string name="auto_cap" msgid="1719746674854628252">"Tự động viết hoa"</string>
     <string name="auto_cap_summary" msgid="7934452761022946874">"Viết hoa chữ đầu tiên của mỗi câu"</string>
     <string name="edit_personal_dictionary" msgid="3996910038952940420">"Từ điển cá nhân"</string>
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index b129c3e40..e7808e46e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -87,7 +87,7 @@ public final class WordProperty implements Comparable<WordProperty> {
             final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
             final boolean isBeginningOfSentence, final int[] probabilityInfo,
             final ArrayList<int[][]> ngramPrevWordsArray,
-            final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
+            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
             final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
             final ArrayList<int[]> shortcutTargets,
             final ArrayList<Integer> shortcutProbabilities) {
@@ -102,16 +102,22 @@ public final class WordProperty implements Comparable<WordProperty> {
         mHasNgrams = hasBigram;
 
         final int relatedNgramCount = ngramTargets.size();
-        final WordInfo currentWordInfo =
-                mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
-                        : new WordInfo(mWord);
-        final NgramContext ngramContext = new NgramContext(currentWordInfo);
         for (int i = 0; i < relatedNgramCount; i++) {
             final String ngramTargetString =
                     StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
             final WeightedString ngramTarget = new WeightedString(ngramTargetString,
                     createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
-            // TODO: Support n-gram.
+            final int[][] prevWords = ngramPrevWordsArray.get(i);
+            final boolean[] isBeginningOfSentenceArray =
+                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
+            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
+            for (int j = 0; j < prevWords.length; j++) {
+                wordInfoArray[j] = isBeginningOfSentenceArray[j]
+                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
+                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
+                                prevWords[j]));
+            }
+            final NgramContext ngramContext = new NgramContext(wordInfoArray);
             ngrams.add(new NgramProperty(ngramTarget, ngramContext));
         }
         mNgrams = ngrams.isEmpty() ? null : ngrams;
@@ -126,6 +132,7 @@ public final class WordProperty implements Comparable<WordProperty> {
     }
 
     // TODO: Remove
+    @UsedForTesting
     public ArrayList<WeightedString> getBigrams() {
         if (null == mNgrams) {
             return null;
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 248246232..4e0f5f583 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -17,6 +17,7 @@
 package com.android.inputmethod.latin.utils;
 
 import com.android.inputmethod.latin.makedict.DictionaryHeader;
+import com.android.inputmethod.latin.makedict.NgramProperty;
 import com.android.inputmethod.latin.makedict.ProbabilityInfo;
 import com.android.inputmethod.latin.makedict.WeightedString;
 import com.android.inputmethod.latin.makedict.WordProperty;
@@ -26,6 +27,8 @@ import java.util.HashMap;
 public class CombinedFormatUtils {
     public static final String DICTIONARY_TAG = "dictionary";
     public static final String BIGRAM_TAG = "bigram";
+    public static final String NGRAM_TAG = "ngram";
+    public static final String NGRAM_PREV_WORD_TAG = "prev_word";
     public static final String SHORTCUT_TAG = "shortcut";
     public static final String PROBABILITY_TAG = "f";
     public static final String HISTORICAL_INFO_TAG = "historicalInfo";
@@ -76,12 +79,19 @@ public class CombinedFormatUtils {
             }
         }
         if (wordProperty.mHasNgrams) {
-            // TODO: Support ngram.
-            for (final WeightedString bigram : wordProperty.getBigrams()) {
-                builder.append("  " + BIGRAM_TAG + "=" + bigram.mWord);
+            for (final NgramProperty ngramProperty : wordProperty.mNgrams) {
+                builder.append(" " + NGRAM_TAG + "=" + ngramProperty.mTargetWord.mWord);
                 builder.append(",");
-                builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
+                builder.append(formatProbabilityInfo(ngramProperty.mTargetWord.mProbabilityInfo));
                 builder.append("\n");
+                for (int i = 0; i < ngramProperty.mNgramContext.getPrevWordCount(); i++) {
+                    builder.append("  " + NGRAM_PREV_WORD_TAG + "[" + i + "]="
+                            + ngramProperty.mNgramContext.getNthPrevWord(i + 1));
+                    if (ngramProperty.mNgramContext.isNthPrevWordBeginningOfSontence(i + 1)) {
+                        builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
+                    }
+                    builder.append("\n");
+                }
             }
         }
         return builder.toString();
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 461d1d859..9239c8400 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -327,8 +327,8 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
 
 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
         jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
-        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
-        jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
+        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
+        jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
         jobject outNgramProbabilityInfo, jobject outShortcutTargets,
         jobject outShortcutProbabilities) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@@ -352,6 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
     const WordProperty wordProperty = dictionary->getWordProperty(
             CodePointArrayView(wordCodePoints, codePointCount));
     wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
+            outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
             outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
             outShortcutProbabilities);
 }
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index a707f1ba2..019f0880f 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -22,8 +22,9 @@
 namespace latinime {
 
 void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
-        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
-        jobject outBigramProbabilities, jobject outShortcutTargets,
+        jbooleanArray outFlags, jintArray outProbabilityInfo,
+        jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
+        jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
         jobject outShortcutProbabilities) const {
     JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
             MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
@@ -43,16 +44,39 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
     jclass arrayListClass = env->FindClass("java/util/ArrayList");
     jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
 
-    // Output bigrams.
-    // TODO: Support n-gram
+    // Output ngrams.
+    jclass intArrayClass = env->FindClass("[I");
     for (const auto &ngramProperty : mNgrams) {
-        const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints();
-        jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
-        JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
-                word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
-                false /* needsNullTermination */);
-        env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
-        env->DeleteLocalRef(bigramWord1CodePointArray);
+        const NgramContext *const ngramContext = ngramProperty.getNgramContext();
+        jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
+                ngramContext->getPrevWordCount(), intArrayClass, nullptr);
+        jbooleanArray prevWordIsBeginningOfSentenceArray =
+                env->NewBooleanArray(ngramContext->getPrevWordCount());
+        for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
+            const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
+            jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
+            JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */,
+                    codePoints.size(), codePoints.data(), codePoints.size(),
+                    false /* needsNullTermination */);
+            env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
+            env->DeleteLocalRef(prevWordCodePoints);
+            JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
+                    ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
+        }
+        env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
+        env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
+                prevWordIsBeginningOfSentenceArray);
+        env->DeleteLocalRef(prevWordWordCodePointsArray);
+        env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);
+
+        const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
+        jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
+        JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
+                targetWordCodePoints->size(), targetWordCodePoints->data(),
+                targetWordCodePoints->size(), false /* needsNullTermination */);
+        env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
+        env->DeleteLocalRef(targetWordCodePointArray);
+
         const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
         int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
                 ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
@@ -60,7 +84,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
         jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
         env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
                 NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
-        env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
+        env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
         env->DeleteLocalRef(bigramProbabilityInfoArray);
     }
 
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h
index 01b8987b5..b5314faaa 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.h
@@ -39,8 +39,10 @@ class WordProperty {
               mNgrams(*ngrams) {}
 
     void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
-            jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
-            jobject outShortcutTargets, jobject outShortcutProbabilities) const;
+            jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
+            jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
+            jobject outNgramProbabilities, jobject outShortcutTargets,
+            jobject outShortcutProbabilities) const;
 
     const UnigramProperty *getUnigramProperty() const {
         return &mUnigramProperty;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index b96290437..509bd683b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -90,8 +90,8 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
         // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in
         // probabilityEntry.
         const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId);
-        return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(),
-                unigramProbabilityEntry.isBlacklisted(),
+        return WordAttributes(probability, unigramProbabilityEntry.isBlacklisted(),
+                unigramProbabilityEntry.isNotAWord(),
                 unigramProbabilityEntry.isPossiblyOffensive());
     }
     // Cannot find the word.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 193326d82..249d822b2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -488,9 +488,6 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
         AKLOGE("getWordProperty is called for invalid word.");
         return WordProperty();
     }
-    const int ptNodePos =
-            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
-    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
     const LanguageModelDictContent *const languageModelDictContent =
             mBuffers->getLanguageModelDictContent();
     // Fetch ngram information.
@@ -541,12 +538,13 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
                     shortcutProbability);
         }
     }
-    const ProbabilityEntry probabilityEntry = languageModelDictContent->getProbabilityEntry(
-            ptNodeParams.getTerminalId());
+    const WordAttributes wordAttributes = languageModelDictContent->getWordAttributes(
+            WordIdArrayView(), wordId, mHeaderPolicy);
+    const ProbabilityEntry probabilityEntry = languageModelDictContent->getProbabilityEntry(wordId);
     const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
     const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
-            probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
-            probabilityEntry.isPossiblyOffensive(), probabilityEntry.getProbability(),
+            wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(),
+            wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(),
             *historicalInfo, std::move(shortcuts));
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }
diff --git a/tests/Android.mk b/tests/Android.mk
index a084ad10d..7810184d3 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -24,7 +24,7 @@ LOCAL_AAPT_FLAGS += -0 .dict
 # Do not compress test data file
 LOCAL_AAPT_FLAGS += -0 .txt
 
-LOCAL_STATIC_JAVA_LIBRARIES := mockito-target android-support-test
+LOCAL_STATIC_JAVA_LIBRARIES := android-support-test latinime-common mockito-target
 
 # Include all test java files.
 LOCAL_SRC_FILES := $(call all-java-files-under, src)
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index 991dd0b28..15f7568c8 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -21,8 +21,8 @@ import android.test.suitebuilder.annotation.LargeTest;
 import android.util.Pair;
 
 import com.android.inputmethod.latin.NgramContext.WordInfo;
+import com.android.inputmethod.latin.common.CodePointUtils;
 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
-import com.android.inputmethod.latin.makedict.CodePointUtils;
 import com.android.inputmethod.latin.makedict.DictDecoder;
 import com.android.inputmethod.latin.makedict.DictionaryHeader;
 import com.android.inputmethod.latin.makedict.FormatSpec;
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 5d6378937..5a72e417e 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -22,7 +22,7 @@ import android.text.TextUtils;
 import android.util.Pair;
 
 import com.android.inputmethod.latin.NgramContext.WordInfo;
-import com.android.inputmethod.latin.makedict.CodePointUtils;
+import com.android.inputmethod.latin.common.CodePointUtils;
 import com.android.inputmethod.latin.makedict.DictionaryHeader;
 import com.android.inputmethod.latin.makedict.FormatSpec;
 import com.android.inputmethod.latin.makedict.WeightedString;
diff --git a/tests/src/com/android/inputmethod/latin/LatinImeStressTests.java b/tests/src/com/android/inputmethod/latin/LatinImeStressTests.java
index f5e993de8..22114b7a0 100644
--- a/tests/src/com/android/inputmethod/latin/LatinImeStressTests.java
+++ b/tests/src/com/android/inputmethod/latin/LatinImeStressTests.java
@@ -18,7 +18,7 @@ package com.android.inputmethod.latin;
 
 import android.test.suitebuilder.annotation.LargeTest;
 
-import com.android.inputmethod.latin.makedict.CodePointUtils;
+import com.android.inputmethod.latin.common.CodePointUtils;
 
 import java.util.Random;
 
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index d1cb14196..a35fa13ce 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -23,6 +23,7 @@ import android.util.Pair;
 import android.util.SparseArray;
 
 import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.common.CodePointUtils;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
diff --git a/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java
index afabbbd38..6ccb79d76 100644
--- a/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryTests.java
@@ -31,7 +31,7 @@ import com.android.inputmethod.latin.DictionaryFacilitator;
 import com.android.inputmethod.latin.ExpandableBinaryDictionary;
 import com.android.inputmethod.latin.RichInputMethodManager;
 import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback;
-import com.android.inputmethod.latin.makedict.CodePointUtils;
+import com.android.inputmethod.latin.common.CodePointUtils;
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 
 import android.test.AndroidTestCase;
diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk
index 7f34ccf20..ff6c1e433 100644
--- a/tools/dicttool/Android.mk
+++ b/tools/dicttool/Android.mk
@@ -93,7 +93,7 @@ LOCAL_SRC_FILES := $(LOCAL_TOOL_SRC_FILES) \
         $(call all-java-files-under, $(DICTTOOL_ONDEVICE_TESTS_DIR))
 
 LOCAL_JAVA_LIBRARIES := junit
-LOCAL_STATIC_JAVA_LIBRARIES := jsr305lib
+LOCAL_STATIC_JAVA_LIBRARIES := jsr305lib latinime-common-host
 LOCAL_REQUIRED_MODULES := $(LATINIME_HOST_NATIVE_LIBNAME)
 LOCAL_JAR_MANIFEST := etc/manifest.txt
 LOCAL_MODULE := dicttool_aosp
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 3ef03f4bd..4c7187fcd 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -22,8 +22,6 @@ import com.android.inputmethod.latin.makedict.DictDecoder;
 import com.android.inputmethod.latin.makedict.FusionDictionary;
 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 
-import org.xml.sax.SAXException;
-
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
@@ -36,8 +34,6 @@ import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.util.ArrayList;
 
-import javax.xml.parsers.ParserConfigurationException;
-
 /**
  * Class grouping utilities for offline dictionary making.
  *
@@ -177,14 +173,6 @@ public final class BinaryDictOffdeviceUtils {
             System.out.println("Size : " + file.length() + " bytes");
         }
         try {
-            if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
-                if (report) {
-                    System.out.println("Format : XML unigram list");
-                }
-                return XmlDictInputOutput.readDictionaryXml(
-                        new BufferedInputStream(new FileInputStream(file)),
-                        null /* shortcuts */, null /* bigrams */);
-            }
             final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
             if (null == decodedSpec) {
                 throw new RuntimeException("Does not seem to be a dictionary file " + filename);
@@ -209,8 +197,7 @@ public final class BinaryDictOffdeviceUtils {
                 System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
             }
             return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
-        } catch (final IOException | SAXException | ParserConfigurationException |
-                UnsupportedFormatException e) {
+        } catch (final IOException | UnsupportedFormatException e) {
             throw new RuntimeException("Can't read file " + filename, e);
         }
     }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 2925fdc34..e04751ddc 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -27,8 +27,6 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
 import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
 
-import org.xml.sax.SAXException;
-
 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
@@ -41,8 +39,6 @@ import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.LinkedList;
 
-import javax.xml.parsers.ParserConfigurationException;
-
 /**
  * Main class/method for DictionaryMaker.
  */
@@ -52,10 +48,7 @@ public class DictionaryMaker {
         private static final String OPTION_VERSION_2 = "-2";
         private static final String OPTION_VERSION_4 = "-4";
         private static final String OPTION_INPUT_SOURCE = "-s";
-        private static final String OPTION_INPUT_BIGRAM_XML = "-b";
-        private static final String OPTION_INPUT_SHORTCUT_XML = "-c";
         private static final String OPTION_OUTPUT_BINARY = "-d";
-        private static final String OPTION_OUTPUT_XML = "-x";
         private static final String OPTION_OUTPUT_COMBINED = "-o";
         private static final String OPTION_HELP = "-h";
         private static final String OPTION_CODE_POINT_TABLE = "-t";
@@ -63,11 +56,7 @@ public class DictionaryMaker {
         private static final String OPTION_CODE_POINT_TABLE_ON = "on";
         public final String mInputBinary;
         public final String mInputCombined;
-        public final String mInputUnigramXml;
-        public final String mInputShortcutXml;
-        public final String mInputBigramXml;
         public final String mOutputBinary;
-        public final String mOutputXml;
         public final String mOutputCombined;
         public final int mOutputBinaryFormatVersion;
         public final int mCodePointTableMode;
@@ -76,39 +65,20 @@ public class DictionaryMaker {
             checkHasExactlyOneInput();
             checkHasAtLeastOneOutput();
             checkNotSameFile(mInputBinary, mOutputBinary);
-            checkNotSameFile(mInputBinary, mOutputXml);
             checkNotSameFile(mInputCombined, mOutputBinary);
-            checkNotSameFile(mInputCombined, mOutputXml);
-            checkNotSameFile(mInputUnigramXml, mOutputBinary);
-            checkNotSameFile(mInputUnigramXml, mOutputXml);
-            checkNotSameFile(mInputUnigramXml, mOutputCombined);
-            checkNotSameFile(mInputShortcutXml, mOutputBinary);
-            checkNotSameFile(mInputShortcutXml, mOutputXml);
-            checkNotSameFile(mInputShortcutXml, mOutputCombined);
-            checkNotSameFile(mInputBigramXml, mOutputBinary);
-            checkNotSameFile(mInputBigramXml, mOutputXml);
-            checkNotSameFile(mInputBigramXml, mOutputCombined);
-            checkNotSameFile(mOutputBinary, mOutputXml);
             checkNotSameFile(mOutputBinary, mOutputCombined);
-            checkNotSameFile(mOutputXml, mOutputCombined);
         }
 
         private void checkHasExactlyOneInput() {
-            if (null == mInputUnigramXml && null == mInputBinary && null == mInputCombined) {
+            if (null == mInputBinary && null == mInputCombined) {
                 throw new RuntimeException("No input file specified");
-            } else if ((null != mInputUnigramXml && null != mInputBinary)
-                    || (null != mInputUnigramXml && null != mInputCombined)
-                    || (null != mInputBinary && null != mInputCombined)) {
+            } else if (null != mInputBinary && null != mInputCombined) {
                 throw new RuntimeException("Several input files specified");
-            } else if ((null != mInputBinary || null != mInputCombined)
-                    && (null != mInputBigramXml || null != mInputShortcutXml)) {
-                throw new RuntimeException("Separate bigrams/shortcut files are only supported"
-                        + " with XML input (other formats include bigrams and shortcuts already)");
             }
         }
 
         private void checkHasAtLeastOneOutput() {
-            if (null == mOutputBinary && null == mOutputXml && null == mOutputCombined) {
+            if (null == mOutputBinary && null == mOutputCombined) {
                 throw new RuntimeException("No output specified");
             }
         }
@@ -131,16 +101,14 @@ public class DictionaryMaker {
 
         public static String getHelp() {
             return "Usage: makedict "
-                    + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
                     + "| [-s <combined format input]"
-                    + "| [-s <binary input>] [-d <binary output>] [-x <xml output>] "
+                    + "| [-s <binary input>] [-d <binary output>]"
                     + " [-o <combined output>] [-t <code point table switch: on/off/auto>]"
                     + "[-2] [-3] [-4]\n"
                     + "\n"
                     + "  Converts a source dictionary file to one or several outputs.\n"
-                    + "  Source can be an XML file, with an optional XML bigrams file, or a\n"
-                    + "  binary dictionary file.\n"
-                    + "  Binary version 2 (Jelly Bean), 3, 4, XML and\n"
+                    + "  Source can be a binary dictionary file or a combined format file.\n"
+                    + "  Binary version 2 (Jelly Bean), 3, 4, and\n"
                     + "  combined format outputs are supported.";
         }
 
@@ -151,11 +119,7 @@ public class DictionaryMaker {
             }
             String inputBinary = null;
             String inputCombined = null;
-            String inputUnigramXml = null;
-            String inputShortcutXml = null;
-            String inputBigramXml = null;
             String outputBinary = null;
-            String outputXml = null;
             String outputCombined = null;
             int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
             // Don't use code point table by default.
@@ -180,9 +144,7 @@ public class DictionaryMaker {
                         String argValue = args.get(0);
                         args.remove(0);
                         if (OPTION_INPUT_SOURCE.equals(arg)) {
-                            if (XmlDictInputOutput.isXmlUnigramDictionary(argValue)) {
-                                inputUnigramXml = argValue;
-                            } else if (CombinedInputOutput.isCombinedDictionary(argValue)) {
+                            if (CombinedInputOutput.isCombinedDictionary(argValue)) {
                                 inputCombined = argValue;
                             } else if (BinaryDictDecoderUtils.isBinaryDictionary(argValue)) {
                                 inputBinary = argValue;
@@ -190,14 +152,8 @@ public class DictionaryMaker {
                                 throw new IllegalArgumentException(
                                         "Unknown format for file " + argValue);
                             }
-                        } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
-                            inputShortcutXml = argValue;
-                        } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
-                            inputBigramXml = argValue;
                         } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
                             outputBinary = argValue;
-                        } else if (OPTION_OUTPUT_XML.equals(arg)) {
-                            outputXml = argValue;
                         } else if (OPTION_OUTPUT_COMBINED.equals(arg)) {
                             outputCombined = argValue;
                         } else if (OPTION_CODE_POINT_TABLE.equals(arg)) {
@@ -214,13 +170,13 @@ public class DictionaryMaker {
                         }
                     }
                 } else {
-                    if (null == inputBinary && null == inputUnigramXml) {
+                    if (null == inputBinary) {
                         if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) {
                             inputBinary = arg;
                         } else if (CombinedInputOutput.isCombinedDictionary(arg)) {
                             inputCombined = arg;
                         } else {
-                            inputUnigramXml = arg;
+                            throw new IllegalArgumentException("Unknown format for file " + arg);
                         }
                     } else if (null == outputBinary) {
                         outputBinary = arg;
@@ -232,11 +188,7 @@ public class DictionaryMaker {
 
             mInputBinary = inputBinary;
             mInputCombined = inputCombined;
-            mInputUnigramXml = inputUnigramXml;
-            mInputShortcutXml = inputShortcutXml;
-            mInputBigramXml = inputBigramXml;
             mOutputBinary = outputBinary;
-            mOutputXml = outputXml;
             mOutputCombined = outputCombined;
             mOutputBinaryFormatVersion = outputBinaryFormatVersion;
             mCodePointTableMode = codePointTableMode;
@@ -245,8 +197,7 @@ public class DictionaryMaker {
     }
 
     public static void main(String[] args)
-            throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
-            UnsupportedFormatException {
+            throws FileNotFoundException, IOException, UnsupportedFormatException {
         final Arguments parsedArgs = new Arguments(args);
         FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs);
         writeOutputToParsedArgs(parsedArgs, dictionary);
@@ -259,14 +210,11 @@ public class DictionaryMaker {
      * @return the read dictionary.
      */
     private static FusionDictionary readInputFromParsedArgs(final Arguments args)
-            throws IOException, UnsupportedFormatException, ParserConfigurationException,
-            SAXException, FileNotFoundException {
+            throws IOException, UnsupportedFormatException, FileNotFoundException {
         if (null != args.mInputBinary) {
             return readBinaryFile(args.mInputBinary);
         } else if (null != args.mInputCombined) {
             return readCombinedFile(args.mInputCombined);
-        } else if (null != args.mInputUnigramXml) {
-            return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml);
         } else {
             throw new RuntimeException("No input file specified");
         }
@@ -314,30 +262,6 @@ public class DictionaryMaker {
     }
 
     /**
-     * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
-     *
-     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
-     * @param shortcutXmlFilename the name of the shortcut/whitelist XML file, or null if none.
-     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
-     * @return the read dictionary.
-     * @throws FileNotFoundException if one of the files can't be found
-     * @throws SAXException if one or more of the XML files is not well-formed
-     * @throws IOException if one the input files can't be read
-     * @throws ParserConfigurationException if the system can't create a SAX parser
-     */
-    private static FusionDictionary readXmlFile(final String unigramXmlFilename,
-            final String shortcutXmlFilename, final String bigramXmlFilename)
-            throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
-        try (
-            final BufferedInputStream unigrams = getBufferedFileInputStream(unigramXmlFilename);
-            final BufferedInputStream shortcuts = getBufferedFileInputStream(shortcutXmlFilename);
-            final BufferedInputStream bigrams = getBufferedFileInputStream(bigramXmlFilename);
-        ) {
-            return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
-        }
-    }
-
-    /**
      * Invoke the right output method according to args.
      *
      * This will write the passed dictionary to the file(s) passed in the command line arguments.
@@ -353,9 +277,6 @@ public class DictionaryMaker {
             writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion,
                     args.mCodePointTableMode);
         }
-        if (null != args.mOutputXml) {
-            writeXmlDictionary(args.mOutputXml, dict);
-        }
         if (null != args.mOutputCombined) {
             writeCombinedDictionary(args.mOutputCombined, dict);
         }
@@ -387,21 +308,6 @@ public class DictionaryMaker {
     }
 
     /**
-     * Write the dictionary in XML format to the specified filename.
-     *
-     * @param outputFilename the name of the file to write to.
-     * @param dict the dictionary to write.
-     * @throws FileNotFoundException if the output file can't be created.
-     * @throws IOException if the output file can't be written to.
-     */
-    private static void writeXmlDictionary(final String outputFilename,
-            final FusionDictionary dict) throws FileNotFoundException, IOException {
-        try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
-            XmlDictInputOutput.writeDictionaryXml(writer, dict);
-        }
-    }
-
-    /**
      * Write the dictionary in the combined format to the specified filename.
      *
      * @param outputFilename the name of the file to write to.
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java
index 808e1d4c8..0b1fb88bc 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Makedict.java
@@ -20,8 +20,6 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import javax.xml.parsers.ParserConfigurationException;
-import org.xml.sax.SAXException;
 
 public class Makedict extends Dicttool.Command {
     public static final String COMMAND = "makedict";
@@ -35,8 +33,7 @@ public class Makedict extends Dicttool.Command {
     }
 
     @Override
-    public void run() throws FileNotFoundException, IOException, ParserConfigurationException,
-            SAXException, UnsupportedFormatException {
+    public void run() throws FileNotFoundException, IOException, UnsupportedFormatException {
         DictionaryMaker.main(mArgs);
     }
 }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
deleted file mode 100644
index 7f3337949..000000000
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.android.inputmethod.latin.dicttool;
-
-import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.ProbabilityInfo;
-import com.android.inputmethod.latin.makedict.WeightedString;
-import com.android.inputmethod.latin.makedict.WordProperty;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.TreeSet;
-
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-/**
- * Reads and writes XML files for a FusionDictionary.
- *
- * All functions in this class are static.
- */
-public class XmlDictInputOutput {
-
-    private static final String ROOT_TAG = "wordlist";
-    private static final String WORD_TAG = "w";
-    private static final String BIGRAM_TAG = "bigram";
-    private static final String SHORTCUT_TAG = "shortcut";
-    private static final String PROBABILITY_ATTR = "f";
-    private static final String WORD_ATTR = "word";
-    private static final String NOT_A_WORD_ATTR = "not_a_word";
-
-    /**
-     * SAX handler for a unigram XML file.
-     */
-    static private class UnigramHandler extends DefaultHandler {
-        // Parser states
-        private static final int START = 1;
-        private static final int WORD = 2;
-        private static final int UNKNOWN = 3;
-        private static final int SHORTCUT_ONLY_WORD_PROBABILITY = 1;
-
-        FusionDictionary mDictionary;
-        int mState; // the state of the parser
-        int mFreq; // the currently read freq
-        String mWord; // the current word
-        final HashMap<String, ArrayList<WeightedString>> mShortcutsMap;
-
-        /**
-         * Create the handler.
-         *
-         * @param shortcuts the shortcuts as a map. This may be empty, but may not be null.
-         */
-        public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) {
-            mDictionary = null;
-            mShortcutsMap = shortcuts;
-            mWord = "";
-            mState = START;
-            mFreq = 0;
-        }
-
-        public FusionDictionary getFinalDictionary() {
-            final FusionDictionary dict = mDictionary;
-            for (final String shortcutOnly : mShortcutsMap.keySet()) {
-                if (dict.hasWord(shortcutOnly)) continue;
-                dict.add(shortcutOnly, new ProbabilityInfo(SHORTCUT_ONLY_WORD_PROBABILITY),
-                        mShortcutsMap.get(shortcutOnly), true /* isNotAWord */,
-                        false /* isPossiblyOffensive */);
-            }
-            mDictionary = null;
-            mShortcutsMap.clear();
-            mWord = "";
-            mState = START;
-            mFreq = 0;
-            return dict;
-        }
-
-        @Override
-        public void startElement(String uri, String localName, String qName, Attributes attrs) {
-            if (WORD_TAG.equals(localName)) {
-                mState = WORD;
-                mWord = "";
-                for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
-                    final String attrName = attrs.getLocalName(attrIndex);
-                    if (PROBABILITY_ATTR.equals(attrName)) {
-                        mFreq = Integer.parseInt(attrs.getValue(attrIndex));
-                    }
-                }
-            } else if (ROOT_TAG.equals(localName)) {
-                final HashMap<String, String> attributes = new HashMap<>();
-                for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
-                    final String attrName = attrs.getLocalName(attrIndex);
-                    attributes.put(attrName, attrs.getValue(attrIndex));
-                }
-                mDictionary = new FusionDictionary(new PtNodeArray(),
-                        new DictionaryOptions(attributes));
-            } else {
-                mState = UNKNOWN;
-            }
-        }
-
-        @Override
-        public void characters(char[] ch, int start, int length) {
-            if (WORD == mState) {
-                // The XML parser is free to return text in arbitrary chunks one after the
-                // other. In particular, this happens in some implementations when it finds
-                // an escape code like "&amp;".
-                mWord += String.copyValueOf(ch, start, length);
-            }
-        }
-
-        @Override
-        public void endElement(String uri, String localName, String qName) {
-            if (WORD == mState) {
-                mDictionary.add(mWord, new ProbabilityInfo(mFreq), mShortcutsMap.get(mWord),
-                        false /* isNotAWord */, false /* isPossiblyOffensive */);
-                mState = START;
-            }
-        }
-    }
-
-    static private class AssociativeListHandler extends DefaultHandler {
-        private final String SRC_TAG;
-        private final String SRC_ATTRIBUTE;
-        private final String DST_TAG;
-        private final String DST_ATTRIBUTE;
-        private final String DST_FREQ;
-
-        // In this version of the XML file, the bigram frequency is given as an int 0..XML_MAX
-        private final static int XML_MAX = 256;
-        // In memory and in the binary dictionary the bigram frequency is 0..MEMORY_MAX
-        private final static int MEMORY_MAX = 256;
-        private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX;
-
-        private String mSrc;
-        private final HashMap<String, ArrayList<WeightedString>> mAssocMap;
-
-        public AssociativeListHandler(final String srcTag, final String srcAttribute,
-                final String dstTag, final String dstAttribute, final String dstFreq) {
-            SRC_TAG = srcTag;
-            SRC_ATTRIBUTE = srcAttribute;
-            DST_TAG = dstTag;
-            DST_ATTRIBUTE = dstAttribute;
-            DST_FREQ = dstFreq;
-            mSrc = null;
-            mAssocMap = new HashMap<>();
-        }
-
-        @Override
-        public void startElement(String uri, String localName, String qName, Attributes attrs) {
-            if (SRC_TAG.equals(localName)) {
-                mSrc = attrs.getValue(uri, SRC_ATTRIBUTE);
-            } else if (DST_TAG.equals(localName)) {
-                String dst = attrs.getValue(uri, DST_ATTRIBUTE);
-                int freq = getValueFromFreqString(attrs.getValue(uri, DST_FREQ));
-                WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
-                ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
-                if (null == bigramList) bigramList = new ArrayList<>();
-                bigramList.add(bigram);
-                mAssocMap.put(mSrc, bigramList);
-            }
-        }
-
-        protected int getValueFromFreqString(final String freqString) {
-            return Integer.parseInt(freqString);
-        }
-
-        // This may return an empty map, but will never return null.
-        public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
-            return mAssocMap;
-        }
-    }
-
-    /**
-     * SAX handler for a bigram XML file.
-     */
-    static private class BigramHandler extends AssociativeListHandler {
-        private final static String BIGRAM_W1_TAG = "bi";
-        private final static String BIGRAM_W2_TAG = "w";
-        private final static String BIGRAM_W1_ATTRIBUTE = "w1";
-        private final static String BIGRAM_W2_ATTRIBUTE = "w2";
-        private final static String BIGRAM_FREQ_ATTRIBUTE = "p";
-
-        public BigramHandler() {
-            super(BIGRAM_W1_TAG, BIGRAM_W1_ATTRIBUTE, BIGRAM_W2_TAG, BIGRAM_W2_ATTRIBUTE,
-                    BIGRAM_FREQ_ATTRIBUTE);
-        }
-
-        // As per getAssocMap(), this never returns null.
-        public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
-            return getAssocMap();
-        }
-    }
-
-    /**
-     * SAX handler for a shortcut & whitelist XML file.
-     */
-    static private class ShortcutAndWhitelistHandler extends AssociativeListHandler {
-        private final static String ENTRY_TAG = "entry";
-        private final static String ENTRY_ATTRIBUTE = "shortcut";
-        private final static String TARGET_TAG = "target";
-        private final static String REPLACEMENT_ATTRIBUTE = "replacement";
-        private final static String TARGET_PRIORITY_ATTRIBUTE = "priority";
-        private final static String WHITELIST_MARKER = "whitelist";
-        private final static int WHITELIST_FREQ_VALUE = 15;
-        private final static int MIN_FREQ = 0;
-        private final static int MAX_FREQ = 14;
-
-        public ShortcutAndWhitelistHandler() {
-            super(ENTRY_TAG, ENTRY_ATTRIBUTE, TARGET_TAG, REPLACEMENT_ATTRIBUTE,
-                    TARGET_PRIORITY_ATTRIBUTE);
-        }
-
-        @Override
-        protected int getValueFromFreqString(final String freqString) {
-            if (WHITELIST_MARKER.equals(freqString)) {
-                return WHITELIST_FREQ_VALUE;
-            }
-            final int intValue = super.getValueFromFreqString(freqString);
-            if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
-                throw new RuntimeException("Shortcut freq out of range. Accepted range is "
-                        + MIN_FREQ + ".." + MAX_FREQ);
-            }
-            return intValue;
-        }
-
-        // As per getAssocMap(), this never returns null.
-        public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
-            return getAssocMap();
-        }
-    }
-
-    /**
-     * Basic test to find out whether the file is in the unigram XML format or not.
-     *
-     * Concretely this only tests the header line.
-     *
-     * @param filename The name of the file to test.
-     * @return true if the file is in the unigram XML format, false otherwise
-     */
-    public static boolean isXmlUnigramDictionary(final String filename) {
-        try (final BufferedReader reader = new BufferedReader(
-                new InputStreamReader(new FileInputStream(filename), "UTF-8"))) {
-            final String firstLine = reader.readLine();
-            return firstLine.matches("^\\s*<wordlist .*>\\s*$");
-        } catch (final IOException e) {
-            return false;
-        }
-    }
-
-    /**
-     * Reads a dictionary from an XML file.
-     *
-     * This is the public method that will parse an XML file and return the corresponding memory
-     * representation.
-     *
-     * @param unigrams the file to read the data from.
-     * @param shortcuts the file to read the shortcuts & whitelist from, or null.
-     * @param bigrams the file to read the bigrams from, or null.
-     * @return the in-memory representation of the dictionary.
-     */
-    public static FusionDictionary readDictionaryXml(final BufferedInputStream unigrams,
-            final BufferedInputStream shortcuts, final BufferedInputStream bigrams)
-            throws SAXException, IOException, ParserConfigurationException {
-        final SAXParserFactory factory = SAXParserFactory.newInstance();
-        factory.setNamespaceAware(true);
-        final SAXParser parser = factory.newSAXParser();
-        final BigramHandler bigramHandler = new BigramHandler();
-        if (null != bigrams) parser.parse(bigrams, bigramHandler);
-
-        final ShortcutAndWhitelistHandler shortcutAndWhitelistHandler =
-                new ShortcutAndWhitelistHandler();
-        if (null != shortcuts) parser.parse(shortcuts, shortcutAndWhitelistHandler);
-
-        final UnigramHandler unigramHandler =
-                new UnigramHandler(shortcutAndWhitelistHandler.getShortcutAndWhitelistMap());
-        parser.parse(unigrams, unigramHandler);
-        final FusionDictionary dict = unigramHandler.getFinalDictionary();
-        final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap();
-        for (final String firstWord : bigramMap.keySet()) {
-            if (!dict.hasWord(firstWord)) continue;
-            final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
-            for (final WeightedString bigram : bigramList) {
-                if (!dict.hasWord(bigram.mWord)) continue;
-                dict.setBigram(firstWord, bigram.mWord, bigram.mProbabilityInfo);
-            }
-        }
-        return dict;
-    }
-
-    /**
-     * Reads a dictionary in the first, legacy XML format
-     *
-     * This method reads data from the parser and creates a new FusionDictionary with it.
-     * The format parsed by this method is the format used before Ice Cream Sandwich,
-     * which has no support for bigrams or shortcuts/whitelist.
-     * It is important to note that this method expects the parser to have already eaten
-     * the first, all-encompassing tag.
-     *
-     * @param xpp the parser to read the data from.
-     * @return the parsed dictionary.
-     */
-
-    /**
-     * Writes a dictionary to an XML file.
-     *
-     * The output format is the "second" format, which supports bigrams and shortcuts/whitelist.
-     *
-     * @param destination a destination stream to write to.
-     * @param dict the dictionary to write.
-     */
-    public static void writeDictionaryXml(final BufferedWriter destination,
-            final FusionDictionary dict) throws IOException {
-        final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
-        for (WordProperty wordProperty : dict) {
-            wordPropertiesInDict.add(wordProperty);
-        }
-        // TODO: use an XMLSerializer if this gets big
-        destination.write("<wordlist format=\"2\"");
-        for (final String key : dict.mOptions.mAttributes.keySet()) {
-            final String value = dict.mOptions.mAttributes.get(key);
-            destination.write(" " + key + "=\"" + value + "\"");
-        }
-        destination.write(">\n");
-        destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
-        for (WordProperty wordProperty : wordPropertiesInDict) {
-            destination.write("  <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
-                    + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
-                    + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
-                    + "\">");
-            if (wordProperty.mHasShortcuts) {
-                destination.write("\n");
-                for (WeightedString target : wordProperty.mShortcutTargets) {
-                    destination.write("    <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
-                            + target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
-                            + ">\n");
-                }
-                destination.write("  ");
-            }
-            if (wordProperty.mHasNgrams) {
-                destination.write("\n");
-                for (WeightedString bigram : wordProperty.getBigrams()) {
-                    destination.write("    <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
-                            + bigram.getProbability() + "\">" + bigram.mWord
-                            + "</" + BIGRAM_TAG + ">\n");
-                }
-                destination.write("  ");
-            }
-            destination.write("</" + WORD_TAG + ">\n");
-        }
-        destination.write("</wordlist>\n");
-        destination.close();
-    }
-}