aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java
diff options
context:
space:
mode:
authorAkifumi Yoshimoto <akifumi@google.com>2014-09-25 19:58:33 +0900
committerAkifumi Yoshimoto <akifumi@google.com>2014-09-26 15:15:16 +0900
commit8a6e96d28645ce325a38423af6967a011edefc9d (patch)
treec6d3bf8693b6d95f2acb322b5801ae61983f70d6 /tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java
parentf4329f7fffc43840b7fb95cf181ea016108a7664 (diff)
downloadlatinime-8a6e96d28645ce325a38423af6967a011edefc9d.tar.gz
latinime-8a6e96d28645ce325a38423af6967a011edefc9d.tar.xz
latinime-8a6e96d28645ce325a38423af6967a011edefc9d.zip
Create a code point table based on occurrence counts.
Bug:17097992 Change-Id: Ifd76dbd4d385d800af416368e25c9e56a76d0fbf
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java')
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java91
1 files changed, 91 insertions, 0 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java
new file mode 100644
index 000000000..7c0b92dc3
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.LargeTest;
+import android.util.Log;
+
+/**
+ * Unit tests for {@code Ver2DictEncoder}.
+ *
+ * <p>The only test here builds a small in-memory {@code FusionDictionary} and checks the
+ * {@code CodePointTable} that {@code Ver2DictEncoder.makeCodePointTable} derives from it.
+ */
+@LargeTest
+public class Ver2DictEncoderTests extends AndroidTestCase {
+    /** Frequency given to every unigram (and every shortcut target) added by these tests. */
+    private static final int UNIGRAM_FREQ = 10;
+
+    /**
+     * Checks both halves of the code point table built from a fixed word list: the
+     * occurrence array and the code point to one-byte code map.
+     */
+    public void testCodePointTable() {
+        final List<String> words = Arrays.asList(
+                "words", "used", "for", "testing", "a", "code point", "table");
+        // Every distinct character of the words above; used as lookup keys into the
+        // code point to one-byte code map, in exactly this order.
+        final String codePointsToLookUp = "eotdsanirfg bclwup";
+        // Each entry is serialized as its key immediately followed by its value, with no
+        // separator. For this word list that reads as <decimal code point><occurrence count>,
+        // e.g. the leading "1014" is 'e' (101) occurring 4 times.
+        final String expectedOccurrences =
+                "10141164111411531003110297210521142103111911171108198199132111211021";
+        // The map values for the characters of codePointsToLookUp, concatenated in that order
+        // (e.g. 'e' maps to 32 and 'o' maps to 34).
+        final String expectedOneByteCodes = "323433363538373940494147454644424348";
+        final String dictName = "codePointTableTest";
+        final String dictVersion = Long.toString(System.currentTimeMillis());
+
+        final FormatSpec.FormatOptions formatOptions =
+                new FormatSpec.FormatOptions(FormatSpec.VERSION2);
+        final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
+                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
+        addUnigrams(sourcedict, words, null /* shortcutMap */);
+        final CodePointTable codePointTable = Ver2DictEncoder.makeCodePointTable(sourcedict);
+
+        // Check if mCodePointOccurrenceArray is correct
+        final StringBuilder actualOccurrences = new StringBuilder();
+        for (final Entry<Integer, Integer> entry : codePointTable.mCodePointOccurrenceArray) {
+            actualOccurrences.append(entry.getKey());
+            actualOccurrences.append(entry.getValue());
+        }
+        // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
+        // failure message label the two values correctly.
+        assertEquals(expectedOccurrences, actualOccurrences.toString());
+
+        // Check if mCodePointToOneByteCodeMap is correct
+        final StringBuilder actualOneByteCodes = new StringBuilder();
+        for (int i = 0; i < codePointsToLookUp.length(); ++i) {
+            actualOneByteCodes.append(codePointTable.mCodePointToOneByteCodeMap.get(
+                    codePointsToLookUp.codePointAt(i)));
+        }
+        assertEquals(expectedOneByteCodes, actualOneByteCodes.toString());
+    }
+
+    /**
+     * Adds every word in {@code words} to {@code dict} as a unigram with frequency
+     * {@link #UNIGRAM_FREQ}.
+     *
+     * @param dict the dictionary to add the words to
+     * @param words the words to add
+     * @param shortcutMap optional map from a word to its shortcut targets. When {@code null},
+     *        each word is added with a {@code null} shortcut list; otherwise each word gets a
+     *        (possibly empty) list holding its targets, each weighted with
+     *        {@link #UNIGRAM_FREQ}.
+     */
+    private static void addUnigrams(final FusionDictionary dict, final List<String> words,
+            final HashMap<String, List<String>> shortcutMap) {
+        for (final String word : words) {
+            final ArrayList<WeightedString> shortcuts;
+            if (shortcutMap == null) {
+                shortcuts = null;
+            } else {
+                shortcuts = new ArrayList<>();
+                // A single lookup is enough: a missing key yields null, i.e. no shortcuts.
+                final List<String> targets = shortcutMap.get(word);
+                if (targets != null) {
+                    for (final String target : targets) {
+                        shortcuts.add(new WeightedString(target, UNIGRAM_FREQ));
+                    }
+                }
+            }
+            dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), shortcuts,
+                    false /* isNotAWord */);
+        }
+    }
+}