diff options
author | 2014-09-25 19:58:33 +0900 | |
---|---|---|
committer | 2014-09-26 15:15:16 +0900 | |
commit | 8a6e96d28645ce325a38423af6967a011edefc9d (patch) | |
tree | c6d3bf8693b6d95f2acb322b5801ae61983f70d6 /tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java | |
parent | f4329f7fffc43840b7fb95cf181ea016108a7664 (diff) | |
download | latinime-8a6e96d28645ce325a38423af6967a011edefc9d.tar.gz latinime-8a6e96d28645ce325a38423af6967a011edefc9d.tar.xz latinime-8a6e96d28645ce325a38423af6967a011edefc9d.zip |
Create a code point table based on occurrence counts.
Bug:17097992
Change-Id: Ifd76dbd4d385d800af416368e25c9e56a76d0fbf
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java new file mode 100644 index 000000000..7c0b92dc3 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoderTests.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map.Entry; + +import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.LargeTest; +import android.util.Log; + +/** + * Unit tests for Ver2DictEncoder + */ +@LargeTest +public class Ver2DictEncoderTests extends AndroidTestCase { + private static final String TAG = Ver2DictEncoderTests.class.getSimpleName(); + private static final int UNIGRAM_FREQ = 10; + + public void testCodePointTable() { + final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"}; + final List<String> words = Arrays.asList(wordSource); + final String correctCodePointTable = "eotdsanirfg bclwup"; + final String correctCodePointOccurrenceArrayString = + "10141164111411531003110297210521142103111911171108198199132111211021"; + final String correctCodePointExpectedMapString = "323433363538373940494147454644424348"; + final String dictName = "codePointTableTest"; + final String dictVersion = Long.toString(System.currentTimeMillis()); + + final FormatSpec.FormatOptions formatOptions = + new FormatSpec.FormatOptions(FormatSpec.VERSION2); + final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(), + BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); + addUnigrams(sourcedict, words, null /* shortcutMap */); + final CodePointTable codePointTable = Ver2DictEncoder.makeCodePointTable(sourcedict); + + // Check if mCodePointOccurrenceArray is correct + final StringBuilder codePointOccurrenceArrayString = new StringBuilder(); + for (Entry<Integer, Integer> entry : codePointTable.mCodePointOccurrenceArray) { + codePointOccurrenceArrayString.append(entry.getKey()); + codePointOccurrenceArrayString.append(entry.getValue()); + } + assertEquals(codePointOccurrenceArrayString.toString(), + correctCodePointOccurrenceArrayString); + + // Check if mCodePointToOneByteCodeMap is correct + final StringBuilder codePointExpectedMapString = new StringBuilder(); + for (int i = 0; i < correctCodePointTable.length(); ++i) { + codePointExpectedMapString.append(codePointTable.mCodePointToOneByteCodeMap.get( + correctCodePointTable.codePointAt(i))); + } + assertEquals(codePointExpectedMapString.toString(), correctCodePointExpectedMapString); + } + + /** + * Adds unigrams to the dictionary. + */ + private void addUnigrams(final FusionDictionary dict, final List<String> words, + final HashMap<String, List<String>> shortcutMap) { + for (final String word : words) { + final ArrayList<WeightedString> shortcuts = new ArrayList<>(); + if (shortcutMap != null && shortcutMap.containsKey(word)) { + for (final String shortcut : shortcutMap.get(word)) { + shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); + } + } + dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), + (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); + } + } +} |