diff options
author | 2024-12-16 21:45:41 -0500 | |
---|---|---|
committer | 2025-01-11 14:17:35 -0500 | |
commit | e9a0e66716dab4dd3184d009d8920de1961efdfa (patch) | |
tree | 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java | |
parent | fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff) | |
download | latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip |
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java | 675 |
1 files changed, 0 insertions, 675 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java deleted file mode 100644 index 376aa3f62..000000000 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ /dev/null @@ -1,675 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import android.test.AndroidTestCase; -import android.util.Log; -import android.util.Pair; -import android.util.SparseArray; - -import com.android.inputmethod.latin.BinaryDictionary; -import com.android.inputmethod.latin.common.CodePointUtils; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; -import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map.Entry; -import java.util.Random; -import java.util.Set; -import java.util.TreeMap; - -/** - * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. - */ -public class BinaryDictDecoderEncoderTests extends AndroidTestCase { - private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); - private static final int DEFAULT_MAX_UNIGRAMS = 300; - private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; - private static final int LARGE_CODE_POINT_SET_SIZE = 300; - private static final int UNIGRAM_FREQ = 10; - private static final int BIGRAM_FREQ = 50; - private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; - - private static final ArrayList<String> sWords = new ArrayList<>(); - private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>(); - private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>(); - private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>(); - private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>(); - - final Random mRandom; - - public BinaryDictDecoderEncoderTests() { - this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); - } - - public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { - super(); - BinaryDictionaryUtils.setCurrentTimeForTest(0); - Log.e(TAG, "Testing dictionary: seed is " + seed); - mRandom = new Random(seed); - sWords.clear(); - sWordsWithVariousCodePoints.clear(); - generateWords(maxUnigrams, mRandom); - - for (int i = 0; i < sWords.size(); ++i) { - sChainBigrams.put(i, new ArrayList<Integer>()); - if (i > 0) { - sChainBigrams.get(i - 1).add(i); - } - } - - sStarBigrams.put(0, new ArrayList<Integer>()); - // MAX - 1 because we added one above already - final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); - for (int i = 1; i < maxBigrams; ++i) { - sStarBigrams.get(0).add(i); - } - } - - @Override - protected void setUp() throws Exception { - super.setUp(); - BinaryDictionaryUtils.setCurrentTimeForTest(0); - } - - @Override - protected void tearDown() throws Exception { - // Quit test mode. - BinaryDictionaryUtils.setCurrentTimeForTest(-1); - super.tearDown(); - } - - private static void generateWords(final int number, final Random random) { - final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, - random); - final Set<String> wordSet = new HashSet<>(); - while (wordSet.size() < number) { - wordSet.add(CodePointUtils.generateWord(random, codePointSet)); - } - sWords.addAll(wordSet); - - final int[] largeCodePointSet = CodePointUtils.generateCodePointSet( - LARGE_CODE_POINT_SET_SIZE, random); - wordSet.clear(); - while (wordSet.size() < number) { - wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet)); - } - sWordsWithVariousCodePoints.addAll(wordSet); - } - - /** - * Adds unigrams to the dictionary. - */ - private static void addUnigrams(final int number, final FusionDictionary dict, - final List<String> words) { - for (int i = 0; i < number; ++i) { - final String word = words.get(i); - final ArrayList<WeightedString> shortcuts = new ArrayList<>(); - dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), false /* isNotAWord */, - false /* isPossiblyOffensive */); - } - } - - private static void addBigrams(final FusionDictionary dict, - final List<String> words, - final SparseArray<List<Integer>> bigrams) { - for (int i = 0; i < bigrams.size(); ++i) { - final int w1 = bigrams.keyAt(i); - for (int w2 : bigrams.valueAt(i)) { - dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ)); - } - } - } - -// The following is useful to dump the dictionary into a textual file, but it can't compile -// on-device, so it's commented out. -// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) -// throws IOException { -// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( -// new java.io.FileWriter(new File(filename)), dict); -// } - - private static long timeWritingDictToFile(final File file, final FusionDictionary dict, - final FormatSpec.FormatOptions formatOptions) { - - long now = -1, diff = -1; - - try { - final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); - - now = System.currentTimeMillis(); - // If you need to dump the dict to a textual file, uncomment the line below and the - // function above - // dumpToCombinedFileForDebug(file, "/tmp/foo"); - dictEncoder.writeDictionary(dict, formatOptions); - diff = System.currentTimeMillis() - now; - } catch (IOException e) { - Log.e(TAG, "IO exception while writing file", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException", e); - } - - return diff; - } - - private static void checkDictionary(final FusionDictionary dict, final List<String> words, - final SparseArray<List<Integer>> bigrams) { - assertNotNull(dict); - - // check unigram - for (final String word : words) { - final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); - assertNotNull(ptNode); - } - - // check bigram - for (int i = 0; i < bigrams.size(); ++i) { - final int w1 = bigrams.keyAt(i); - for (final int w2 : bigrams.valueAt(i)) { - final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, - words.get(w1)); - assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); - } - } - } - - private static String outputOptions(final int bufferType, - final FormatSpec.FormatOptions formatOptions) { - final String result = " : buffer type = " - + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); - return result + " : version = " + formatOptions.mVersion; - } - - // Tests for readDictionaryBinary and writeDictionaryBinary - - private static long timeReadingAndCheckDict(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType) { - long now, diff = -1; - - FusionDictionary dict = null; - try { - final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), - bufferType); - now = System.currentTimeMillis(); - dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); - diff = System.currentTimeMillis() - now; - } catch (IOException e) { - Log.e(TAG, "IOException while reading dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); - } - - checkDictionary(dict, words, bigrams); - return diff; - } - - // Tests for readDictionaryBinary and writeDictionaryBinary - private String runReadAndWrite(final List<String> words, - final SparseArray<List<Integer>> bigrams, - final int bufferType, final FormatSpec.FormatOptions formatOptions, - final String message) { - - final String dictName = "runReadAndWrite"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(words.size(), dict, words); - addBigrams(dict, words, bigrams); - checkDictionary(dict, words, bigrams); - - final long write = timeWritingDictToFile(file, dict, formatOptions); - final long read = timeReadingAndCheckDict(file, words, bigrams, bufferType); - - return "PROF: read=" + read + "ms, write=" + write + "ms :" + message - + " : " + outputOptions(bufferType, formatOptions); - } - - private void runReadAndWriteTests(final List<String> results, final int bufferType, - final FormatSpec.FormatOptions formatOptions) { - results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, - formatOptions, "unigram")); - results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, - formatOptions, "chain")); - results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, - formatOptions, "star")); - results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions, - "unigram with shortcuts")); - results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions, - "chain with shortcuts")); - results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions, - "star with shortcuts")); - results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams, - bufferType, formatOptions, - "unigram with various code points")); - } - - public void testCharacterTableIsPresent() throws IOException, UnsupportedFormatException { - final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"}; - final List<String> words = Arrays.asList(wordSource); - final String correctCodePointTable = "toesdrniawuplgfcb "; - final String dictName = "codePointTableTest"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY; - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, - BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir()); - - // Write a test dictionary - final DictEncoder dictEncoder = new Ver2DictEncoder(file, - Ver2DictEncoder.CODE_POINT_TABLE_ON); - final FormatSpec.FormatOptions formatOptions = - new FormatSpec.FormatOptions( - FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION); - final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(words.size(), sourcedict, words); - dictEncoder.writeDictionary(sourcedict, formatOptions); - - // Read the dictionary - final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), - DictDecoder.USE_BYTEARRAY); - final DictionaryHeader fileHeader = dictDecoder.readHeader(); - // Check if codePointTable is present - assertTrue("codePointTable is not present", - fileHeader.mDictionaryOptions.mAttributes.containsKey(codePointTableAttribute)); - final String codePointTable = - fileHeader.mDictionaryOptions.mAttributes.get(codePointTableAttribute); - // Check if codePointTable is correct - assertEquals("codePointTable is incorrect", codePointTable, correctCodePointTable); - } - - // Unit test for CharEncoding.readString and CharEncoding.writeString. - public void testCharEncoding() { - // the max length of a word in sWords is less than 50. - // See generateWords. - final byte[] buffer = new byte[50 * 3]; - final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); - for (final String word : sWords) { - Arrays.fill(buffer, (byte) 0); - CharEncoding.writeString(buffer, 0, word, null); - dictBuffer.position(0); - final String str = CharEncoding.readString(dictBuffer); - assertEquals(word, str); - } - } - - public void testReadAndWriteWithByteBuffer() { - final List<String> results = new ArrayList<>(); - - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.STATIC_OPTIONS); - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); - for (final String result : results) { - Log.d(TAG, result); - } - } - - public void testReadAndWriteWithByteArray() { - final List<String> results = new ArrayList<>(); - - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.STATIC_OPTIONS); - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP); - runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - // Tests for readUnigramsAndBigramsBinary - - private static void checkWordMap(final List<String> expectedWords, - final SparseArray<List<Integer>> expectedBigrams, - final TreeMap<Integer, String> resultWords, - final TreeMap<Integer, Integer> resultFrequencies, - final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, - final boolean checkProbability) { - // check unigrams - final Set<String> actualWordsSet = new HashSet<>(resultWords.values()); - final Set<String> expectedWordsSet = new HashSet<>(expectedWords); - assertEquals(actualWordsSet, expectedWordsSet); - if (checkProbability) { - for (int freq : resultFrequencies.values()) { - assertEquals(freq, UNIGRAM_FREQ); - } - } - - // check bigrams - final HashMap<String, Set<String>> expBigrams = new HashMap<>(); - for (int i = 0; i < expectedBigrams.size(); ++i) { - final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); - for (int w2 : expectedBigrams.valueAt(i)) { - if (expBigrams.get(word1) == null) { - expBigrams.put(word1, new HashSet<String>()); - } - expBigrams.get(word1).add(expectedWords.get(w2)); - } - } - - final HashMap<String, Set<String>> actBigrams = new HashMap<>(); - for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { - final String word1 = resultWords.get(entry.getKey()); - final int unigramFreq = resultFrequencies.get(entry.getKey()); - for (PendingAttribute attr : entry.getValue()) { - final String word2 = resultWords.get(attr.mAddress); - if (actBigrams.get(word1) == null) { - actBigrams.put(word1, new HashSet<String>()); - } - actBigrams.get(word1).add(word2); - - if (checkProbability) { - final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( - unigramFreq, attr.mFrequency); - assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); - } - } - } - assertEquals(actBigrams, expBigrams); - } - - private static long timeAndCheckReadUnigramsAndBigramsBinary(final File file, - final List<String> words, final SparseArray<List<Integer>> bigrams, - final int bufferType, final boolean checkProbability) { - final TreeMap<Integer, String> resultWords = new TreeMap<>(); - final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>(); - final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>(); - - long now = -1, diff = -1; - try { - final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), - bufferType); - now = System.currentTimeMillis(); - dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); - diff = System.currentTimeMillis() - now; - } catch (IOException e) { - Log.e(TAG, "IOException", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException", e); - } - - checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability); - return diff; - } - - private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType, - final FormatSpec.FormatOptions formatOptions, final String message) { - final String dictName = "runReadUnigrams"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - - // making the dictionary from lists of words. - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(words.size(), dict, words); - addBigrams(dict, words, bigrams); - - timeWritingDictToFile(file, dict, formatOptions); - - // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the - // probability when there's a timestamp for the entry. - // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native. - long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, - !formatOptions.mHasTimestamp /* checkProbability */); - long fullReading = timeReadingAndCheckDict(file, words, bigrams, - bufferType); - - return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap - + " : " + message + " : " + outputOptions(bufferType, formatOptions); - } - - private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, - final int bufferType, final FormatSpec.FormatOptions formatOptions) { - results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, - formatOptions, "unigram")); - results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, - formatOptions, "chain")); - results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, - formatOptions, "star")); - } - - public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { - final ArrayList<String> results = new ArrayList<>(); - - runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.STATIC_OPTIONS); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - public void testReadUnigramsAndBigramsBinaryWithByteArray() { - final ArrayList<String> results = new ArrayList<>(); - - runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.STATIC_OPTIONS); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - // Tests for getTerminalPosition - private static String getWordFromBinary(final DictDecoder dictDecoder, final int address) { - if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); - - DictionaryHeader fileHeader = null; - try { - fileHeader = dictDecoder.readHeader(); - } catch (IOException e) { - return null; - } catch (UnsupportedFormatException e) { - return null; - } - if (fileHeader == null) return null; - return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, - address).mWord; - } - - private static long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, - final boolean contained) { - long diff = -1; - int position = -1; - try { - final long now = System.nanoTime(); - position = dictDecoder.getTerminalPosition(word); - diff = System.nanoTime() - now; - } catch (IOException e) { - Log.e(TAG, "IOException while getTerminalPosition", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); - } - - assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); - if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); - return diff; - } - - private void runGetTerminalPosition(final ArrayList<String> words, - final SparseArray<List<Integer>> bigrams, final int bufferType, - final FormatOptions formatOptions, final String message) { - final String dictName = "testGetTerminalPosition"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(sWords.size(), dict, sWords); - addBigrams(dict, words, bigrams); - timeWritingDictToFile(file, dict, formatOptions); - - final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), - DictDecoder.USE_BYTEARRAY); - try { - dictDecoder.openDictBuffer(); - } catch (IOException e) { - Log.e(TAG, "IOException while opening the buffer", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "IOException while opening the buffer", e); - } - assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); - - try { - // too long word - final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; - assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); - - // null - assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); - - // empty string - assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); - } catch (IOException e) { - } catch (UnsupportedFormatException e) { - } - - // Test a word that is contained within the dictionary. - long sum = 0; - for (int i = 0; i < sWords.size(); ++i) { - final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); - sum += time == -1 ? 0 : time; - } - Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message - + " : " + outputOptions(bufferType, formatOptions)); - - // Test a word that isn't contained within the dictionary. - final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, - mRandom); - for (int i = 0; i < 1000; ++i) { - final String word = CodePointUtils.generateWord(mRandom, codePointSet); - if (sWords.indexOf(word) != -1) continue; - checkGetTerminalPosition(dictDecoder, word, false); - } - } - - private void runGetTerminalPositionTests(final int bufferType, - final FormatOptions formatOptions) { - runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); - } - - public void testGetTerminalPosition() { - final ArrayList<String> results = new ArrayList<>(); - - runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, - BinaryDictUtils.STATIC_OPTIONS); - runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, - BinaryDictUtils.STATIC_OPTIONS); - - for (final String result : results) { - Log.d(TAG, result); - } - } - - public void testVer2DictGetWordProperty() { - final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; - final ArrayList<String> words = sWords; - final String dictName = "testGetWordProperty"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(words.size(), dict, words); - addBigrams(dict, words, sEmptyBigrams); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - file.delete(); - timeWritingDictToFile(file, dict, formatOptions); - final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), - 0 /* offset */, file.length(), true /* useFullEditDistance */, - Locale.ENGLISH, dictName, false /* isUpdatable */); - for (final String word : words) { - final WordProperty wordProperty = binaryDictionary.getWordProperty(word, - false /* isBeginningOfSentence */); - assertEquals(word, wordProperty.mWord); - assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); - } - } - - public void testVer2DictIteration() { - final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS; - final ArrayList<String> words = sWords; - final SparseArray<List<Integer>> bigrams = sEmptyBigrams; - final String dictName = "testGetWordProperty"; - final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); - addUnigrams(words.size(), dict, words); - addBigrams(dict, words, bigrams); - final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, - getContext().getCacheDir()); - timeWritingDictToFile(file, dict, formatOptions); - Log.d(TAG, file.getAbsolutePath()); - final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), - 0 /* offset */, file.length(), true /* useFullEditDistance */, - Locale.ENGLISH, dictName, false /* isUpdatable */); - - final HashSet<String> wordSet = new HashSet<>(words); - final HashSet<Pair<String, String>> bigramSet = new HashSet<>(); - - for (int i = 0; i < words.size(); i++) { - final List<Integer> bigramList = bigrams.get(i); - if (bigramList != null) { - for (final Integer word1Index : bigramList) { - final String word1 = words.get(word1Index); - bigramSet.add(new Pair<>(words.get(i), word1)); - } - } - } - int token = 0; - do { - final BinaryDictionary.GetNextWordPropertyResult result = - binaryDictionary.getNextWordProperty(token); - final WordProperty wordProperty = result.mWordProperty; - final String word0 = wordProperty.mWord; - assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability); - wordSet.remove(word0); - if (wordProperty.mHasNgrams) { - for (final WeightedString bigramTarget : wordProperty.getBigrams()) { - final String word1 = bigramTarget.mWord; - final Pair<String, String> bigram = new Pair<>(word0, word1); - assertTrue(bigramSet.contains(bigram)); - bigramSet.remove(bigram); - } - } - token = result.mNextToken; - } while (token != 0); - assertTrue(wordSet.isEmpty()); - assertTrue(bigramSet.isEmpty()); - } -} |