diff options
author | 2024-12-16 21:45:41 -0500 | |
---|---|---|
committer | 2025-01-11 14:17:35 -0500 | |
commit | e9a0e66716dab4dd3184d009d8920de1961efdfa (patch) | |
tree | 02dcc096643d74645bf28459c2834c3d4a2ad7f2 /tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | |
parent | fb3b9360d70596d7e921de8bf7d3ca99564a077e (diff) | |
download | latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.gz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.tar.xz latinime-e9a0e66716dab4dd3184d009d8920de1961efdfa.zip |
Rename to Kelar Keyboard (org.kelar.inputmethod.latin)
Diffstat (limited to 'tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java')
-rw-r--r-- | tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java | 913 |
1 files changed, 0 insertions, 913 deletions
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java deleted file mode 100644 index db8b80949..000000000 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ /dev/null @@ -1,913 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import android.text.TextUtils; -import android.util.Pair; - -import androidx.test.InstrumentationRegistry; -import androidx.test.filters.LargeTest; -import androidx.test.runner.AndroidJUnit4; - -import com.android.inputmethod.latin.NgramContext.WordInfo; -import com.android.inputmethod.latin.common.CodePointUtils; -import com.android.inputmethod.latin.common.FileUtils; -import com.android.inputmethod.latin.makedict.DictionaryHeader; -import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.WeightedString; -import com.android.inputmethod.latin.makedict.WordProperty; -import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Locale; -import java.util.Random; - -@LargeTest -@RunWith(AndroidJUnit4.class) -public class BinaryDictionaryTests { - private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; - private static final String TEST_LOCALE = "test"; - private static final String DICTIONARY_ID = "TestBinaryDictionary"; - - private HashSet<File> mDictFilesToBeDeleted = new HashSet<>(); - - @Before - public void setUp() throws Exception { - mDictFilesToBeDeleted.clear(); - } - - @After - public void tearDown() throws Exception { - for (final File dictFile : mDictFilesToBeDeleted) { - dictFile.delete(); - } - mDictFilesToBeDeleted.clear(); - } - - private File createEmptyDictionaryAndGetFile(final int formatVersion) { - return createEmptyDictionaryWithAttributesAndGetFile(formatVersion, - new HashMap<String, String>()); - } - - private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion, - final HashMap<String, String> attributeMap) { - try { - final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, - attributeMap); - mDictFilesToBeDeleted.add(dictFile); - return dictFile; - } catch (final IOException e) { - fail(e.toString()); - } - return null; - } - - private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, - final HashMap<String, String> attributeMap) throws IOException { - final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, - InstrumentationRegistry.getTargetContext().getCacheDir()); - file.delete(); - file.mkdir(); - if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, - Locale.ENGLISH, attributeMap)) { - return file; - } - throw new IOException("Empty dictionary " + file.getAbsolutePath() - + " cannot be created. Format version: " + formatVersion); - } - - private static BinaryDictionary getBinaryDictionary(final File dictFile) { - return new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - } - - private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) { - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); - return new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - } - - @Test - public void testIsValidDictionary() { - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - assertTrue("binaryDictionary must be valid for existing valid dictionary file.", - binaryDictionary.isValidDictionary()); - binaryDictionary.close(); - assertFalse("binaryDictionary must be invalid after closing.", - binaryDictionary.isValidDictionary()); - FileUtils.deleteRecursively(dictFile); - binaryDictionary = getBinaryDictionary(dictFile); - assertFalse("binaryDictionary must be invalid for not existing dictionary file.", - binaryDictionary.isValidDictionary()); - binaryDictionary.close(); - } - - @Test - public void testConstructingDictionaryOnMemory() { - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - FileUtils.deleteRecursively(dictFile); - assertFalse(dictFile.exists()); - final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, - FormatSpec.VERSION403, new HashMap<String, String>()); - assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); - final int probability = 100; - addUnigramWord(binaryDictionary, "word", probability); - assertEquals(probability, binaryDictionary.getFrequency("word")); - assertFalse(dictFile.exists()); - binaryDictionary.flush(); - assertTrue(dictFile.exists()); - assertTrue(binaryDictionary.isValidDictionary()); - assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); - assertEquals(probability, binaryDictionary.getFrequency("word")); - binaryDictionary.close(); - } - - @Test - public void testAddTooLongWord() { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - final StringBuffer stringBuilder = new StringBuffer(); - for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) { - stringBuilder.append('a'); - } - final String validLongWord = stringBuilder.toString(); - stringBuilder.append('a'); - final String invalidLongWord = stringBuilder.toString(); - final int probability = 100; - addUnigramWord(binaryDictionary, "aaa", probability); - addUnigramWord(binaryDictionary, validLongWord, probability); - addUnigramWord(binaryDictionary, invalidLongWord, probability); - // Too long short cut. - binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */, - false /* isNotAWord */, false /* isPossiblyOffensive */, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); - addUnigramWord(binaryDictionary, "abc", probability); - final int updatedProbability = 200; - // Update. - addUnigramWord(binaryDictionary, validLongWord, updatedProbability); - addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); - addUnigramWord(binaryDictionary, "abc", updatedProbability); - - assertEquals(probability, binaryDictionary.getFrequency("aaa")); - assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); - assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord)); - assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); - } - - private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, - final int probability) { - binaryDictionary.addUnigramEntry(word, probability, - false /* isBeginningOfSentence */, false /* isNotAWord */, - false /* isPossiblyOffensive */, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - } - - private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, - final String word1, final int probability) { - binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - } - - private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0, - final String word1, final String word2, final int probability) { - binaryDictionary.addNgramEntry( - new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2, - probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - } - - private static boolean isValidBigram(final BinaryDictionary binaryDictionary, - final String word0, final String word1) { - return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); - } - - private static int getBigramProbability(final BinaryDictionary binaryDictionary, - final String word0, final String word1) { - return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1); - } - - private static int getTrigramProbability(final BinaryDictionary binaryDictionary, - final String word0, final String word1, final String word2) { - return binaryDictionary.getNgramProbability( - new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2); - } - - @Test - public void testAddUnigramWord() { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - final int probability = 100; - addUnigramWord(binaryDictionary, "aaa", probability); - // Reallocate and create. - addUnigramWord(binaryDictionary, "aab", probability); - // Insert into children. - addUnigramWord(binaryDictionary, "aac", probability); - // Make terminal. - addUnigramWord(binaryDictionary, "aa", probability); - // Create children. - addUnigramWord(binaryDictionary, "aaaa", probability); - // Reallocate and make termianl. - addUnigramWord(binaryDictionary, "a", probability); - - final int updatedProbability = 200; - // Update. - addUnigramWord(binaryDictionary, "aaa", updatedProbability); - - assertEquals(probability, binaryDictionary.getFrequency("aab")); - assertEquals(probability, binaryDictionary.getFrequency("aac")); - assertEquals(probability, binaryDictionary.getFrequency("aa")); - assertEquals(probability, binaryDictionary.getFrequency("aaaa")); - assertEquals(probability, binaryDictionary.getFrequency("a")); - assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); - } - - @Test - public void testRandomlyAddUnigramWord() { - final int wordCount = 1000; - final int codePointSetSize = 50; - final long seed = System.currentTimeMillis(); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - - final HashMap<String, Integer> probabilityMap = new HashMap<>(); - // Test a word that isn't contained within the dictionary. - final Random random = new Random(seed); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - for (int i = 0; i < wordCount; ++i) { - final String word = CodePointUtils.generateWord(random, codePointSet); - probabilityMap.put(word, random.nextInt(0xFF)); - } - for (String word : probabilityMap.keySet()) { - addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); - } - for (String word : probabilityMap.keySet()) { - assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); - } - } - - @Test - public void testAddBigramWords() { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - - final int unigramProbability = 100; - final int bigramProbability = 150; - final int updatedBigramProbability = 200; - addUnigramWord(binaryDictionary, "aaa", unigramProbability); - addUnigramWord(binaryDictionary, "abb", unigramProbability); - addUnigramWord(binaryDictionary, "bcc", unigramProbability); - addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); - addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); - addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); - addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); - - assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); - assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); - assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); - assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); - - addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); - assertEquals(updatedBigramProbability, - getBigramProbability(binaryDictionary, "aaa", "abb")); - - assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); - assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); - assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); - assertEquals(Dictionary.NOT_A_PROBABILITY, - getBigramProbability(binaryDictionary, "bcc", "aaa")); - assertEquals(Dictionary.NOT_A_PROBABILITY, - getBigramProbability(binaryDictionary, "bcc", "bbc")); - assertEquals(Dictionary.NOT_A_PROBABILITY, - getBigramProbability(binaryDictionary, "aaa", "aaa")); - - // Testing bigram link. - addUnigramWord(binaryDictionary, "abcde", unigramProbability); - addUnigramWord(binaryDictionary, "fghij", unigramProbability); - addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); - addUnigramWord(binaryDictionary, "fgh", unigramProbability); - addUnigramWord(binaryDictionary, "abc", unigramProbability); - addUnigramWord(binaryDictionary, "f", unigramProbability); - - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij")); - assertEquals(Dictionary.NOT_A_PROBABILITY, - getBigramProbability(binaryDictionary, "abcde", "fgh")); - addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); - assertEquals(updatedBigramProbability, - getBigramProbability(binaryDictionary, "abcde", "fghij")); - } - - @Test - public void testRandomlyAddBigramWords() { - final int wordCount = 100; - final int bigramCount = 1000; - final int codePointSetSize = 50; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - - final ArrayList<String> words = new ArrayList<>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - for (int i = 0; i < wordCount; ++i) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - unigramProbabilities.put(word, unigramProbability); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - - for (int i = 0; i < bigramCount; i++) { - final String word0 = words.get(random.nextInt(wordCount)); - final String word1 = words.get(random.nextInt(wordCount)); - if (TextUtils.equals(word0, word1)) { - continue; - } - final Pair<String, String> bigram = new Pair<>(word0, word1); - bigramWords.add(bigram); - final int unigramProbability = unigramProbabilities.get(word1); - final int bigramProbability = - unigramProbability + random.nextInt(0xFF - unigramProbability); - bigramProbabilities.put(bigram, bigramProbability); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - } - - for (final Pair<String, String> bigram : bigramWords) { - final int bigramProbability = bigramProbabilities.get(bigram); - assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, - isValidBigram(binaryDictionary, bigram.first, bigram.second)); - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } - } - - @Test - public void testAddTrigramWords() { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - final int unigramProbability = 100; - final int trigramProbability = 150; - final int updatedTrigramProbability = 200; - addUnigramWord(binaryDictionary, "aaa", unigramProbability); - addUnigramWord(binaryDictionary, "abb", unigramProbability); - addUnigramWord(binaryDictionary, "bcc", unigramProbability); - - addBigramWords(binaryDictionary, "abb", "bcc", 10); - addBigramWords(binaryDictionary, "abb", "aaa", 10); - - addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability); - addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability); - - assertEquals(trigramProbability, - getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc")); - assertEquals(trigramProbability, - getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); - assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); - - addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability); - assertEquals(updatedTrigramProbability, - getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); - } - - @Test - public void testFlushDictionary() { - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - - final int probability = 100; - addUnigramWord(binaryDictionary, "aaa", probability); - addUnigramWord(binaryDictionary, "abcd", probability); - // Close without flushing. - binaryDictionary.close(); - - binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), - 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - - assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); - assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); - - addUnigramWord(binaryDictionary, "aaa", probability); - addUnigramWord(binaryDictionary, "abcd", probability); - binaryDictionary.flush(); - binaryDictionary.close(); - - binaryDictionary = getBinaryDictionary(dictFile); - assertEquals(probability, binaryDictionary.getFrequency("aaa")); - assertEquals(probability, binaryDictionary.getFrequency("abcd")); - addUnigramWord(binaryDictionary, "bcde", probability); - binaryDictionary.flush(); - binaryDictionary.close(); - - binaryDictionary = getBinaryDictionary(dictFile); - assertEquals(probability, binaryDictionary.getFrequency("bcde")); - binaryDictionary.close(); - } - - @Test - public void testFlushWithGCDictionary() { - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - final int unigramProbability = 100; - final int bigramProbability = 150; - addUnigramWord(binaryDictionary, "aaa", unigramProbability); - addUnigramWord(binaryDictionary, "abb", unigramProbability); - addUnigramWord(binaryDictionary, "bcc", unigramProbability); - addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); - addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); - addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); - addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - - binaryDictionary = getBinaryDictionary(dictFile); - assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); - assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); - assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); - assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); - assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); - assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - } - - @Test - public void testAddBigramWordsAndFlashWithGC() { - final int wordCount = 100; - final int bigramCount = 1000; - final int codePointSetSize = 30; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - - final ArrayList<String> words = new ArrayList<>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - for (int i = 0; i < wordCount; ++i) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - unigramProbabilities.put(word, unigramProbability); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - - for (int i = 0; i < bigramCount; i++) { - final String word0 = words.get(random.nextInt(wordCount)); - final String word1 = words.get(random.nextInt(wordCount)); - if (TextUtils.equals(word0, word1)) { - continue; - } - final Pair<String, String> bigram = new Pair<>(word0, word1); - bigramWords.add(bigram); - final int unigramProbability = unigramProbabilities.get(word1); - final int bigramProbability = - unigramProbability + random.nextInt(0xFF - unigramProbability); - bigramProbabilities.put(bigram, bigramProbability); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - } - - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - binaryDictionary = getBinaryDictionary(dictFile); - - for (final Pair<String, String> bigram : bigramWords) { - final int bigramProbability = bigramProbabilities.get(bigram); - assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, - isValidBigram(binaryDictionary, bigram.first, bigram.second)); - assertEquals(bigramProbability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - } - } - - @Test - public void testRandomOperationsAndFlashWithGC() { - final int maxUnigramCount = 5000; - final int maxBigramCount = 10000; - final HashMap<String, String> attributeMap = new HashMap<>(); - attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); - attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); - - final int flashWithGCIterationCount = 50; - final int operationCountInEachIteration = 200; - final int initialUnigramCount = 100; - final float addUnigramProb = 0.5f; - final float addBigramProb = 0.8f; - final int codePointSetSize = 30; - - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, - attributeMap); - BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - - final ArrayList<String> words = new ArrayList<>(); - final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - for (int i = 0; i < initialUnigramCount; ++i) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - unigramProbabilities.put(word, unigramProbability); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - - for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { - binaryDictionary = getBinaryDictionary(dictFile); - for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { - // Add unigram. - if (random.nextFloat() < addUnigramProb) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - unigramProbabilities.put(word, unigramProbability); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - // Add bigram. - if (random.nextFloat() < addBigramProb && words.size() > 2) { - final int word0Index = random.nextInt(words.size()); - int word1Index = random.nextInt(words.size() - 1); - if (word0Index <= word1Index) { - word1Index++; - } - final String word0 = words.get(word0Index); - final String word1 = words.get(word1Index); - if (TextUtils.equals(word0, word1)) { - continue; - } - final int unigramProbability = unigramProbabilities.get(word1); - final int bigramProbability = - unigramProbability + random.nextInt(0xFF - unigramProbability); - final Pair<String, String> bigram = new Pair<>(word0, word1); - bigramWords.add(bigram); - bigramProbabilities.put(bigram, bigramProbability); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - } - } - - // Test whether the all unigram operations are collectlly handled. - for (int i = 0; i < words.size(); i++) { - final String word = words.get(i); - final int unigramProbability = unigramProbabilities.get(word); - assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); - } - // Test whether the all bigram operations are collectlly handled. - for (int i = 0; i < bigramWords.size(); i++) { - final Pair<String, String> bigram = bigramWords.get(i); - final int probability; - if (bigramProbabilities.containsKey(bigram)) { - probability = bigramProbabilities.get(bigram); - } else { - probability = Dictionary.NOT_A_PROBABILITY; - } - - assertEquals(probability, - getBigramProbability(binaryDictionary, bigram.first, bigram.second)); - assertEquals(probability != Dictionary.NOT_A_PROBABILITY, - isValidBigram(binaryDictionary, bigram.first, bigram.second)); - } - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - } - } - - @Test - public void testAddManyUnigramsAndFlushWithGC() { - final int flashWithGCIterationCount = 3; - final int codePointSetSize = 50; - - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - - final ArrayList<String> words = new ArrayList<>(); - final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - - BinaryDictionary binaryDictionary; - for (int i = 0; i < flashWithGCIterationCount; i++) { - binaryDictionary = getBinaryDictionary(dictFile); - while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - unigramProbabilities.put(word, unigramProbability); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - - for (int j = 0; j < words.size(); j++) { - final String word = words.get(j); - final int unigramProbability = unigramProbabilities.get(word); - assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); - } - - binaryDictionary.flushWithGC(); - binaryDictionary.close(); - } - } - - @Test - public void testUnigramAndBigramCount() { - final int maxUnigramCount = 5000; - final int maxBigramCount = 10000; - final HashMap<String, String> attributeMap = new HashMap<>(); - attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); - attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); - - final int flashWithGCIterationCount = 10; - final int codePointSetSize = 50; - final int unigramCountPerIteration = 1000; - final int bigramCountPerIteration = 2000; - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, - attributeMap); - - final ArrayList<String> words = new ArrayList<>(); - final HashSet<Pair<String, String>> bigrams = new HashSet<>(); - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - - BinaryDictionary binaryDictionary; - for (int i = 0; i < flashWithGCIterationCount; i++) { - binaryDictionary = getBinaryDictionary(dictFile); - for (int j = 0; j < unigramCountPerIteration; j++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - words.add(word); - final int unigramProbability = random.nextInt(0xFF); - addUnigramWord(binaryDictionary, word, unigramProbability); - } - for (int j = 0; j < bigramCountPerIteration; j++) { - final String word0 = words.get(random.nextInt(words.size())); - final String word1 = words.get(random.nextInt(words.size())); - if (TextUtils.equals(word0, word1)) { - continue; - } - bigrams.add(new Pair<>(word0, word1)); - final int bigramProbability = random.nextInt(0xF); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - } - assertEquals(new HashSet<>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.BIGRAM_COUNT_QUERY))); - binaryDictionary.flushWithGC(); - assertEquals(new HashSet<>(words).size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.UNIGRAM_COUNT_QUERY))); - assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.BIGRAM_COUNT_QUERY))); - binaryDictionary.close(); - } - } - - @Test - public void testGetWordProperties() { - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final int UNIGRAM_COUNT = 1000; - final int BIGRAM_COUNT = 1000; - final int codePointSetSize = 20; - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); - final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); - - final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", - false /* isBeginningOfSentence */); - assertFalse(invalidWordProperty.isValid()); - - final ArrayList<String> words = new ArrayList<>(); - final HashMap<String, Integer> wordProbabilities = new HashMap<>(); - final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); - - for (int i = 0; i < UNIGRAM_COUNT; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int unigramProbability = random.nextInt(0xFF); - final boolean isNotAWord = random.nextBoolean(); - final boolean isPossiblyOffensive = random.nextBoolean(); - // TODO: Add tests for historical info. - binaryDictionary.addUnigramEntry(word, unigramProbability, - false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, - BinaryDictionary.NOT_A_VALID_TIMESTAMP); - if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - words.add(word); - wordProbabilities.put(word, unigramProbability); - final WordProperty wordProperty = binaryDictionary.getWordProperty(word, - false /* isBeginningOfSentence */); - assertEquals(word, wordProperty.mWord); - assertTrue(wordProperty.isValid()); - assertEquals(isNotAWord, wordProperty.mIsNotAWord); - assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive); - assertEquals(false, wordProperty.mHasNgrams); - assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); - } - - for (int i = 0; i < BIGRAM_COUNT; i++) { - final int word0Index = random.nextInt(wordProbabilities.size()); - final int word1Index = random.nextInt(wordProbabilities.size()); - if (word0Index == word1Index) { - continue; - } - final String word0 = words.get(word0Index); - final String word1 = words.get(word1Index); - final int unigramProbability = wordProbabilities.get(word1); - final int bigramProbability = - unigramProbability + random.nextInt(0xFF - unigramProbability); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - if (!bigrams.containsKey(word0)) { - final HashSet<String> bigramWord1s = new HashSet<>(); - bigrams.put(word0, bigramWord1s); - } - bigrams.get(word0).add(word1); - bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); - } - - for (int i = 0; i < words.size(); i++) { - final String word0 = words.get(i); - if (!bigrams.containsKey(word0)) { - continue; - } - final HashSet<String> bigramWord1s = bigrams.get(word0); - final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, - false /* isBeginningOfSentence */); - assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size()); - // TODO: Support ngram. - for (final WeightedString bigramTarget : wordProperty.getBigrams()) { - final String word1 = bigramTarget.mWord; - assertTrue(bigramWord1s.contains(word1)); - final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); - assertEquals(bigramProbability, bigramTarget.getProbability()); - } - } - } - - @Test - public void testIterateAllWords() { - final long seed = System.currentTimeMillis(); - final Random random = new Random(seed); - final int UNIGRAM_COUNT = 1000; - final int BIGRAM_COUNT = 1000; - final int codePointSetSize = 20; - final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - - final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", - false /* isBeginningOfSentence */); - assertFalse(invalidWordProperty.isValid()); - - final ArrayList<String> words = new ArrayList<>(); - final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); - final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); - final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = - new HashMap<>(); - - for (int i = 0; i < UNIGRAM_COUNT; i++) { - final String word = CodePointUtils.generateWord(random, codePointSet); - final int unigramProbability = random.nextInt(0xFF); - addUnigramWord(binaryDictionary, word, unigramProbability); - if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - words.add(word); - wordProbabilitiesToCheckLater.put(word, unigramProbability); - } - - for (int i = 0; i < BIGRAM_COUNT; i++) { - final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); - final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); - if (word0Index == word1Index) { - continue; - } - final String word0 = words.get(word0Index); - final String word1 = words.get(word1Index); - final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); - final int bigramProbability = - unigramProbability + random.nextInt(0xFF - unigramProbability); - addBigramWords(binaryDictionary, word0, word1, bigramProbability); - if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { - binaryDictionary.flushWithGC(); - } - if (!bigrams.containsKey(word0)) { - final HashSet<String> bigramWord1s = new HashSet<>(); - bigrams.put(word0, bigramWord1s); - } - bigrams.get(word0).add(word1); - bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); - } - - final HashSet<String> wordSet = new HashSet<>(words); - final HashSet<Pair<String, String>> bigramSet = - new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); - int token = 0; - do { - final BinaryDictionary.GetNextWordPropertyResult result = - binaryDictionary.getNextWordProperty(token); - final WordProperty wordProperty = result.mWordProperty; - final String word0 = wordProperty.mWord; - assertEquals((int)wordProbabilitiesToCheckLater.get(word0), - wordProperty.mProbabilityInfo.mProbability); - wordSet.remove(word0); - final HashSet<String> bigramWord1s = bigrams.get(word0); - // TODO: Support ngram. - if (wordProperty.mHasNgrams) { - for (final WeightedString bigramTarget : wordProperty.getBigrams()) { - final String word1 = bigramTarget.mWord; - assertTrue(bigramWord1s.contains(word1)); - final Pair<String, String> bigram = new Pair<>(word0, word1); - final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); - assertEquals(bigramProbability, bigramTarget.getProbability()); - bigramSet.remove(bigram); - } - } - token = result.mNextToken; - } while (token != 0); - assertTrue(wordSet.isEmpty()); - assertTrue(bigramSet.isEmpty()); - } - - @Test - public void testPossiblyOffensiveAttributeMaintained() { - final BinaryDictionary binaryDictionary = - getEmptyBinaryDictionary(FormatSpec.VERSION403); - binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0); - WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); - assertEquals(true, wordProperty.mIsPossiblyOffensive); - } - - @Test - public void testBeginningOfSentence() { - final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); - final int dummyProbability = 0; - final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; - final int bigramProbability = 200; - addUnigramWord(binaryDictionary, "aaa", dummyProbability); - binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); - binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - addUnigramWord(binaryDictionary, "bbb", dummyProbability); - binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability, - BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); - binaryDictionary.flushWithGC(); - assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); - assertEquals(bigramProbability, - binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb")); - } -} |