about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java 3
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java 3
-rw-r--r-- tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java 258
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java 14
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java 319
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java 150
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java 279
7 files changed, 24 insertions, 1002 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index fa8fb2028..80daedd50 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -121,8 +121,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
private static boolean needsToMigrateDictionary(final int formatVersion) {
// When we bump up the dictionary format version, the old version should be added to here
// for supporting migration. Note that native code has to support reading such formats.
- return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
- || formatVersion == FormatSpec.VERSION402;
+ return formatVersion == FormatSpec.VERSION402;
}
public boolean isValidDictionaryLocked() {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 288261bf0..e422c4cd2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -174,9 +174,6 @@ public final class FormatSpec {
public static final int VERSION202 = 202;
// format version for Fava Dictionaries.
public static final int VERSION_DELIGHT3 = 86736212;
- public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
- // Dictionary version used for testing.
- public static final int VERSION4_ONLY_FOR_TESTING = 399;
public static final int VERSION402 = 402;
public static final int VERSION403 = 403;
public static final int VERSION4 = VERSION403;
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 89167f744..e92831c48 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -42,8 +42,6 @@ import java.util.Random;
public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
- private static final int[] DICT_FORMAT_VERSIONS =
- new int[] { FormatSpec.VERSION402, FormatSpec.VERSION403 };
private static final String DICTIONARY_ID = "TestBinaryDictionary";
private static boolean supportsNgram(final int formatVersion) {
@@ -113,13 +111,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testIsValidDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testIsValidDictionary(formatVersion);
- }
- }
-
- private void testIsValidDictionary(final int formatVersion) {
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
binaryDictionary.isValidDictionary());
@@ -134,20 +126,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testConstructingDictionaryOnMemory() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testConstructingDictionaryOnMemory(formatVersion);
- }
- }
-
- private void testConstructingDictionaryOnMemory(final int formatVersion) {
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
FileUtils.deleteRecursively(dictFile);
assertFalse(dictFile.exists());
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
- true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
- new HashMap<String, String>());
+ true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
+ FormatSpec.VERSION403, new HashMap<String, String>());
assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
final int probability = 100;
addUnigramWord(binaryDictionary, "word", probability);
assertEquals(probability, binaryDictionary.getFrequency("word"));
@@ -155,19 +141,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.flush();
assertTrue(dictFile.exists());
assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(formatVersion, binaryDictionary.getFormatVersion());
+ assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
assertEquals(probability, binaryDictionary.getFrequency("word"));
binaryDictionary.close();
}
public void testAddTooLongWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddTooLongWord(formatVersion);
- }
- }
-
- private void testAddTooLongWord(final int formatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final StringBuffer stringBuilder = new StringBuffer();
for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) {
stringBuilder.append('a');
@@ -234,13 +214,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddUnigramWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddUnigramWord(formatVersion);
- }
- }
-
- private void testAddUnigramWord(final int formatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int probability = 100;
addUnigramWord(binaryDictionary, "aaa", probability);
// Reallocate and create.
@@ -267,16 +241,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddUnigramWord() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomlyAddUnigramWord(formatVersion);
- }
- }
-
- private void testRandomlyAddUnigramWord(final int formatVersion) {
final int wordCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final HashMap<String, Integer> probabilityMap = new HashMap<>();
// Test a word that isn't contained within the dictionary.
@@ -295,13 +263,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddBigramWords(formatVersion);
- }
- }
-
- private void testAddBigramWords(final int formatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
final int bigramProbability = 150;
@@ -354,18 +316,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomlyAddBigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomlyAddBigramWords(formatVersion);
- }
- }
-
- private void testRandomlyAddBigramWords(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
@@ -406,15 +362,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddTrigramWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- if (supportsNgram(formatVersion)) {
- testAddTrigramWords(formatVersion);
- }
- }
- }
-
- private void testAddTrigramWords(final int formatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int unigramProbability = 100;
final int trigramProbability = 150;
final int updatedTrigramProbability = 200;
@@ -440,13 +388,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testFlushDictionary(formatVersion);
- }
- }
-
- private void testFlushDictionary(final int formatVersion) {
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int probability = 100;
@@ -480,13 +422,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testFlushWithGCDictionary() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testFlushWithGCDictionary(formatVersion);
- }
- }
-
- private void testFlushWithGCDictionary(final int formatVersion) {
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final int unigramProbability = 100;
final int bigramProbability = 150;
@@ -516,20 +452,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddBigramWordsAndFlashWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddBigramWordsAndFlashWithGC(formatVersion);
- }
- }
-
- // TODO: Evaluate performance of GC
- private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final ArrayList<String> words = new ArrayList<>();
@@ -575,12 +504,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testRandomOperationsAndFlashWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testRandomOperationsAndFlashWithGC(formatVersion);
- }
- }
-
- private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
final int maxUnigramCount = 5000;
final int maxBigramCount = 10000;
final HashMap<String, String> attributeMap = new HashMap<>();
@@ -596,7 +519,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
+ final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
attributeMap);
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
@@ -675,19 +598,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testAddManyUnigramsAndFlushWithGC() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testAddManyUnigramsAndFlushWithGC(formatVersion);
- }
- }
-
- private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
final ArrayList<String> words = new ArrayList<>();
final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
@@ -716,12 +633,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testUnigramAndBigramCount() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testUnigramAndBigramCount(formatVersion);
- }
- }
-
- private void testUnigramAndBigramCount(final int formatVersion) {
final int maxUnigramCount = 5000;
final int maxBigramCount = 10000;
final HashMap<String, String> attributeMap = new HashMap<>();
@@ -734,7 +645,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramCountPerIteration = 2000;
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
- final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
+ final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
attributeMap);
final ArrayList<String> words = new ArrayList<>();
@@ -778,19 +689,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testGetWordProperties() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testGetWordProperties(formatVersion);
- }
- }
-
- private void testGetWordProperties(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
+ final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
@@ -869,19 +774,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
public void testIterateAllWords() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testIterateAllWords(formatVersion);
- }
- }
-
- private void testIterateAllWords(final int formatVersion) {
final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
final int UNIGRAM_COUNT = 1000;
final int BIGRAM_COUNT = 1000;
final int codePointSetSize = 20;
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
false /* isBeginningOfSentence */);
@@ -965,123 +864,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(true, wordProperty.mIsPossiblyOffensive);
}
- public void testDictMigration() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
- }
- }
-
- private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion);
- final int unigramProbability = 100;
- addUnigramWord(binaryDictionary, "aaa", unigramProbability);
- addUnigramWord(binaryDictionary, "bbb", unigramProbability);
- final int bigramProbability = 150;
- addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
- binaryDictionary.addUnigramEntry("ccc", unigramProbability,
- false /* isBeginningOfSentence */, false /* isNotAWord */,
- false /* isPossiblyOffensive */, 0 /* timestamp */);
- binaryDictionary.addUnigramEntry("ddd", unigramProbability,
- false /* isBeginningOfSentence */,
- true /* isNotAWord */, true /* isPossiblyOffensive */, 0 /* timestamp */);
- binaryDictionary.addNgramEntry(NgramContext.BEGINNING_OF_SENTENCE,
- "aaa", bigramProbability, 0 /* timestamp */);
- assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
- assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
- assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
- assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
- assertTrue(binaryDictionary.migrateTo(toFormatVersion));
- assertTrue(binaryDictionary.isValidDictionary());
- assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
- assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
- assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
- assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
- assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
- NgramContext.BEGINNING_OF_SENTENCE, "aaa"));
- assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
- WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
- false /* isBeginningOfSentence */);
- wordProperty = binaryDictionary.getWordProperty("ddd",
- false /* isBeginningOfSentence */);
- assertTrue(wordProperty.mIsPossiblyOffensive);
- assertTrue(wordProperty.mIsNotAWord);
- }
-
- public void testLargeDictMigration() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
- }
- }
-
- private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
- final int UNIGRAM_COUNT = 3000;
- final int BIGRAM_COUNT = 3000;
- final int codePointSetSize = 50;
- final long seed = System.currentTimeMillis();
- final Random random = new Random(seed);
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(fromFormatVersion);
-
- final ArrayList<String> words = new ArrayList<>();
- final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
- final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
- final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
-
- for (int i = 0; i < UNIGRAM_COUNT; i++) {
- final String word = CodePointUtils.generateWord(random, codePointSet);
- final int unigramProbability = random.nextInt(0xFF);
- addUnigramWord(binaryDictionary, word, unigramProbability);
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- words.add(word);
- unigramProbabilities.put(word, unigramProbability);
- }
-
- for (int i = 0; i < BIGRAM_COUNT; i++) {
- final int word0Index = random.nextInt(words.size());
- final int word1Index = random.nextInt(words.size());
- if (word0Index == word1Index) {
- continue;
- }
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
- final int unigramProbability = unigramProbabilities.get(word1);
- final int bigramProbability =
- random.nextInt(0xFF - unigramProbability) + unigramProbability;
- addBigramWords(binaryDictionary, word0, word1, bigramProbability);
- if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
- binaryDictionary.flushWithGC();
- }
- final Pair<String, String> bigram = new Pair<>(word0, word1);
- bigrams.add(bigram);
- bigramProbabilities.put(bigram, bigramProbability);
- }
- assertTrue(binaryDictionary.migrateTo(toFormatVersion));
-
- for (final String word : words) {
- assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
- }
- assertEquals(unigramProbabilities.size(), Integer.parseInt(
- binaryDictionary.getPropertyForGettingStats(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
-
- for (final Pair<String, String> bigram : bigrams) {
- assertEquals((int)bigramProbabilities.get(bigram),
- getBigramProbability(binaryDictionary, bigram.first, bigram.second));
- assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
- }
- assertEquals(bigramProbabilities.size(), Integer.parseInt(
- binaryDictionary.getPropertyForGettingStats(BinaryDictionary.BIGRAM_COUNT_QUERY)));
- }
-
public void testBeginningOfSentence() {
- for (final int formatVersion : DICT_FORMAT_VERSIONS) {
- testBeginningOfSentence(formatVersion);
- }
- }
-
- private void testBeginningOfSentence(final int formatVersion) {
- final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(formatVersion);
+ final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
final int dummyProbability = 0;
final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
final int bigramProbability = 200;
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index a432ca740..da1b32a8b 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -43,22 +43,12 @@ public final class BinaryDictIOUtils {
*/
public static DictDecoder getDictDecoder(final File dictFile, final long offset,
final long length, final int bufferType) {
- if (dictFile.isDirectory()) {
- return new Ver4DictDecoder(dictFile);
- } else if (dictFile.isFile()) {
- return new Ver2DictDecoder(dictFile, offset, length, bufferType);
- }
- return null;
+ return new Ver4DictDecoder(dictFile);
}
public static DictDecoder getDictDecoder(final File dictFile, final long offset,
final long length, final DictionaryBufferFactory factory) {
- if (dictFile.isDirectory()) {
- return new Ver4DictDecoder(dictFile);
- } else if (dictFile.isFile()) {
- return new Ver2DictDecoder(dictFile, offset, length, factory);
- }
- return null;
+ return new Ver4DictDecoder(dictFile);
}
public static DictDecoder getDictDecoder(final File dictFile, final long offset,
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
deleted file mode 100644
index 7ee1df92b..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.BinaryDictionary;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-/**
- * An implementation of DictDecoder for version 2 binary dictionary.
- */
-// TODO: Separate logics that are used only for testing.
-@UsedForTesting
-public class Ver2DictDecoder extends AbstractDictDecoder {
- /**
- * A utility class for reading a PtNode.
- */
- static class PtNodeReader {
- static ProbabilityInfo readProbabilityInfo(final DictBuffer dictBuffer) {
- // Ver2 dicts don't contain historical information.
- return new ProbabilityInfo(dictBuffer.readUnsignedByte());
- }
-
- static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
- return dictBuffer.readUnsignedByte();
- }
-
- static int readChildrenAddress(final DictBuffer dictBuffer,
- final int ptNodeFlags) {
- switch (ptNodeFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
- return dictBuffer.readUnsignedByte();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
- return dictBuffer.readUnsignedShort();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
- return dictBuffer.readUnsignedInt24();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
- default:
- return FormatSpec.NO_CHILDREN_ADDRESS;
- }
- }
-
- // Reads shortcuts and returns the read length.
- static int readShortcut(final DictBuffer dictBuffer,
- final ArrayList<WeightedString> shortcutTargets) {
- final int pointerBefore = dictBuffer.position();
- dictBuffer.readUnsignedShort(); // skip the size
- while (true) {
- final int targetFlags = dictBuffer.readUnsignedByte();
- final String word = CharEncoding.readString(dictBuffer);
- shortcutTargets.add(new WeightedString(word,
- targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
- if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
- }
- return dictBuffer.position() - pointerBefore;
- }
-
- static int readBigramAddresses(final DictBuffer dictBuffer,
- final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
- int readLength = 0;
- int bigramCount = 0;
- while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- final int bigramFlags = dictBuffer.readUnsignedByte();
- ++readLength;
- final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
- ? 1 : -1;
- int bigramAddress = baseAddress + readLength;
- switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
- bigramAddress += sign * dictBuffer.readUnsignedByte();
- readLength += 1;
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
- bigramAddress += sign * dictBuffer.readUnsignedShort();
- readLength += 2;
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
- bigramAddress += sign * dictBuffer.readUnsignedInt24();
- readLength += 3;
- break;
- default:
- throw new RuntimeException("Has bigrams with no address");
- }
- bigrams.add(new PendingAttribute(
- bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
- bigramAddress));
- if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
- }
- return readLength;
- }
- }
-
- protected final File mDictionaryBinaryFile;
- protected final long mOffset;
- protected final long mLength;
- // TODO: Remove mBufferFactory and mDictBuffer from this class members because they are now
- // used only for testing.
- private final DictionaryBufferFactory mBufferFactory;
- protected DictBuffer mDictBuffer;
-
- @UsedForTesting
- /* package */ Ver2DictDecoder(final File file, final long offset, final long length,
- final int factoryFlag) {
- mDictionaryBinaryFile = file;
- mOffset = offset;
- mLength = length;
- mDictBuffer = null;
- if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
- mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
- } else if ((factoryFlag & MASK_DICTBUFFER) == USE_BYTEARRAY) {
- mBufferFactory = new DictionaryBufferFromByteArrayFactory();
- } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
- mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
- } else {
- mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
- }
- }
-
- /* package */ Ver2DictDecoder(final File file, final long offset, final long length,
- final DictionaryBufferFactory factory) {
- mDictionaryBinaryFile = file;
- mOffset = offset;
- mLength = length;
- mBufferFactory = factory;
- }
-
- @Override
- public void openDictBuffer() throws FileNotFoundException, IOException {
- mDictBuffer = mBufferFactory.getDictionaryBuffer(mDictionaryBinaryFile);
- }
-
- @Override
- public boolean isDictBufferOpen() {
- return mDictBuffer != null;
- }
-
- /* package */ DictBuffer getDictBuffer() {
- return mDictBuffer;
- }
-
- @UsedForTesting
- /* package */ DictBuffer openAndGetDictBuffer() throws FileNotFoundException, IOException {
- openDictBuffer();
- return getDictBuffer();
- }
-
- @Override
- public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
- // dictType is not being used in dicttool. Passing an empty string.
- final BinaryDictionary binaryDictionary = new BinaryDictionary(
- mDictionaryBinaryFile.getAbsolutePath(), mOffset, mLength,
- true /* useFullEditDistance */, null /* locale */, "" /* dictType */,
- false /* isUpdatable */);
- final DictionaryHeader header = binaryDictionary.getHeader();
- binaryDictionary.close();
- if (header == null) {
- throw new IOException("Cannot read the dictionary header.");
- }
- if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
- header.mFormatOptions.mVersion != FormatSpec.VERSION201 &&
- header.mFormatOptions.mVersion != FormatSpec.VERSION202) {
- throw new UnsupportedFormatException("File header has a wrong version : "
- + header.mFormatOptions.mVersion);
- }
- if (!isDictBufferOpen()) {
- openDictBuffer();
- }
- // Advance buffer reading position to the head of dictionary body.
- setPosition(header.mBodyOffset);
- return header;
- }
-
- // TODO: Make this buffer multi thread safe.
- private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
- @Override
- public PtNodeInfo readPtNode(final int ptNodePos) {
- int addressPointer = ptNodePos;
- final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
- final int characters[];
- if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
- int index = 0;
- int character = CharEncoding.readChar(mDictBuffer);
- addressPointer += CharEncoding.getCharSize(character, null);
- while (FormatSpec.INVALID_CHARACTER != character) {
- // FusionDictionary is making sure that the length of the word is smaller than
- // MAX_WORD_LENGTH.
- // So we'll never write past the end of mCharacterBuffer.
- mCharacterBuffer[index++] = character;
- character = CharEncoding.readChar(mDictBuffer);
- addressPointer += CharEncoding.getCharSize(character, null);
- }
- characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
- } else {
- final int character = CharEncoding.readChar(mDictBuffer);
- addressPointer += CharEncoding.getCharSize(character, null);
- characters = new int[] { character };
- }
- final ProbabilityInfo probabilityInfo;
- if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
- probabilityInfo = PtNodeReader.readProbabilityInfo(mDictBuffer);
- addressPointer += FormatSpec.PTNODE_FREQUENCY_SIZE;
- } else {
- probabilityInfo = null;
- }
- int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags);
- if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
- childrenAddress += addressPointer;
- }
- addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags);
- final ArrayList<WeightedString> shortcutTargets;
- if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
- // readShortcut will add shortcuts to shortcutTargets.
- shortcutTargets = new ArrayList<>();
- addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
- } else {
- shortcutTargets = null;
- }
-
- final ArrayList<PendingAttribute> bigrams;
- if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
- bigrams = new ArrayList<>();
- addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
- addressPointer);
- if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
- + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
- }
- } else {
- bigrams = null;
- }
- return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, probabilityInfo,
- childrenAddress, shortcutTargets, bigrams);
- }
-
- @Override
- public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
- throws FileNotFoundException, IOException, UnsupportedFormatException {
- // dictType is not being used in dicttool. Passing an empty string.
- final BinaryDictionary binaryDictionary = new BinaryDictionary(
- mDictionaryBinaryFile.getAbsolutePath(), 0 /* offset */,
- mDictionaryBinaryFile.length() /* length */, true /* useFullEditDistance */,
- null /* locale */, "" /* dictType */, false /* isUpdatable */);
- final DictionaryHeader header = readHeader();
- final FusionDictionary fusionDict =
- new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions);
- int token = 0;
- final ArrayList<WordProperty> wordProperties = new ArrayList<>();
- do {
- final BinaryDictionary.GetNextWordPropertyResult result =
- binaryDictionary.getNextWordProperty(token);
- final WordProperty wordProperty = result.mWordProperty;
- if (wordProperty == null) {
- binaryDictionary.close();
- if (deleteDictIfBroken) {
- mDictionaryBinaryFile.delete();
- }
- return null;
- }
- wordProperties.add(wordProperty);
- token = result.mNextToken;
- } while (token != 0);
-
- // Insert unigrams into the fusion dictionary.
- for (final WordProperty wordProperty : wordProperties) {
- fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
- wordProperty.mIsNotAWord,
- wordProperty.mIsPossiblyOffensive);
- }
- // Insert bigrams into the fusion dictionary.
- for (final WordProperty wordProperty : wordProperties) {
- if (!wordProperty.mHasNgrams) {
- continue;
- }
- final String word0 = wordProperty.mWord;
- for (final WeightedString bigram : wordProperty.getBigrams()) {
- fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
- }
- }
- binaryDictionary.close();
- return fusionDict;
- }
-
- @Override
- public void setPosition(int newPos) {
- mDictBuffer.position(newPos);
- }
-
- @Override
- public int getPosition() {
- return mDictBuffer.position();
- }
-
- @Override
- public int readPtNodeCount() {
- return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
- }
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java
deleted file mode 100644
index 3882c2c55..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoderTests.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
-import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
-import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFromByteArrayFactory;
-import com.android.inputmethod.latin.makedict.DictDecoder.
- DictionaryBufferFromReadOnlyByteBufferFactory;
-import com.android.inputmethod.latin.makedict.DictDecoder.
- DictionaryBufferFromWritableByteBufferFactory;
-
-import android.test.AndroidTestCase;
-import android.util.Log;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-
-/**
- * Unit tests for Ver2DictDecoder
- */
-public class Ver2DictDecoderTests extends AndroidTestCase {
- private static final String TAG = Ver2DictDecoderTests.class.getSimpleName();
-
- private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
-
- // Utilities for testing
- public void writeDataToFile(final File file) {
- FileOutputStream outStream = null;
- try {
- outStream = new FileOutputStream(file);
- outStream.write(data);
- } catch (IOException e) {
- fail ("Can't write data to the test file");
- } finally {
- if (outStream != null) {
- try {
- outStream.close();
- } catch (IOException e) {
- Log.e(TAG, "Failed to close the output stream", e);
- }
- }
- }
- }
-
- public void runTestOpenBuffer(final String testName, final DictionaryBufferFactory factory) {
- File testFile = null;
- try {
- testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
- } catch (IOException e) {
- Log.e(TAG, "IOException while the creating temporary file", e);
- }
-
- assertNotNull(testFile);
- final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0, testFile.length(),
- factory);
- try {
- dictDecoder.openDictBuffer();
- } catch (Exception e) {
- Log.e(TAG, "Failed to open the buffer", e);
- }
-
- writeDataToFile(testFile);
-
- try {
- dictDecoder.openDictBuffer();
- } catch (Exception e) {
- Log.e(TAG, "Raised the exception while opening buffer", e);
- }
-
- assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
- }
-
- public void testOpenBufferWithByteBuffer() {
- runTestOpenBuffer("testOpenBufferWithByteBuffer",
- new DictionaryBufferFromReadOnlyByteBufferFactory());
- }
-
- public void testOpenBufferWithByteArray() {
- runTestOpenBuffer("testOpenBufferWithByteArray",
- new DictionaryBufferFromByteArrayFactory());
- }
-
- public void testOpenBufferWithWritableByteBuffer() {
- runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
- new DictionaryBufferFromWritableByteBufferFactory());
- }
-
- public void runTestGetBuffer(final String testName, final DictionaryBufferFactory factory) {
- File testFile = null;
- try {
- testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
- } catch (IOException e) {
- Log.e(TAG, "IOException while the creating temporary file", e);
- }
-
- final Ver2DictDecoder dictDecoder = new Ver2DictDecoder(testFile, 0, testFile.length(),
- factory);
-
- // the default return value of getBuffer() must be null.
- assertNull("the default return value of getBuffer() is not null",
- dictDecoder.getDictBuffer());
-
- writeDataToFile(testFile);
- assertTrue(testFile.exists());
- Log.d(TAG, "file length = " + testFile.length());
-
- DictBuffer dictBuffer = null;
- try {
- dictBuffer = dictDecoder.openAndGetDictBuffer();
- } catch (IOException e) {
- Log.e(TAG, "Failed to open and get the buffer", e);
- }
- assertNotNull("the buffer must not be null", dictBuffer);
-
- for (int i = 0; i < data.length; ++i) {
- assertEquals(data[i], dictBuffer.readUnsignedByte());
- }
- }
-
- public void testGetBufferWithByteBuffer() {
- runTestGetBuffer("testGetBufferWithByteBuffer",
- new DictionaryBufferFromReadOnlyByteBufferFactory());
- }
-
- public void testGetBufferWithByteArray() {
- runTestGetBuffer("testGetBufferWithByteArray",
- new DictionaryBufferFromByteArrayFactory());
- }
-
- public void testGetBufferWithWritableByteBuffer() {
- runTestGetBuffer("testGetBufferWithWritableByteBuffer",
- new DictionaryBufferFromWritableByteBufferFactory());
- }
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
deleted file mode 100644
index c63b972eb..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils.CodePointTable;
-import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-/**
- * An implementation of DictEncoder for version 2 binary dictionary.
- */
-@UsedForTesting
-public class Ver2DictEncoder implements DictEncoder {
-
- private final File mDictFile;
- private OutputStream mOutStream;
- private byte[] mBuffer;
- private int mPosition;
- private final int mCodePointTableMode;
- public static final int CODE_POINT_TABLE_OFF = 0;
- public static final int CODE_POINT_TABLE_ON = 1;
-
- @UsedForTesting
- public Ver2DictEncoder(final File dictFile, final int codePointTableMode) {
- mDictFile = dictFile;
- mOutStream = null;
- mBuffer = null;
- mCodePointTableMode = codePointTableMode;
- }
-
- // This constructor is used only by BinaryDictOffdeviceUtilsTests.
- // If you want to use this in the production code, you should consider keeping consistency of
- // the interface of Ver3DictDecoder by using factory.
- @UsedForTesting
- public Ver2DictEncoder(final OutputStream outStream) {
- mDictFile = null;
- mOutStream = outStream;
- mCodePointTableMode = CODE_POINT_TABLE_OFF;
- }
-
- private void openStream() throws FileNotFoundException {
- mOutStream = new FileOutputStream(mDictFile);
- }
-
- private void close() throws IOException {
- if (mOutStream != null) {
- mOutStream.close();
- mOutStream = null;
- }
- }
-
- // Package for testing
- static CodePointTable makeCodePointTable(final FusionDictionary dict) {
- final HashMap<Integer, Integer> codePointOccurrenceCounts = new HashMap<>();
- for (final WordProperty word : dict) {
- // Store per code point occurrence
- final String wordString = word.mWord;
- for (int i = 0; i < wordString.length(); ++i) {
- final int codePoint = Character.codePointAt(wordString, i);
- if (codePointOccurrenceCounts.containsKey(codePoint)) {
- codePointOccurrenceCounts.put(codePoint,
- codePointOccurrenceCounts.get(codePoint) + 1);
- } else {
- codePointOccurrenceCounts.put(codePoint, 1);
- }
- }
- }
- final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray =
- new ArrayList<>(codePointOccurrenceCounts.entrySet());
- // Descending order sort by occurrence (value side)
- Collections.sort(codePointOccurrenceArray, new Comparator<Entry<Integer, Integer>>() {
- @Override
- public int compare(final Entry<Integer, Integer> a, final Entry<Integer, Integer> b) {
- if (a.getValue() != b.getValue()) {
- return b.getValue().compareTo(a.getValue());
- }
- return b.getKey().compareTo(a.getKey());
- }
- });
- int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE;
- // Temporary map for writing of nodes
- final HashMap<Integer, Integer> codePointToOneByteCodeMap = new HashMap<>();
- for (final Entry<Integer, Integer> entry : codePointOccurrenceArray) {
- // Put a relation from the original code point to the one byte code.
- codePointToOneByteCodeMap.put(entry.getKey(), currentCodePointTableIndex);
- if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) {
- break;
- }
- }
- // codePointToOneByteCodeMap for writing the trie
- // codePointOccurrenceArray for writing the header
- return new CodePointTable(codePointToOneByteCodeMap, codePointOccurrenceArray);
- }
-
- @Override
- public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
- throws IOException, UnsupportedFormatException {
- // We no longer support anything but the latest version of v2.
- if (formatOptions.mVersion != FormatSpec.VERSION202) {
- throw new UnsupportedFormatException(
- "The given format options has wrong version number : "
- + formatOptions.mVersion);
- }
-
- if (mOutStream == null) {
- openStream();
- }
-
- // Make code point conversion table ordered by occurrence of code points
- // Version 201 or later have codePointTable
- final CodePointTable codePointTable;
- if (mCodePointTableMode == CODE_POINT_TABLE_OFF || formatOptions.mVersion
- < FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE) {
- codePointTable = new CodePointTable();
- } else {
- codePointTable = makeCodePointTable(dict);
- }
-
- BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions,
- codePointTable.mCodePointOccurrenceArray);
-
- // Addresses are limited to 3 bytes, but since addresses can be relative to each node
- // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
- // the order of the PtNode arrays becomes a quite complicated problem, because though the
- // dictionary itself does not have a size limit, each node array must still be within 16MB
- // of all its children and parents. As long as this is ensured, the dictionary file may
- // grow to any size.
-
- // Leave the choice of the optimal node order to the flattenTree function.
- MakedictLog.i("Flattening the tree...");
- ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
-
- MakedictLog.i("Computing addresses...");
- BinaryDictEncoderUtils.computeAddresses(dict, flatNodes,
- codePointTable.mCodePointToOneByteCodeMap);
- MakedictLog.i("Checking PtNode array...");
- if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
-
- // Create a buffer that matches the final dictionary size.
- final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
- final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
- mBuffer = new byte[bufferSize];
-
- MakedictLog.i("Writing file...");
-
- for (PtNodeArray nodeArray : flatNodes) {
- BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray,
- codePointTable.mCodePointToOneByteCodeMap);
- }
- if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
- mOutStream.write(mBuffer, 0, mPosition);
-
- MakedictLog.i("Done");
- close();
- }
-
- @Override
- public void setPosition(final int position) {
- if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
- mPosition = position;
- }
-
- @Override
- public int getPosition() {
- return mPosition;
- }
-
- @Override
- public void writePtNodeCount(final int ptNodeCount) {
- final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
- if (countSize != 1 && countSize != 2) {
- throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
- }
- final int encodedPtNodeCount = (countSize == 2) ?
- (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
- mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
- countSize);
- }
-
- private void writePtNodeFlags(final PtNode ptNode,
- final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
- codePointToOneByteCodeMap);
- mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
- BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
- FormatSpec.PTNODE_FLAGS_SIZE);
- }
-
- private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars,
- final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
- mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition,
- codePointToOneByteCodeMap);
- if (hasSeveralChars) {
- mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
- }
- }
-
- private void writeFrequency(final int frequency) {
- if (frequency >= 0) {
- mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
- FormatSpec.PTNODE_FREQUENCY_SIZE);
- }
- }
-
- private void writeChildrenPosition(final PtNode ptNode,
- final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
- codePointToOneByteCodeMap);
- mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
- childrenPos);
- }
-
- /**
- * Write a bigram attributes list to mBuffer.
- *
- * @param bigrams the bigram attributes list.
- * @param dict the dictionary the node array is a part of (for relative offsets).
- */
- private void writeBigrams(final ArrayList<WeightedString> bigrams,
- final FusionDictionary dict) {
- if (bigrams == null) return;
-
- final Iterator<WeightedString> bigramIterator = bigrams.iterator();
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int addressOfBigram = target.mCachedAddressAfterUpdate;
- final int unigramFrequencyForThisWord = target.getProbability();
- final int offset = addressOfBigram
- - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
- offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
- mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
- Math.abs(offset));
- }
- }
-
- @Override
- public void writePtNode(final PtNode ptNode, final FusionDictionary dict,
- final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
- writePtNodeFlags(ptNode, codePointToOneByteCodeMap);
- writeCharacters(ptNode.mChars, ptNode.hasSeveralChars(), codePointToOneByteCodeMap);
- writeFrequency(ptNode.getProbability());
- writeChildrenPosition(ptNode, codePointToOneByteCodeMap);
- writeBigrams(ptNode.mBigrams, dict);
- }
-}