about summary refs log tree commit diff stats
path: root/tests/src
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2013-12-17 18:17:51 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2013-12-17 18:17:51 +0900
commit c2fd53ee0e610c9f143537aa3c5a4b0ab6b14e6a (patch)
tree f09cf027bd2560024162b32f52ae7767270a779e /tests/src
parent 42334bb49343099e71104a9ab2b1d299a16ebe30 (diff)
download latinime-c2fd53ee0e610c9f143537aa3c5a4b0ab6b14e6a.tar.gz
latinime-c2fd53ee0e610c9f143537aa3c5a4b0ab6b14e6a.tar.xz
latinime-c2fd53ee0e610c9f143537aa3c5a4b0ab6b14e6a.zip
Remove ver4 dict updater.
Change-Id: I468994c98d091be621b9fb3fbe6405c67fc6a465
Diffstat (limited to 'tests/src')
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java 31
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java 380
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java 10
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java 50
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java 123
-rw-r--r-- tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java 790
6 files changed, 0 insertions, 1384 deletions
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index 8c5da254b..b5a71f0bf 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -592,35 +592,4 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
Log.d(TAG, result);
}
}
-
- private void runTestDeleteWord(final FormatOptions formatOptions)
- throws IOException, UnsupportedFormatException {
- final String dictName = "testDeleteWord";
- final String dictVersion = Long.toString(System.currentTimeMillis());
- final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
- getContext().getCacheDir());
-
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
- BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion));
- addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
- timeWritingDictToFile(file, dict, formatOptions);
-
- final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
- dictUpdater.getTerminalPosition(sWords.get(0)));
- dictUpdater.deleteWord(sWords.get(0));
- assertEquals(FormatSpec.NOT_VALID_WORD,
- dictUpdater.getTerminalPosition(sWords.get(0)));
-
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
- dictUpdater.getTerminalPosition(sWords.get(5)));
- dictUpdater.deleteWord(sWords.get(5));
- assertEquals(FormatSpec.NOT_VALID_WORD,
- dictUpdater.getTerminalPosition(sWords.get(5)));
- }
-
- public void testDeleteWord() throws IOException, UnsupportedFormatException {
- runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
- runTestDeleteWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
- }
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
deleted file mode 100644
index 9ed50c4b3..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import android.test.AndroidTestCase;
-import android.test.MoreAsserts;
-import android.test.suitebuilder.annotation.LargeTest;
-import android.util.Log;
-
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
-import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.utils.CollectionUtils;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Random;
-
-@LargeTest
-public class BinaryDictIOUtilsTests extends AndroidTestCase {
- private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
-
- private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
- public static final int DEFAULT_MAX_UNIGRAMS = 1500;
- private final int mMaxUnigrams;
-
- private static final String[] CHARACTERS = {
- "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
- "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
- "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
- "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
- "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair
- };
-
- public BinaryDictIOUtilsTests() {
- // 1500 is the default max unigrams
- this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
- }
-
- public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
- super();
- Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
- mMaxUnigrams = maxUnigrams;
- final Random random = new Random(seed);
- sWords.clear();
- for (int i = 0; i < maxUnigrams; ++i) {
- sWords.add(generateWord(random.nextInt()));
- }
- }
-
- // Utilities for test
- private String generateWord(final int value) {
- final int lengthOfChars = CHARACTERS.length;
- StringBuilder builder = new StringBuilder("");
- long lvalue = Math.abs((long)value);
- while (lvalue > 0) {
- builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
- lvalue /= lengthOfChars;
- }
- if (builder.toString().equals("")) return "a";
- return builder.toString();
- }
-
- private static void printPtNode(final PtNodeInfo info) {
- Log.d(TAG, " PtNode at " + info.mOriginalAddress);
- Log.d(TAG, " flags = " + info.mFlags);
- Log.d(TAG, " parentAddress = " + info.mParentAddress);
- Log.d(TAG, " characters = " + new String(info.mCharacters, 0,
- info.mCharacters.length));
- if (info.mFrequency != -1) Log.d(TAG, " frequency = " + info.mFrequency);
- if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
- Log.d(TAG, " children address = no children address");
- } else {
- Log.d(TAG, " children address = " + info.mChildrenAddress);
- }
- if (info.mShortcutTargets != null) {
- for (final WeightedString ws : info.mShortcutTargets) {
- Log.d(TAG, " shortcuts = " + ws.mWord);
- }
- }
- if (info.mBigrams != null) {
- for (final PendingAttribute attr : info.mBigrams) {
- Log.d(TAG, " bigram = " + attr.mAddress);
- }
- }
- Log.d(TAG, " end address = " + info.mEndAddress);
- }
-
- private static void printNode(final Ver2DictDecoder dictDecoder,
- final FormatSpec.FormatOptions formatOptions) {
- final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
- Log.d(TAG, "Node at " + dictBuffer.position());
- final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
- Log.d(TAG, " ptNodeCount = " + count);
- for (int i = 0; i < count; ++i) {
- final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
- formatOptions);
- printPtNode(currentInfo);
- }
- if (formatOptions.supportsDynamicUpdate()) {
- final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
- Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
- }
- }
-
- @SuppressWarnings("unused")
- private static void printBinaryFile(final Ver2DictDecoder dictDecoder)
- throws IOException, UnsupportedFormatException {
- final FileHeader fileHeader = dictDecoder.readHeader();
- final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
- while (dictBuffer.position() < dictBuffer.limit()) {
- printNode(dictDecoder, fileHeader.mFormatOptions);
- }
- }
-
- private int getWordPosition(final File file, final String word) {
- int position = FormatSpec.NOT_VALID_WORD;
-
- try {
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file,
- DictDecoder.USE_READONLY_BYTEBUFFER);
- position = dictDecoder.getTerminalPosition(word);
- } catch (IOException e) {
- } catch (UnsupportedFormatException e) {
- }
- return position;
- }
-
- /**
- * Find a word using the DictDecoder.
- *
- * @param dictDecoder the dict decoder
- * @param word the word searched
- * @return the found ptNodeInfo
- * @throws IOException
- * @throws UnsupportedFormatException
- */
- private static PtNodeInfo findWordByDictDecoder(final DictDecoder dictDecoder,
- final String word) throws IOException, UnsupportedFormatException {
- int position = dictDecoder.getTerminalPosition(word);
- if (position != FormatSpec.NOT_VALID_WORD) {
- dictDecoder.setPosition(0);
- final FileHeader header = dictDecoder.readHeader();
- dictDecoder.setPosition(position);
- return dictDecoder.readPtNode(position, header.mFormatOptions);
- }
- return null;
- }
-
- private PtNodeInfo findWordFromFile(final File file, final String word) {
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
- PtNodeInfo info = null;
- try {
- dictDecoder.openDictBuffer();
- info = findWordByDictDecoder(dictDecoder, word);
- } catch (IOException e) {
- } catch (UnsupportedFormatException e) {
- }
- return info;
- }
-
- // return amount of time to insert a word
- private long insertAndCheckWord(final File file, final String word, final int frequency,
- final boolean exist, final ArrayList<WeightedString> bigrams,
- final ArrayList<WeightedString> shortcuts, final FormatOptions formatOptions) {
- long amountOfTime = -1;
- try {
- final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
-
- if (!exist) {
- assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
- }
- final long now = System.nanoTime();
- dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
- amountOfTime = System.nanoTime() - now;
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
- } catch (IOException e) {
- Log.e(TAG, "Raised an IOException while inserting a word", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
- }
- return amountOfTime;
- }
-
- private void deleteWord(final File file, final String word, final FormatOptions formatOptions) {
- try {
- final DictUpdater dictUpdater = BinaryDictUtils.getDictUpdater(file, formatOptions);
- dictUpdater.deleteWord(word);
- } catch (IOException e) {
- Log.e(TAG, "Raised an IOException while deleting a word", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Raised an UnsupportedFormatException while deleting a word", e);
- }
- }
-
- private void checkReverseLookup(final File file, final String word, final int position) {
-
- try {
- final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
- final FileHeader fileHeader = dictDecoder.readHeader();
- assertEquals(word,
- BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
- position, fileHeader.mFormatOptions).mWord);
- } catch (IOException e) {
- Log.e(TAG, "Raised an IOException while looking up a word", e);
- } catch (UnsupportedFormatException e) {
- Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
- }
- }
-
- private void runTestInsertWord(final FormatOptions formatOptions) {
- final String testName = "testInsertWord";
- final String version = Long.toString(System.currentTimeMillis());
- final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
- getContext().getCacheDir());
-
- // set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
- BinaryDictUtils.makeDictionaryOptions(testName, version));
- dict.add("abcd", 10, null, false);
-
- try {
- final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
- dictEncoder.writeDictionary(dict, formatOptions);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- } catch (UnsupportedFormatException e) {
- fail("UnsupportedFormatException while writing an initial dictionary : " + e);
- }
-
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
- insertAndCheckWord(file, "abcde", 10, false, null, null, formatOptions);
- checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
-
- insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatOptions);
- checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
-
- insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatOptions);
- checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
-
- // update the existing word.
- insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatOptions);
- checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
-
- // Testing splitOnly
- insertAndCheckWord(file, "ab", 20, false, null, null, formatOptions);
- checkReverseLookup(file, "ab", getWordPosition(file, "ab"));
- checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
- checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
- checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
-
- // Testing splitAndBranch
- insertAndCheckWord(file, "ami", 30, false, null, null, formatOptions);
- checkReverseLookup(file, "ami", getWordPosition(file, "ami"));
- checkReverseLookup(file, "ab", getWordPosition(file, "ab"));
- checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
- checkReverseLookup(file, "abcde", getWordPosition(file, "abcde"));
- checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
- checkReverseLookup(file, "ami", getWordPosition(file, "ami"));
-
- insertAndCheckWord(file, "abcdefzzzz", 40, false, null, null, formatOptions);
- checkReverseLookup(file, "abcdefzzzz", getWordPosition(file, "abcdefzzzz"));
-
- deleteWord(file, "ami", formatOptions);
- assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
-
- insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatOptions);
-
- deleteWord(file, "abcd", formatOptions);
- assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
- }
-
- public void testInsertWord() {
- runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
- runTestInsertWord(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
- }
-
- private void runTestInsertWordWithBigrams(final FormatOptions formatOptions) {
- final String testName = "testInsertWordWithBigrams";
- final String version = Long.toString(System.currentTimeMillis());
- File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
- getContext().getCacheDir());
-
- // set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
- BinaryDictUtils.makeDictionaryOptions(testName, version));
- dict.add("abcd", 10, null, false);
- dict.add("efgh", 15, null, false);
-
- try {
- final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
- dictEncoder.writeDictionary(dict, formatOptions);
- } catch (IOException e) {
- fail("IOException while writing an initial dictionary : " + e);
- } catch (UnsupportedFormatException e) {
- fail("UnsupportedFormatException while writing an initial dictionary : " + e);
- }
-
- final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
- banana.add(new WeightedString("banana", 10));
-
- insertAndCheckWord(file, "banana", 0, false, null, null, formatOptions);
- insertAndCheckWord(file, "recursive", 60, true, banana, null, formatOptions);
-
- final PtNodeInfo info = findWordFromFile(file, "recursive");
- int bananaPos = getWordPosition(file, "banana");
- assertNotNull(info.mBigrams);
- assertEquals(info.mBigrams.size(), 1);
- assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
- }
-
- public void testInsertWordWithBigrams() {
- runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
- runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
- }
-
- private void runTestRandomWords(final FormatOptions formatOptions) {
- final String testName = "testRandomWord";
- final String version = Long.toString(System.currentTimeMillis());
- final File file = BinaryDictUtils.getDictFile(testName, version, formatOptions,
- getContext().getCacheDir());
-
- // set an initial dictionary.
- final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
- BinaryDictUtils.makeDictionaryOptions(testName, version));
- dict.add("initial", 10, null, false);
-
- try {
- final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
- dictEncoder.writeDictionary(dict, formatOptions);
- } catch (IOException e) {
- assertTrue(false);
- } catch (UnsupportedFormatException e) {
- assertTrue(false);
- }
-
- long maxTimeToInsert = 0, sum = 0;
- long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
- int cnt = 0;
- for (final String word : sWords) {
- final long diff = insertAndCheckWord(file, word,
- cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatOptions);
- maxTimeToInsert = Math.max(maxTimeToInsert, diff);
- minTimeToInsert = Math.min(minTimeToInsert, diff);
- sum += diff;
- cnt++;
- }
- cnt = 0;
- for (final String word : sWords) {
- MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
- }
-
- Log.d(TAG, "Test version " + formatOptions.mVersion);
- Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
- Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
- Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
- }
-
- public void testRandomWords() {
- runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
- runTestRandomWords(BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
- }
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
index 67d77e05a..f7a808c1e 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictUtils.java
@@ -69,14 +69,4 @@ public class BinaryDictUtils {
+ formatOptions.mVersion);
}
}
-
- public static DictUpdater getDictUpdater(final File file, final FormatOptions formatOptions)
- throws UnsupportedFormatException {
- if (formatOptions.mVersion == FormatSpec.VERSION4) {
- return new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
- } else {
- throw new UnsupportedFormatException("The format option has a wrong version : "
- + formatOptions.mVersion);
- }
- }
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java
deleted file mode 100644
index 709ea3310..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/DictUpdater.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-/**
- * An interface of a binary dictionary updater.
- */
-public interface DictUpdater extends DictDecoder {
-
- /**
- * Deletes the word from the binary dictionary.
- *
- * @param word the word to be deleted.
- */
- public void deleteWord(final String word) throws IOException, UnsupportedFormatException;
-
- /**
- * Inserts a word into a binary dictionary.
- *
- * @param word the word to be inserted.
- * @param frequency the frequency of the new word.
- * @param bigramStrings bigram list, or null if none.
- * @param shortcuts shortcut list, or null if none.
- * @param isBlackListEntry whether this should be a blacklist entry.
- */
- // TODO: Support batch insertion.
- public void insertWord(final String word, final int frequency,
- final ArrayList<WeightedString> bigramStrings,
- final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
- final boolean isBlackListEntry) throws IOException, UnsupportedFormatException;
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java
deleted file mode 100644
index 4518f21b9..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * An auxiliary class for updating data associated with SparseTable.
- */
-public class SparseTableContentUpdater extends SparseTableContentReader {
- protected OutputStream mLookupTableOutStream;
- protected OutputStream[] mAddressTableOutStreams;
- protected OutputStream[] mContentOutStreams;
-
- public SparseTableContentUpdater(final String name, final int blockSize,
- final File baseDir, final String[] contentFilenames, final String[] contentIds,
- final DictionaryBufferFactory factory) {
- super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
- mAddressTableOutStreams = new OutputStream[mContentCount];
- mContentOutStreams = new OutputStream[mContentCount];
- }
-
- protected void openStreamsAndBuffers() throws IOException {
- openBuffers();
- mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
- for (int i = 0; i < mContentCount; ++i) {
- mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
- true /* append */);
- mContentOutStreams[i] = new FileOutputStream(mContentFiles[i], true /* append */);
- }
- }
-
- /**
- * Sets the contentIndex-th element of the contentId-th table.
- *
- * @param contentId the id of the content table.
- * @param contentIndex the index at which to set the value.
- * @param value the value to set.
- */
- protected void setContentValue(final int contentId, final int contentIndex, final int value)
- throws IOException {
- if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
- >= mLookupTableBuffer.limit()) {
- // Need to extend the lookup table
- final int currentSize = mLookupTableBuffer.limit()
- / SparseTable.SIZE_OF_INT_IN_BYTES;
- final int target = contentIndex / mBlockSize + 1;
- for (int i = currentSize; i < target; ++i) {
- BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
- SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
- }
- // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
- // Java isn't expanded automatically when the underlying file is expanded.
- reopenLookupTable();
- }
-
- mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
- int posInAddressTable = mLookupTableBuffer.readInt();
- if (posInAddressTable == SparseTable.NOT_EXIST) {
- // Need to extend the address table
- mLookupTableBuffer.position(mLookupTableBuffer.position()
- - SparseTable.SIZE_OF_INT_IN_BYTES);
- posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
- BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
- posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
- for (int i = 0; i < mContentCount; ++i) {
- for (int j = 0; j < mBlockSize; ++j) {
- BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
- SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
- }
- }
- // We need to reopen the byte buffers of the address tables because a MappedByteBuffer
- // in Java isn't expanded automatically when the underlying file is expanded.
- reopenAddressTables();
- }
- posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
-
- mAddressTableBuffers[contentId].position(posInAddressTable);
- BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
- value, SparseTable.SIZE_OF_INT_IN_BYTES);
- }
-
- private void reopenLookupTable() throws IOException {
- mLookupTableOutStream.flush();
- mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
- }
-
- private void reopenAddressTables() throws IOException {
- for (int i = 0; i < mContentCount; ++i) {
- mAddressTableOutStreams[i].flush();
- mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
- }
- }
-
- protected void close() throws IOException {
- mLookupTableOutStream.close();
- for (final OutputStream stream : mAddressTableOutStreams) {
- stream.close();
- }
- for (final OutputStream stream : mContentOutStreams) {
- stream.close();
- }
- }
-}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
deleted file mode 100644
index 119755ff3..000000000
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.inputmethod.latin.makedict;
-
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
-import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.utils.CollectionUtils;
-
-import android.util.Log;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-
-/**
- * An implementation of DictUpdater for version 4 binary dictionary.
- */
-public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
- private static final String TAG = Ver4DictUpdater.class.getSimpleName();
- private static final int MAX_JUMPS = 10000;
-
- private OutputStream mDictStream;
- private final File mFrequencyFile;
-
- public Ver4DictUpdater(final File dictDirectory, final int factoryType)
- throws UnsupportedFormatException {
- // DictUpdater must have an updatable DictBuffer.
- super(dictDirectory, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY)
- ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER);
- mFrequencyFile = getFile(FILETYPE_FREQUENCY);
- }
-
- private static class BigramContentUpdater extends SparseTableContentUpdater {
- public BigramContentUpdater(final String name, final File baseDir,
- final boolean hasTimestamp) {
- super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- BigramContentReader.getContentFilenames(name, hasTimestamp),
- BigramContentReader.getContentIds(hasTimestamp),
- new DictionaryBufferFromWritableByteBufferFactory());
- }
-
- public void insertBigramEntries(final int terminalId, final int frequency,
- final ArrayList<PendingAttribute> entries) throws IOException {
- if (terminalId < 0) {
- throw new RuntimeException("Invalid terminal id : " + terminalId);
- }
- openStreamsAndBuffers();
-
- if (entries == null || entries.isEmpty()) {
- setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
- SparseTable.NOT_EXIST);
- return;
- }
- final int positionOfEntries =
- (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
- setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, positionOfEntries);
-
- final Iterator<PendingAttribute> bigramIterator = entries.iterator();
- while (bigramIterator.hasNext()) {
- final PendingAttribute entry = bigramIterator.next();
- final int flags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
- 0 /* offset */, entry.mFrequency, frequency, "" /* word */);
- BinaryDictEncoderUtils.writeUIntToStream(
- mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], flags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(
- mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX], entry.mAddress,
- FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
- }
- close();
- }
- }
-
- private static class ShortcutContentUpdater extends SparseTableContentUpdater {
- public ShortcutContentUpdater(final String name, final File baseDir) {
- super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
- FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
- new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
- new DictionaryBufferFromWritableByteBufferFactory());
- }
-
- public void insertShortcuts(final int terminalId,
- final ArrayList<WeightedString> shortcuts) throws IOException {
- if (terminalId < 0) {
- throw new RuntimeException("Invalid terminal id : " + terminalId);
- }
- openStreamsAndBuffers();
- if (shortcuts == null || shortcuts.isEmpty()) {
- setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
- SparseTable.NOT_EXIST);
- return;
- }
-
- final int positionOfShortcuts =
- (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
- setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, positionOfShortcuts);
-
- final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
- while (shortcutIterator.hasNext()) {
- final WeightedString target = shortcutIterator.next();
- final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
- shortcutIterator.hasNext(), target.mFrequency);
- BinaryDictEncoderUtils.writeUIntToStream(
- mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX], shortcutFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- CharEncoding.writeString(mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX],
- target.mWord);
- }
- close();
- }
- }
-
- @Override
- public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
- if (mDictBuffer == null) {
- openDictBuffer();
- readHeader();
- }
- final int wordPos = getTerminalPosition(word);
- if (wordPos != FormatSpec.NOT_VALID_WORD) {
- mDictBuffer.position(wordPos);
- final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- mDictBuffer.position(wordPos);
- mDictBuffer.put((byte)markAsDeleted(flags));
- }
- }
-
- private int getNewTerminalId() {
- // The size of frequency file is FormatSpec.FREQUENCY_AND_FLAGS_SIZE * number of terminals
- // because each terminal always has a frequency.
- // So we can get a fresh terminal id by this logic.
- // CAVEAT: we are reading the file size from the disk each time: beware of race conditions,
- // even on one thread.
- return (int) (mFrequencyFile.length() / FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- }
-
- private void updateParentPosIfNotMoved(final int nodePos, final int newParentPos,
- final FormatOptions formatOptions) {
- final int originalPos = getPosition();
- setPosition(nodePos);
- final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- if (!BinaryDictIOUtils.isMovedPtNode(flags, formatOptions)) {
- final int parentOffset = newParentPos - nodePos;
- BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, parentOffset);
- }
- setPosition(originalPos);
- }
-
- private void updateParentPositions(final int nodeArrayPos, final int newParentPos,
- final FormatOptions formatOptions) {
- final int originalPos = mDictBuffer.position();
- mDictBuffer.position(nodeArrayPos);
- int jumpCount = 0;
- do {
- final int count = readPtNodeCount();
- for (int i = 0; i < count; ++i) {
- updateParentPosIfNotMoved(getPosition(), newParentPos, formatOptions);
- skipPtNode(formatOptions);
- }
- if (!readAndFollowForwardLink()) break;
- } while (jumpCount++ < MAX_JUMPS);
- setPosition(originalPos);
- }
-
- private void updateChildrenPos(final int nodePos, final int newChildrenPos,
- final FormatOptions options) {
- final int originalPos = getPosition();
- setPosition(nodePos);
- final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- PtNodeReader.readParentAddress(mDictBuffer, options);
- BinaryDictIOUtils.skipString(mDictBuffer,
- (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
- if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer);
- final int basePos = getPosition();
- BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newChildrenPos - basePos);
- setPosition(originalPos);
- }
-
- private void updateTerminalPosition(final int terminalId, final int position) {
- if (terminalId == PtNode.NOT_A_TERMINAL
- || terminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE
- >= mTerminalAddressTableBuffer.limit()) return;
- mTerminalAddressTableBuffer.position(terminalId
- * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- BinaryDictEncoderUtils.writeUIntToDictBuffer(mTerminalAddressTableBuffer, position,
- FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- }
-
- private void updateForwardLink(final int nodeArrayPos, final int newForwardLink,
- final FormatOptions formatOptions) {
- final int originalPos = getPosition();
- setPosition(nodeArrayPos);
- int jumpCount = 0;
- while (jumpCount++ < MAX_JUMPS) {
- final int ptNodeCount = readPtNodeCount();
- for (int i = 0; i < ptNodeCount; ++i) {
- skipPtNode(formatOptions);
- }
- final int forwardLinkPos = getPosition();
- if (!readAndFollowForwardLink()) {
- setPosition(forwardLinkPos);
- BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, newForwardLink - forwardLinkPos);
- break;
- }
- }
- setPosition(originalPos);
- }
-
- private void markPtNodeAsMoved(final int nodePos, final int newNodePos,
- final FormatOptions options) {
- final int originalPos = getPosition();
- updateParentPosIfNotMoved(nodePos, newNodePos, options);
- setPosition(nodePos);
- final int currentFlags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- setPosition(nodePos);
- mDictBuffer.put((byte) (FormatSpec.FLAG_IS_MOVED
- | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
- final int offset = newNodePos - nodePos;
- BinaryDictIOUtils.writeSInt24ToBuffer(mDictBuffer, offset);
- setPosition(originalPos);
- }
-
- /**
- * Writes a PtNode to an output stream from a Ver4PtNodeInfo.
- *
- * @param nodePos the position of the head of the PtNode.
- * @param info the PtNode info to be written.
- * @return the size written, in bytes.
- */
- private int writePtNode(final int nodePos, final Ver4PtNodeInfo info) throws IOException {
- int written = 0;
-
- // Write flags.
- mDictStream.write((byte) (info.mFlags & 0xFF));
- written += FormatSpec.PTNODE_FLAGS_SIZE;
-
- // Write the parent position.
- final int parentOffset = info.mParentPos == FormatSpec.NO_PARENT_ADDRESS ?
- FormatSpec.NO_PARENT_ADDRESS : info.mParentPos - nodePos;
- BinaryDictIOUtils.writeSInt24ToStream(mDictStream, parentOffset);
- written += FormatSpec.PARENT_ADDRESS_SIZE;
-
- // Write a string.
- if (((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0)
- != (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters > 1)) {
- throw new RuntimeException("Inconsistent flags : hasMultipleChars = "
- + ((info.mFlags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0) + ", length = "
- + (info.mEndIndexOfCharacters - info.mStartIndexOfCharacters));
- }
- written += CharEncoding.writeCodePoints(mDictStream, info.mCharacters,
- info.mStartIndexOfCharacters, info.mEndIndexOfCharacters);
-
- // Write the terminal id.
- if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
- BinaryDictEncoderUtils.writeUIntToStream(mDictStream, info.mTerminalId,
- FormatSpec.PTNODE_TERMINAL_ID_SIZE);
- written += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
- }
-
- // Write the children position.
- final int childrenOffset = info.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS
- ? 0 : info.mChildrenPos - (nodePos + written);
- BinaryDictIOUtils.writeSInt24ToStream(mDictStream, childrenOffset);
- written += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
-
- return written;
- }
-
- /**
- * Helper method to split and move PtNode.
- *
- * @param ptNodeArrayPos the position of PtNodeArray which contains the split and moved PtNode.
- * @param splittedPtNodeToMovePos the position of the split and moved PtNode.
- * @param newParent the parent PtNode after splitting.
- * @param newChildren the children PtNodes after splitting.
- * @param newParentStartPos where to write the new parent.
- * @param formatOptions the format options.
- */
- private void writeSplittedPtNodes(final int ptNodeArrayPos, final int splittedPtNodeToMovePos,
- final Ver4PtNodeInfo newParent, final Ver4PtNodeInfo[] newChildren,
- final int newParentStartPos,
- final FormatOptions formatOptions) throws IOException {
- updateTerminalPosition(newParent.mTerminalId,
- newParentStartPos + 1 /* size of PtNodeCount */);
- int written = writePtNodeArray(newParentStartPos, new Ver4PtNodeInfo[] { newParent },
- FormatSpec.NO_FORWARD_LINK_ADDRESS);
- final int childrenStartPos = newParentStartPos + written;
- writePtNodeArray(childrenStartPos, newChildren, FormatSpec.NO_FORWARD_LINK_ADDRESS);
- int childrenNodePos = childrenStartPos + 1 /* size of PtNodeCount */;
- for (final Ver4PtNodeInfo info : newChildren) {
- updateTerminalPosition(info.mTerminalId, childrenNodePos);
- childrenNodePos += computePtNodeSize(info.mCharacters, info.mStartIndexOfCharacters,
- info.mEndIndexOfCharacters,
- (info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0);
- }
-
- // Mark as moved.
- markPtNodeAsMoved(splittedPtNodeToMovePos, newParentStartPos + 1 /* size of PtNodeCount */,
- formatOptions);
- updateForwardLink(ptNodeArrayPos, newParentStartPos, formatOptions);
- }
-
- /**
- * Writes a node array to the stream.
- *
- * @param nodeArrayPos the position of the head of the node array.
- * @param infos an array of Ver4PtNodeInfo to be written.
- * @return the written length in bytes.
- */
- private int writePtNodeArray(final int nodeArrayPos, final Ver4PtNodeInfo[] infos,
- final int forwardLink) throws IOException {
- int written = BinaryDictIOUtils.writePtNodeCount(mDictStream, infos.length);
- for (int i = 0; i < infos.length; ++i) {
- written += writePtNode(nodeArrayPos + written, infos[i]);
- }
- BinaryDictIOUtils.writeSInt24ToStream(mDictStream, forwardLink);
- written += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
- return written;
- }
-
- private int computePtNodeSize(final int[] codePoints, final int startIndex, final int endIndex,
- final boolean isTerminal) {
- return FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
- + CharEncoding.getCharArraySize(codePoints, startIndex, endIndex)
- + (endIndex - startIndex > 1 ? FormatSpec.PTNODE_TERMINATOR_SIZE : 0)
- + (isTerminal ? FormatSpec.PTNODE_TERMINAL_ID_SIZE : 0)
- + FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
- }
-
- private void writeNewSinglePtNodeWithAttributes(final int[] codePoints,
- final boolean hasShortcuts, final int terminalId, final boolean hasBigrams,
- final boolean isNotAWord, final boolean isBlackListEntry, final int parentPos,
- final FormatOptions formatOptions) throws IOException {
- final int newNodeArrayPos = mDictBuffer.limit();
- final int newNodeFlags = BinaryDictEncoderUtils.makePtNodeFlags(codePoints.length > 1,
- terminalId != PtNode.NOT_A_TERMINAL, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts,
- hasBigrams, isNotAWord, isBlackListEntry, formatOptions);
- final Ver4PtNodeInfo info = new Ver4PtNodeInfo(newNodeFlags, codePoints, terminalId,
- FormatSpec.NO_CHILDREN_ADDRESS, parentPos, 0 /* nodeSize */);
- writePtNodeArray(newNodeArrayPos, new Ver4PtNodeInfo[] { info },
- FormatSpec.NO_FORWARD_LINK_ADDRESS);
- }
-
- private int setMultipleCharsInFlags(final int currentFlags, final boolean hasMultipleChars) {
- final int flags;
- if (hasMultipleChars) {
- flags = currentFlags | FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
- } else {
- flags = currentFlags & (~FormatSpec.FLAG_HAS_MULTIPLE_CHARS);
- }
- return flags;
- }
-
- private int setIsNotAWordInFlags(final int currentFlags, final boolean isNotAWord) {
- final int flags;
- if (isNotAWord) {
- flags = currentFlags | FormatSpec.FLAG_IS_NOT_A_WORD;
- } else {
- flags = currentFlags & (~FormatSpec.FLAG_IS_NOT_A_WORD);
- }
- return flags;
- }
-
- private int setIsBlackListEntryInFlags(final int currentFlags, final boolean isBlackListEntry) {
- final int flags;
- if (isBlackListEntry) {
- flags = currentFlags | FormatSpec.FLAG_IS_BLACKLISTED;
- } else {
- flags = currentFlags & (~FormatSpec.FLAG_IS_BLACKLISTED);
- }
- return flags;
- }
-
- /**
- * Splits a PtNode.
- *
- * abcd - ef
- *
- * -> inserting "abc"
- *
- * abc - d - ef
- *
- * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split.
- * @param nodeToSplitPos the position of the PtNode to split.
- * @param nodeToSplitInfo the information of the PtNode to split.
- * @param indexToSplit the index where to split in the code points array.
- * @param parentOfNodeToSplitPos the absolute position of a parent of the node to split.
- * @param newTerminalId the terminal id of the inserted node (corresponds to "d").
- * @param hasShortcuts whether the inserted word should have shortcuts.
- * @param hasBigrams whether the inserted word should have bigrams.
- * @param isNotAWord whether the inserted word should be not a word.
- * @param isBlackListEntry whether the inserted word should be a black list entry.
- * @param formatOptions the format options.
- */
- private void splitOnly(final int nodeArrayToSplitPos, final int nodeToSplitPos,
- final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit,
- final int parentOfNodeToSplitPos, final int newTerminalId, final boolean hasShortcuts,
- final boolean hasBigrams, final boolean isNotAWord, final boolean isBlackListEntry,
- final FormatOptions formatOptions) throws IOException {
- final int parentNodeArrayStartPos = mDictBuffer.limit();
- final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
- final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(indexToSplit > 1,
- true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams,
- isNotAWord, isBlackListEntry, formatOptions);
- final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags,
- nodeToSplitInfo.mCharacters, newTerminalId, parentNodeStartPos
- + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, true)
- + FormatSpec.FORWARD_LINK_ADDRESS_SIZE,
- parentOfNodeToSplitPos, 0 /* nodeSize */);
- parentInfo.mStartIndexOfCharacters = 0;
- parentInfo.mEndIndexOfCharacters = indexToSplit;
-
- // Write the child.
- final int childrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags,
- nodeToSplitInfo.mCharacters.length - indexToSplit > 1);
- final Ver4PtNodeInfo childrenInfo = new Ver4PtNodeInfo(childrenFlags,
- nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId,
- nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */);
- childrenInfo.mStartIndexOfCharacters = indexToSplit;
- childrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length;
- if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
- updateParentPositions(nodeToSplitInfo.mChildrenPos,
- parentInfo.mChildrenPos + 1 /* size of PtNodeCount */, formatOptions);
- }
-
- writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo,
- new Ver4PtNodeInfo[] { childrenInfo }, parentNodeArrayStartPos, formatOptions);
- }
-
- /**
- * Split and branch a PtNode.
- *
- * ab - cd
- *
- * -> inserting "ac"
- *
- * a - b - cd
- * |
- * - c
- *
- * @param nodeArrayToSplitPos the position of PtNodeArray which contains the PtNode to split.
- * @param nodeToSplitPos the position of the PtNode to split.
- * @param nodeToSplitInfo the information of the PtNode to split.
- * @param indexToSplit the index where to split in the code points array.
- * @param parentOfNodeToSplitPos the absolute position of parent of the node to split.
- * @param newWordSuffixCodePoints the suffix of the newly inserted word (corresponds to "c").
- * @param startIndexOfNewWordSuffixCodePoints the start index in newWordSuffixCodePoints where
- * the suffix starts.
- * @param newTerminalId the terminal id of the inserted node (correspond to "c").
- * @param hasShortcuts whether the inserted word should have shortcuts.
- * @param hasBigrams whether the inserted word should have bigrams.
- * @param isNotAWord whether the inserted word should be not a word.
- * @param isBlackListEntry whether the inserted word should be a black list entry.
- * @param formatOptions the format options.
- */
- private void splitAndBranch(final int nodeArrayToSplitPos, final int nodeToSplitPos,
- final Ver4PtNodeInfo nodeToSplitInfo, final int indexToSplit,
- final int parentOfNodeToSplitPos, final int[] newWordSuffixCodePoints,
- final int startIndexOfNewWordSuffixCodePoints,
- final int newTerminalId,
- final boolean hasShortcuts, final boolean hasBigrams, final boolean isNotAWord,
- final boolean isBlackListEntry, final FormatOptions formatOptions) throws IOException {
- final int parentNodeArrayStartPos = mDictBuffer.limit();
- final int parentNodeStartPos = parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
- final int parentFlags = BinaryDictEncoderUtils.makePtNodeFlags(
- indexToSplit > 1,
- false /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED,
- false /* hasShortcut */, false /* hasBigrams */,
- false /* isNotAWord */, false /* isBlackListEntry */, formatOptions);
- final Ver4PtNodeInfo parentInfo = new Ver4PtNodeInfo(parentFlags,
- nodeToSplitInfo.mCharacters, PtNode.NOT_A_TERMINAL,
- parentNodeStartPos
- + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false)
- + FormatSpec.FORWARD_LINK_ADDRESS_SIZE,
- parentOfNodeToSplitPos, 0 /* nodeSize */);
- parentInfo.mStartIndexOfCharacters = 0;
- parentInfo.mEndIndexOfCharacters = indexToSplit;
-
- final int childrenNodeArrayStartPos = parentNodeStartPos
- + computePtNodeSize(nodeToSplitInfo.mCharacters, 0, indexToSplit, false)
- + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
- final int firstChildrenFlags = BinaryDictEncoderUtils.makePtNodeFlags(
- newWordSuffixCodePoints.length - startIndexOfNewWordSuffixCodePoints > 1,
- true /* isTerminal */, FormatSpec.FLAG_IS_NOT_MOVED, hasShortcuts, hasBigrams,
- isNotAWord, isBlackListEntry, formatOptions);
- final Ver4PtNodeInfo firstChildrenInfo = new Ver4PtNodeInfo(firstChildrenFlags,
- newWordSuffixCodePoints, newTerminalId,
- FormatSpec.NO_CHILDREN_ADDRESS, parentNodeStartPos,
- 0 /* nodeSize */);
- firstChildrenInfo.mStartIndexOfCharacters = startIndexOfNewWordSuffixCodePoints;
- firstChildrenInfo.mEndIndexOfCharacters = newWordSuffixCodePoints.length;
-
- final int secondChildrenStartPos = childrenNodeArrayStartPos + 1 /* size of ptNodeCount */
- + computePtNodeSize(newWordSuffixCodePoints, startIndexOfNewWordSuffixCodePoints,
- newWordSuffixCodePoints.length, true /* isTerminal */);
- final int secondChildrenFlags = setMultipleCharsInFlags(nodeToSplitInfo.mFlags,
- nodeToSplitInfo.mCharacters.length - indexToSplit > 1);
- final Ver4PtNodeInfo secondChildrenInfo = new Ver4PtNodeInfo(secondChildrenFlags,
- nodeToSplitInfo.mCharacters, nodeToSplitInfo.mTerminalId,
- nodeToSplitInfo.mChildrenPos, parentNodeStartPos, 0 /* nodeSize */);
- secondChildrenInfo.mStartIndexOfCharacters = indexToSplit;
- secondChildrenInfo.mEndIndexOfCharacters = nodeToSplitInfo.mCharacters.length;
- if (nodeToSplitInfo.mChildrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
- updateParentPositions(nodeToSplitInfo.mChildrenPos, secondChildrenStartPos,
- formatOptions);
- }
-
- writeSplittedPtNodes(nodeArrayToSplitPos, nodeToSplitPos, parentInfo,
- new Ver4PtNodeInfo[] { firstChildrenInfo, secondChildrenInfo },
- parentNodeArrayStartPos, formatOptions);
- }
-
- /**
- * Inserts a word into the trie file and returns the position of inserted terminal node.
- * If the insertion is failed, returns FormatSpec.NOT_VALID_WORD.
- */
- private int insertWordToTrie(final String word, final int newTerminalId,
- final boolean isNotAWord, final boolean isBlackListEntry, final boolean hasBigrams,
- final boolean hasShortcuts) throws IOException, UnsupportedFormatException {
- setPosition(0);
- final FileHeader header = readHeader();
-
- final int[] codePoints = FusionDictionary.getCodePoints(word);
- final int wordLen = codePoints.length;
-
- int wordPos = 0;
- for (int depth = 0; depth < FormatSpec.MAX_WORD_LENGTH; /* nop */) {
- final int nodeArrayPos = getPosition();
- final int ptNodeCount = readPtNodeCount();
- boolean goToChildren = false;
- int parentPos = FormatSpec.NO_PARENT_ADDRESS;
- for (int i = 0; i < ptNodeCount; ++i) {
- final int nodePos = getPosition();
- final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(nodePos, header.mFormatOptions);
- if (BinaryDictIOUtils.isMovedPtNode(nodeInfo.mFlags, header.mFormatOptions)) {
- continue;
- }
- if (nodeInfo.mParentPos != FormatSpec.NO_PARENT_ADDRESS) {
- parentPos = nodePos + nodeInfo.mParentPos;
- }
-
- final boolean firstCharacterMatched =
- codePoints[wordPos] == nodeInfo.mCharacters[0];
- boolean allCharactersMatched = true;
- int firstDifferentCharacterIndex = -1;
- for (int p = 0; p < nodeInfo.mCharacters.length; ++p) {
- if (wordPos + p >= codePoints.length) break;
- if (codePoints[wordPos + p] != nodeInfo.mCharacters[p]) {
- if (firstDifferentCharacterIndex == -1) {
- firstDifferentCharacterIndex = p;
- }
- allCharactersMatched = false;
- }
- }
-
- if (!firstCharacterMatched) {
- // Go to the next sibling node.
- continue;
- }
-
- if (!allCharactersMatched) {
- final int parentNodeArrayStartPos = mDictBuffer.limit();
- splitAndBranch(nodeArrayPos, nodePos, nodeInfo, firstDifferentCharacterIndex,
- parentPos, codePoints, wordPos + firstDifferentCharacterIndex,
- newTerminalId, hasShortcuts, hasBigrams, isNotAWord,
- isBlackListEntry, header.mFormatOptions);
-
- return parentNodeArrayStartPos + computePtNodeSize(codePoints, wordPos,
- wordPos + firstDifferentCharacterIndex, false)
- + FormatSpec.FORWARD_LINK_ADDRESS_SIZE + 1 /* size of PtNodeCount */;
- }
-
- if (wordLen - wordPos < nodeInfo.mCharacters.length) {
- final int parentNodeArrayStartPos = mDictBuffer.limit();
- splitOnly(nodeArrayPos, nodePos, nodeInfo, wordLen - wordPos, parentPos,
- newTerminalId, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry,
- header.mFormatOptions);
-
- // Return the position of the inserted word.
- return parentNodeArrayStartPos + 1 /* size of PtNodeCount */;
- }
-
- wordPos += nodeInfo.mCharacters.length;
- if (wordPos == wordLen) {
- // This dictionary already contains the word.
- Log.e(TAG, "Something went wrong. If the word is already contained, "
- + " there is no need to insert new PtNode.");
- return FormatSpec.NOT_VALID_WORD;
- }
- if (nodeInfo.mChildrenPos == FormatSpec.NO_CHILDREN_ADDRESS) {
- // There are no children.
- // We need to add a new node as a child of this node.
- final int newNodeArrayPos = mDictBuffer.limit();
- final int[] newNodeCodePoints = Arrays.copyOfRange(codePoints, wordPos,
- codePoints.length);
- writeNewSinglePtNodeWithAttributes(newNodeCodePoints, hasShortcuts,
- newTerminalId, hasBigrams, isNotAWord, isBlackListEntry, nodePos,
- header.mFormatOptions);
- updateChildrenPos(nodePos, newNodeArrayPos, header.mFormatOptions);
- return newNodeArrayPos + 1 /* size of PtNodeCount */;
- } else {
- // Found the matched node.
- // Go to the children of this node.
- setPosition(nodeInfo.mChildrenPos);
- goToChildren = true;
- depth++;
- break;
- }
- }
-
- if (goToChildren) continue;
- if (!readAndFollowForwardLink()) {
- // Add a new node that contains [wordPos, word.length()-1].
- // and update the forward link.
- final int newNodeArrayPos = mDictBuffer.limit();
- final int[] newCodePoints = Arrays.copyOfRange(codePoints, wordPos,
- codePoints.length);
- writeNewSinglePtNodeWithAttributes(newCodePoints, hasShortcuts, newTerminalId,
- hasBigrams, isNotAWord, isBlackListEntry, parentPos, header.mFormatOptions);
- updateForwardLink(nodeArrayPos, newNodeArrayPos, header.mFormatOptions);
- return newNodeArrayPos + 1 /* size of PtNodeCount */;
- }
- }
- return FormatSpec.NOT_VALID_WORD;
- }
-
- private void updateFrequency(final int terminalId, final int frequency) {
- mFrequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToDictBuffer(mFrequencyBuffer, frequency,
- FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- }
-
- private void insertFrequency(final int frequency) throws IOException {
- final OutputStream frequencyStream = new FileOutputStream(mFrequencyFile,
- true /* append */);
- BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
- FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- frequencyStream.close();
- }
-
- private void insertTerminalPosition(final int posOfTerminal) throws IOException,
- UnsupportedFormatException {
- final OutputStream terminalPosStream = new FileOutputStream(
- getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
- BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
- FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- terminalPosStream.close();
- }
-
- private void insertBigrams(final int terminalId, final int frequency,
- final ArrayList<PendingAttribute> bigramAddresses)
- throws IOException, UnsupportedFormatException {
- openDictBuffer();
- final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(),
- mDictDirectory, false);
-
- // Convert addresses to terminal ids.
- final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
- mDictBuffer.position(0);
- final FileHeader header = readHeader();
- for (PendingAttribute attr : bigramAddresses) {
- mDictBuffer.position(attr.mAddress);
- final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions);
- if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
- throw new RuntimeException("We can't have a bigram target that's not a terminal.");
- }
- bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
- }
- updater.insertBigramEntries(terminalId, frequency, bigrams);
- close();
- }
-
- private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts)
- throws IOException {
- final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
- mDictDirectory);
- updater.insertShortcuts(terminalId, shortcuts);
- }
-
- private void openBuffersAndStream() throws IOException, UnsupportedFormatException {
- openDictBuffer();
- mDictStream = new FileOutputStream(getFile(FILETYPE_TRIE), true /* append */);
- }
-
- private void close() throws IOException {
- if (mDictStream != null) {
- mDictStream.close();
- mDictStream = null;
- }
- mDictBuffer = null;
- mFrequencyBuffer = null;
- mTerminalAddressTableBuffer = null;
- }
-
- private void updateAttributes(final int posOfWord, final int frequency,
- final ArrayList<WeightedString> bigramStrings,
- final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
- final boolean isBlackListEntry) throws IOException, UnsupportedFormatException {
- mDictBuffer.position(0);
- final FileHeader header = readHeader();
- mDictBuffer.position(posOfWord);
- final Ver4PtNodeInfo info = readVer4PtNodeInfo(posOfWord, header.mFormatOptions);
- final int terminalId = info.mTerminalId;
-
- // Update the flags.
- final int newFlags = setIsNotAWordInFlags(
- setIsBlackListEntryInFlags(info.mFlags, isBlackListEntry), isNotAWord);
- mDictBuffer.position(posOfWord);
- mDictBuffer.put((byte) newFlags);
-
- updateFrequency(terminalId, frequency);
- insertBigrams(terminalId, frequency, resolveBigramPositions(this, bigramStrings));
- insertShortcuts(terminalId, shortcuts);
- }
-
- @Override
- public void insertWord(final String word, final int frequency,
- final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts,
- final boolean isNotAWord, final boolean isBlackListEntry)
- throws IOException, UnsupportedFormatException {
- final int newTerminalId = getNewTerminalId();
-
- openBuffersAndStream();
- final int posOfWord = getTerminalPosition(word);
- if (posOfWord != FormatSpec.NOT_VALID_WORD) {
- // The word is already contained in the dictionary.
- updateAttributes(posOfWord, frequency, bigramStrings, shortcuts, isNotAWord,
- isBlackListEntry);
- close();
- return;
- }
-
- // Insert new PtNode into trie.
- final int posOfTerminal = insertWordToTrie(word, newTerminalId, isNotAWord,
- isBlackListEntry, bigramStrings != null && !bigramStrings.isEmpty(),
- shortcuts != null && !shortcuts.isEmpty());
- insertFrequency(frequency);
- insertTerminalPosition(posOfTerminal);
- close();
-
- insertBigrams(newTerminalId, frequency, resolveBigramPositions(this, bigramStrings));
- insertShortcuts(newTerminalId, shortcuts);
- }
-
- /**
- * Converts a list of WeightedString to a list of PendingAttribute.
- */
- private static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater,
- final ArrayList<WeightedString> bigramStrings)
- throws IOException, UnsupportedFormatException {
- if (bigramStrings == null) return CollectionUtils.newArrayList();
- final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
- for (final WeightedString bigram : bigramStrings) {
- final int pos = dictUpdater.getTerminalPosition(bigram.mWord);
- if (pos == FormatSpec.NOT_VALID_WORD) {
- // TODO: figure out what is the correct thing to do here.
- } else {
- bigrams.add(new PendingAttribute(bigram.mFrequency, pos));
- }
- }
- return bigrams;
- }
-
- private static int markAsDeleted(final int flags) {
- return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED;
- }
-}