about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--java/res/values/config.xml2
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java206
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java8
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java2
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/DictDecoder.java195
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/DictUpdater.java6
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java13
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java4
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java2
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java53
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java73
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java59
-rw-r--r--java/src/com/android/inputmethod/latin/settings/SettingsValues.java17
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h10
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp32
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp14
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp55
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h23
-rw-r--r--native/jni/src/suggest/policyimpl/typing/scoring_params.cpp48
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java4
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java114
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java20
-rw-r--r--tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java68
29 files changed, 669 insertions, 393 deletions
diff --git a/java/res/values/config.xml b/java/res/values/config.xml
index 465d52cec..66b9b7082 100644
--- a/java/res/values/config.xml
+++ b/java/res/values/config.xml
@@ -103,7 +103,7 @@
-->
<string-array name="auto_correction_threshold_values" translatable="false">
<!-- Off, When auto correction setting is Off, this value is not used. -->
- <item></item>
+ <item>floatMaxValue</item>
<!-- Modest : Suggestion whose normalized score is greater than this value
will be subject to auto-correction. -->
<item>0.185</item>
diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
new file mode 100644
index 000000000..9f7f502ea
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.TreeMap;
+
+/**
+ * A base class of the binary dictionary decoder.
+ */
+public abstract class AbstractDictDecoder implements DictDecoder {
+ protected FileHeader readHeader(final DictBuffer dictBuffer)
+ throws IOException, UnsupportedFormatException {
+ if (dictBuffer == null) {
+ openDictBuffer();
+ }
+
+ final int version = HeaderReader.readVersion(dictBuffer);
+ if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
+ || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
+ throw new UnsupportedFormatException("Unsupported version : " + version);
+ }
+ // TODO: Remove this field.
+ final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
+
+ final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
+
+ if (headerSize < 0) {
+ throw new UnsupportedFormatException("header size can't be negative.");
+ }
+
+ final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
+ headerSize);
+
+ final FileHeader header = new FileHeader(headerSize,
+ new FusionDictionary.DictionaryOptions(attributes,
+ 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
+ 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
+ new FormatOptions(version,
+ 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
+ return header;
+ }
+
+ @Override @UsedForTesting
+ public int getTerminalPosition(final String word)
+ throws IOException, UnsupportedFormatException {
+ if (!isDictBufferOpen()) {
+ openDictBuffer();
+ }
+ return BinaryDictIOUtils.getTerminalPosition(this, word);
+ }
+
+ @Override @UsedForTesting
+ public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
+ final TreeMap<Integer, Integer> frequencies,
+ final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
+ throws IOException, UnsupportedFormatException {
+ if (!isDictBufferOpen()) {
+ openDictBuffer();
+ }
+ BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
+ }
+
+ /**
+ * A utility class for reading a file header.
+ */
+ protected static class HeaderReader {
+ protected static int readVersion(final DictBuffer dictBuffer)
+ throws IOException, UnsupportedFormatException {
+ return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
+ }
+
+ protected static int readOptionFlags(final DictBuffer dictBuffer) {
+ return dictBuffer.readUnsignedShort();
+ }
+
+ protected static int readHeaderSize(final DictBuffer dictBuffer) {
+ return dictBuffer.readInt();
+ }
+
+ protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
+ final int headerSize) {
+ final HashMap<String, String> attributes = new HashMap<String, String>();
+ while (dictBuffer.position() < headerSize) {
+ // We can avoid an infinite loop here since dictBuffer.position() is always
+ // increased by calling CharEncoding.readString.
+ final String key = CharEncoding.readString(dictBuffer);
+ final String value = CharEncoding.readString(dictBuffer);
+ attributes.put(key, value);
+ }
+ dictBuffer.position(headerSize);
+ return attributes;
+ }
+ }
+
+ /**
+ * A utility class for reading a PtNode.
+ */
+ protected static class PtNodeReader {
+ protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
+ return dictBuffer.readUnsignedByte();
+ }
+
+ protected static int readParentAddress(final DictBuffer dictBuffer,
+ final FormatOptions formatOptions) {
+ if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
+ return BinaryDictDecoderUtils.readSInt24(dictBuffer);
+ } else {
+ return FormatSpec.NO_PARENT_ADDRESS;
+ }
+ }
+
+ protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
+ final FormatOptions formatOptions) {
+ if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
+ final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
+ if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
+ return address;
+ } else {
+ switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
+ case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
+ return dictBuffer.readUnsignedByte();
+ case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
+ return dictBuffer.readUnsignedShort();
+ case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
+ return dictBuffer.readUnsignedInt24();
+ case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
+ default:
+ return FormatSpec.NO_CHILDREN_ADDRESS;
+ }
+ }
+ }
+
+ // Reads shortcuts and returns the read length.
+ protected static int readShortcut(final DictBuffer dictBuffer,
+ final ArrayList<WeightedString> shortcutTargets) {
+ final int pointerBefore = dictBuffer.position();
+ dictBuffer.readUnsignedShort(); // skip the size
+ while (true) {
+ final int targetFlags = dictBuffer.readUnsignedByte();
+ final String word = CharEncoding.readString(dictBuffer);
+ shortcutTargets.add(new WeightedString(word,
+ targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
+ if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
+ return dictBuffer.position() - pointerBefore;
+ }
+
+ protected static int readBigramAddresses(final DictBuffer dictBuffer,
+ final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
+ int readLength = 0;
+ int bigramCount = 0;
+ while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ final int bigramFlags = dictBuffer.readUnsignedByte();
+ ++readLength;
+ final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
+ ? 1 : -1;
+ int bigramAddress = baseAddress + readLength;
+ switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
+ case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
+ bigramAddress += sign * dictBuffer.readUnsignedByte();
+ readLength += 1;
+ break;
+ case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
+ bigramAddress += sign * dictBuffer.readUnsignedShort();
+ readLength += 2;
+ break;
+ case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
+ bigramAddress += sign * dictBuffer.readUnsignedInt24();
+ readLength += 3;
+ break;
+ default:
+ throw new RuntimeException("Has bigrams with no address");
+ }
+ bigrams.add(new PendingAttribute(
+ bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
+ bigramAddress));
+ if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
+ return readLength;
+ }
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 2c3d1346f..216492b4d 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -23,11 +23,11 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
@@ -219,14 +219,14 @@ public final class BinaryDictDecoderUtils {
}
/**
- * Writes a string with our character format to a ByteArrayOutputStream.
+ * Writes a string with our character format to an OutputStream.
*
* This will also write the terminator byte.
*
- * @param buffer the ByteArrayOutputStream to write to.
+ * @param buffer the OutputStream to write to.
* @param word the string to write.
*/
- static void writeString(final ByteArrayOutputStream buffer, final String word) {
+ static void writeString(final OutputStream buffer, final String word) throws IOException {
final int length = word.length();
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
final int codePoint = word.codePointAt(i);
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index b6024243f..f761829de 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -383,8 +383,8 @@ public class BinaryDictEncoderUtils {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
nodeSize + size, ptNode.mChildren));
}
- nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) {
+ nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
if (null != ptNode.mBigrams) {
for (WeightedString bigram : ptNode.mBigrams) {
final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java
index e251f7df7..3dbeee099 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java
@@ -17,11 +17,9 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File;
@@ -32,50 +30,17 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.TreeMap;
/**
- * The base class of binary dictionary decoders.
+ * An interface of binary dictionary decoders.
*/
-public abstract class DictDecoder {
-
- protected FileHeader readHeader(final DictBuffer dictBuffer)
- throws IOException, UnsupportedFormatException {
- if (dictBuffer == null) {
- openDictBuffer();
- }
-
- final int version = HeaderReader.readVersion(dictBuffer);
- if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
- || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
- throw new UnsupportedFormatException("Unsupported version : " + version);
- }
- // TODO: Remove this field.
- final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
-
- final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
-
- if (headerSize < 0) {
- throw new UnsupportedFormatException("header size can't be negative.");
- }
-
- final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
- headerSize);
-
- final FileHeader header = new FileHeader(headerSize,
- new FusionDictionary.DictionaryOptions(attributes,
- 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
- 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
- new FormatOptions(version,
- 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
- return header;
- }
+public interface DictDecoder {
/**
* Reads and returns the file header.
*/
- public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException;
+ public FileHeader readHeader() throws IOException, UnsupportedFormatException;
/**
* Reads PtNode from nodeAddress.
@@ -83,7 +48,7 @@ public abstract class DictDecoder {
* @param formatOptions the format options.
* @return PtNodeInfo.
*/
- public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
+ public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
/**
* Reads a buffer and returns the memory representation of the dictionary.
@@ -98,7 +63,7 @@ public abstract class DictDecoder {
* @return the created (or merged) dictionary.
*/
@UsedForTesting
- public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict,
+ public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
final boolean deleteDictIfBroken)
throws FileNotFoundException, IOException, UnsupportedFormatException;
@@ -113,12 +78,7 @@ public abstract class DictDecoder {
*/
@UsedForTesting
public int getTerminalPosition(final String word)
- throws IOException, UnsupportedFormatException {
- if (!isDictBufferOpen()) {
- openDictBuffer();
- }
- return BinaryDictIOUtils.getTerminalPosition(this, word);
- }
+ throws IOException, UnsupportedFormatException;
/**
* Reads unigrams and bigrams from the binary file.
@@ -134,47 +94,42 @@ public abstract class DictDecoder {
public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
final TreeMap<Integer, Integer> frequencies,
final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
- throws IOException, UnsupportedFormatException {
- if (!isDictBufferOpen()) {
- openDictBuffer();
- }
- BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
- }
+ throws IOException, UnsupportedFormatException;
/**
* Sets the position of the buffer to the given value.
*
* @param newPos the new position
*/
- public abstract void setPosition(final int newPos);
+ public void setPosition(final int newPos);
/**
* Gets the position of the buffer.
*
* @return the position
*/
- public abstract int getPosition();
+ public int getPosition();
/**
* Reads and returns the PtNode count out of a buffer and forwards the pointer.
*/
- public abstract int readPtNodeCount();
+ public int readPtNodeCount();
/**
* Reads the forward link and advances the position.
*
* @return true if this method moves the file pointer, false otherwise.
*/
- public abstract boolean readAndFollowForwardLink();
- public abstract boolean hasNextPtNodeArray();
+ public boolean readAndFollowForwardLink();
+ public boolean hasNextPtNodeArray();
/**
* Opens the dictionary file and makes DictBuffer.
*/
@UsedForTesting
- public abstract void openDictBuffer() throws FileNotFoundException, IOException;
+ public void openDictBuffer() throws FileNotFoundException, IOException;
@UsedForTesting
- public abstract boolean isDictBufferOpen();
+ public boolean isDictBufferOpen();
// Constants for DictionaryBufferFactory.
public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
@@ -272,125 +227,5 @@ public abstract class DictDecoder {
}
}
- /**
- * A utility class for reading a file header.
- */
- protected static class HeaderReader {
- protected static int readVersion(final DictBuffer dictBuffer)
- throws IOException, UnsupportedFormatException {
- return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
- }
-
- protected static int readOptionFlags(final DictBuffer dictBuffer) {
- return dictBuffer.readUnsignedShort();
- }
-
- protected static int readHeaderSize(final DictBuffer dictBuffer) {
- return dictBuffer.readInt();
- }
-
- protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
- final int headerSize) {
- final HashMap<String, String> attributes = new HashMap<String, String>();
- while (dictBuffer.position() < headerSize) {
- // We can avoid an infinite loop here since dictBuffer.position() is always
- // increased by calling CharEncoding.readString.
- final String key = CharEncoding.readString(dictBuffer);
- final String value = CharEncoding.readString(dictBuffer);
- attributes.put(key, value);
- }
- dictBuffer.position(headerSize);
- return attributes;
- }
- }
-
- /**
- * A utility class for reading a PtNode.
- */
- protected static class PtNodeReader {
- protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
- return dictBuffer.readUnsignedByte();
- }
-
- protected static int readParentAddress(final DictBuffer dictBuffer,
- final FormatOptions formatOptions) {
- if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
- return BinaryDictDecoderUtils.readSInt24(dictBuffer);
- } else {
- return FormatSpec.NO_PARENT_ADDRESS;
- }
- }
-
- protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
- final FormatOptions formatOptions) {
- if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
- final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
- if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
- return address;
- } else {
- switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
- return dictBuffer.readUnsignedByte();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
- return dictBuffer.readUnsignedShort();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
- return dictBuffer.readUnsignedInt24();
- case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
- default:
- return FormatSpec.NO_CHILDREN_ADDRESS;
- }
- }
- }
-
- // Reads shortcuts and returns the read length.
- protected static int readShortcut(final DictBuffer dictBuffer,
- final ArrayList<WeightedString> shortcutTargets) {
- final int pointerBefore = dictBuffer.position();
- dictBuffer.readUnsignedShort(); // skip the size
- while (true) {
- final int targetFlags = dictBuffer.readUnsignedByte();
- final String word = CharEncoding.readString(dictBuffer);
- shortcutTargets.add(new WeightedString(word,
- targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
- if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
- }
- return dictBuffer.position() - pointerBefore;
- }
-
- protected static int readBigramAddresses(final DictBuffer dictBuffer,
- final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
- int readLength = 0;
- int bigramCount = 0;
- while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- final int bigramFlags = dictBuffer.readUnsignedByte();
- ++readLength;
- final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
- ? 1 : -1;
- int bigramAddress = baseAddress + readLength;
- switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
- bigramAddress += sign * dictBuffer.readUnsignedByte();
- readLength += 1;
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
- bigramAddress += sign * dictBuffer.readUnsignedShort();
- readLength += 2;
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
- bigramAddress += sign * dictBuffer.readUnsignedInt24();
- readLength += 3;
- break;
- default:
- throw new RuntimeException("Has bigrams with no address");
- }
- bigrams.add(new PendingAttribute(
- bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
- bigramAddress));
- if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
- }
- return readLength;
- }
- }
-
- public abstract void skipPtNode(final FormatOptions formatOptions);
+ public void skipPtNode(final FormatOptions formatOptions);
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java
index 413d0301c..c4f7ec91f 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java
@@ -16,6 +16,7 @@
package com.android.inputmethod.latin.makedict;
+import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.IOException;
@@ -24,13 +25,15 @@ import java.util.ArrayList;
/**
* An interface of a binary dictionary updater.
*/
-public interface DictUpdater {
+@UsedForTesting
+public interface DictUpdater extends DictDecoder {
/**
* Deletes the word from the binary dictionary.
*
* @param word the word to be deleted.
*/
+ @UsedForTesting
public void deleteWord(final String word) throws IOException, UnsupportedFormatException;
/**
@@ -43,6 +46,7 @@ public interface DictUpdater {
* @param isBlackListEntry whether this should be a blacklist entry.
*/
// TODO: Support batch insertion.
+ @UsedForTesting
public void insertWord(final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index a5516bd41..5a5d7af6b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -266,15 +266,28 @@ public final class FormatSpec {
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
static final String BIGRAM_FILE_EXTENSION = ".bigram";
+ static final String SHORTCUT_FILE_EXTENSION = ".shortcut";
static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup";
static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+
+ // With the English main dictionary as of October 2013, the size of bigram address table is
+ // 584KB with the block size being 4.
+ // This is 91% of that of full address table.
static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
static final int BIGRAM_CONTENT_COUNT = 1;
static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
+ static final int SHORTCUT_CONTENT_COUNT = 1;
+ static final int SHORTCUT_CONTENT_INDEX = 0;
+ // With the English main dictionary as of October 2013, the size of shortcut address table is
+ // 29KB with the block size being 64.
+ // This is only 4.4% of that of full address table.
+ static final int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+ static final String SHORTCUT_CONTENT_ID = "_shortcut";
+
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
static final int NO_FORWARD_LINK_ADDRESS = 0;
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
index b87259c38..acab4f8a5 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
@@ -37,7 +37,7 @@ import java.util.Arrays;
* An implementation of DictDecoder for version 3 binary dictionary.
*/
@UsedForTesting
-public class Ver3DictDecoder extends DictDecoder {
+public class Ver3DictDecoder extends AbstractDictDecoder {
private static final String TAG = Ver3DictDecoder.class.getSimpleName();
static {
@@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder {
// TODO: implement something sensical instead of just a phony method
private static native int doNothing();
- protected static class PtNodeReader extends DictDecoder.PtNodeReader {
+ protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
private static int readFrequency(final DictBuffer dictBuffer) {
return dictBuffer.readUnsignedByte();
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
index fa7ae310a..07adda625 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
@@ -57,7 +57,7 @@ public class Ver3DictUpdater extends Ver3DictDecoder implements DictUpdater {
public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
if (mOutStream == null) openStreamAndBuffer();
mDictBuffer.position(0);
- super.readHeader();
+ readHeader();
final int wordPos = getTerminalPosition(word);
if (wordPos != FormatSpec.NOT_VALID_WORD) {
mDictBuffer.position(wordPos);
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 5089687da..53729075f 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -23,6 +23,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CollectionUtils;
import android.util.Log;
@@ -36,21 +37,24 @@ import java.util.Arrays;
* An implementation of binary dictionary decoder for version 4 binary dictionary.
*/
@UsedForTesting
-public class Ver4DictDecoder extends DictDecoder {
+public class Ver4DictDecoder extends AbstractDictDecoder {
private static final String TAG = Ver4DictDecoder.class.getSimpleName();
private static final int FILETYPE_TRIE = 1;
private static final int FILETYPE_FREQUENCY = 2;
private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
private static final int FILETYPE_BIGRAM_FREQ = 4;
+ private static final int FILETYPE_SHORTCUT = 5;
private final File mDictDirectory;
private final DictionaryBufferFactory mBufferFactory;
- private DictBuffer mDictBuffer;
+ protected DictBuffer mDictBuffer;
private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer;
private DictBuffer mBigramBuffer;
+ private DictBuffer mShortcutBuffer;
private SparseTable mBigramAddressTable;
+ private SparseTable mShortcutAddressTable;
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
@@ -89,6 +93,10 @@ public class Ver4DictDecoder extends DictDecoder {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
+ FormatSpec.BIGRAM_FREQ_CONTENT_ID);
+ } else if (fileType == FILETYPE_SHORTCUT) {
+ return new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
+ + FormatSpec.SHORTCUT_CONTENT_ID);
} else {
throw new RuntimeException("Unsupported kind of file : " + fileType);
}
@@ -102,6 +110,8 @@ public class Ver4DictDecoder extends DictDecoder {
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
loadBigramAddressSparseTable();
+ mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
+ loadShortcutAddressSparseTable();
}
@Override
@@ -136,7 +146,18 @@ public class Ver4DictDecoder extends DictDecoder {
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
}
- protected static class PtNodeReader extends DictDecoder.PtNodeReader {
+ // TODO: Let's have something like SparseTableContentsReader in this class.
+ private void loadShortcutAddressSparseTable() throws IOException {
+ final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.SHORTCUT_CONTENT_ID);
+ mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
+ new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
+ }
+
+ protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
return frequencyBuffer.readUnsignedByte();
@@ -147,6 +168,23 @@ public class Ver4DictDecoder extends DictDecoder {
}
}
+ /**
+  * Reads the shortcut targets of a terminal from the shortcut content buffer.
+  *
+  * @param terminalId the terminal id used as the key into the shortcut address table.
+  * @return the shortcut targets in stored order, or null when the address table has no
+  *         entry for this terminal.
+  */
+ private ArrayList<WeightedString> readShortcuts(final int terminalId) {
+     // Look the address up once, under the same content index that is used for positioning
+     // the buffer, instead of probing the table with a literal 0 first.
+     final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
+             terminalId);
+     if (posOfShortcuts == SparseTable.NOT_EXIST) return null;
+
+     final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
+     mShortcutBuffer.position(posOfShortcuts);
+     // Each entry is one flags byte followed by the target string. The HAS_NEXT bit of the
+     // flags tells whether another entry follows; the frequency is masked out of the same byte.
+     while (true) {
+         final int flags = mShortcutBuffer.readUnsignedByte();
+         final String word = CharEncoding.readString(mShortcutBuffer);
+         ret.add(new WeightedString(word,
+                 flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
+         if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+     }
+     return ret;
+ }
+
// TODO: Make this buffer thread safe.
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
@@ -197,14 +235,7 @@ public class Ver4DictDecoder extends DictDecoder {
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
- final ArrayList<WeightedString> shortcutTargets;
- if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
- // readShortcut will add shortcuts to shortcutTargets.
- shortcutTargets = new ArrayList<WeightedString>();
- addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
- } else {
- shortcutTargets = null;
- }
+ final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index b38c33019..f9dcacf77 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -49,6 +49,7 @@ public class Ver4DictEncoder implements DictEncoder {
private File mDictDir;
private String mBaseFilename;
private BigramContentWriter mBigramWriter;
+ private ShortcutContentWriter mShortcutWriter;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
@@ -152,6 +153,39 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
+ /**
+  * A SparseTableContentWriter that writes the shortcut lists of terminals.
+  *
+  * It is configured with the shortcut file extension, content count, address table block
+  * size and content id from FormatSpec.
+  */
+ private static class ShortcutContentWriter extends SparseTableContentWriter {
+ /**
+  * @param name the base name; the shortcut file extension is appended to it.
+  * @param initialCapacity passed through to the superclass constructor.
+  * @param baseDir passed through to the superclass constructor.
+  */
+ public ShortcutContentWriter(final String name, final int initialCapacity,
+ final File baseDir) {
+ super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT,
+ initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+ new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
+ new String[] { FormatSpec.SHORTCUT_CONTENT_ID });
+ }
+
+ /**
+  * Writes the whole shortcut list of one terminal as the content registered under
+  * FormatSpec.SHORTCUT_CONTENT_INDEX for that terminal id.
+  *
+  * @param terminalId the terminal id the list belongs to.
+  * @param shortcutIterator iterator over the shortcut targets; it is consumed by the write.
+  * @throws IOException declared by this method; presumably propagated from the inherited
+  *         write -- TODO confirm.
+  */
+ public void writeShortcutForOneWord(final int terminalId,
+ final Iterator<WeightedString> shortcutIterator) throws IOException {
+ write(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
+ new SparseTableContentWriterInterface() {
+ @Override
+ public void write(final OutputStream outStream) throws IOException {
+ writeShortcutForOneWordInternal(outStream, shortcutIterator);
+ }
+ });
+ }
+
+ /**
+  * Serializes every remaining target of the iterator to the stream: a flags field of
+  * FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE bytes followed by the target word.
+  */
+ private void writeShortcutForOneWordInternal(final OutputStream outStream,
+ final Iterator<WeightedString> shortcutIterator) throws IOException {
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = shortcutIterator.next();
+ // hasNext() is evaluated after next(), so the flags record whether more targets follow.
+ final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
+ shortcutIterator.hasNext(), target.mFrequency);
+ BinaryDictEncoderUtils.writeUIntToStream(outStream, shortcutFlags,
+ FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ CharEncoding.writeString(outStream, target.mWord);
+ }
+ }
+ }
+
private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
throws FileNotFoundException, IOException {
final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
@@ -225,6 +259,8 @@ public class Ver4DictEncoder implements DictEncoder {
writeTerminalData(flatNodes, terminalCount);
mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir);
writeBigrams(flatNodes, dict);
+ mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir);
+ writeShortcuts(flatNodes);
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
@@ -306,29 +342,6 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
- private void writeShortcuts(ArrayList<WeightedString> shortcuts) {
- if (null == shortcuts || shortcuts.isEmpty()) return;
-
- final int indexOfShortcutByteSize = mTriePos;
- mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
- final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
- while (shortcutIterator.hasNext()) {
- final WeightedString target = shortcutIterator.next();
- final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
- shortcutIterator.hasNext(), target.mFrequency);
- mTrieBuf[mTriePos++] = (byte)shortcutFlags;
- final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos,
- target.mWord);
- mTriePos += shortcutShift;
- }
- final int shortcutByteSize = mTriePos - indexOfShortcutByteSize;
- if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
- throw new RuntimeException("Shortcut list too large : " + shortcutByteSize);
- }
- BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize,
- shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
- }
-
private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
throws IOException {
mBigramWriter.openStreams();
@@ -343,6 +356,19 @@ public class Ver4DictEncoder implements DictEncoder {
mBigramWriter.closeStreams();
}
+ /**
+  * Writes the shortcut targets of every PtNode that has any, using mShortcutWriter.
+  *
+  * Opens the writer's streams, writes one shortcut list per PtNode whose mShortcutTargets is
+  * non-null and non-empty (keyed by the PtNode's mTerminalId), then closes the streams.
+  *
+  * @param flatNodes the PtNode arrays to scan.
+  * @throws IOException declared by this method; presumably propagated from mShortcutWriter
+  *         -- TODO confirm.
+  */
+ private void writeShortcuts(final ArrayList<PtNodeArray> flatNodes) throws IOException {
+ mShortcutWriter.openStreams();
+ for (final PtNodeArray nodeArray : flatNodes) {
+ for (final PtNode ptNode : nodeArray.mData) {
+ if (ptNode.mShortcutTargets != null && !ptNode.mShortcutTargets.isEmpty()) {
+ mShortcutWriter.writeShortcutForOneWord(ptNode.mTerminalId,
+ ptNode.mShortcutTargets.iterator());
+ }
+ }
+ }
+ // NOTE(review): this call is not reached if a write above throws.
+ mShortcutWriter.closeStreams();
+ }
+ }
+
@Override
public void writeForwardLinkAddress(int forwardLinkAddress) {
mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
@@ -359,7 +385,6 @@ public class Ver4DictEncoder implements DictEncoder {
writeTerminalId(ptNode.mTerminalId);
}
writeChildrenPosition(ptNode, formatOptions);
- writeShortcuts(ptNode.mShortcutTargets);
}
private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
new file mode 100644
index 000000000..3d8f186ba
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * An implementation of DictUpdater for version 4 binary dictionary.
+ *
+ * Extends Ver4DictDecoder and always asks it for a buffer type chosen by the constructor
+ * below. Only deleteWord is implemented so far; insertWord is an empty stub.
+ */
+@UsedForTesting
+public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
+
+ /**
+  * @param dictDirectory passed through to Ver4DictDecoder.
+  * @param factoryType the requested buffer factory flag. USE_BYTEARRAY is kept when the
+  *        value masked with MASK_DICTBUFFER equals it; any other value is replaced by
+  *        USE_WRITABLE_BYTEBUFFER.
+  */
+ @UsedForTesting
+ public Ver4DictUpdater(final File dictDirectory, final int factoryType) {
+ // DictUpdater must have an updatable DictBuffer.
+ super(dictDirectory, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY)
+ ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER);
+ }
+
+ /**
+  * Marks the PtNode of the given word as deleted by rewriting its flags byte in mDictBuffer.
+  *
+  * Opens the dictionary buffer if it is not open yet and reads the header first. Does
+  * nothing further when getTerminalPosition reports FormatSpec.NOT_VALID_WORD.
+  */
+ @Override
+ public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
+ if (mDictBuffer == null) openDictBuffer();
+ readHeader();
+ final int wordPos = getTerminalPosition(word);
+ if (wordPos != FormatSpec.NOT_VALID_WORD) {
+ mDictBuffer.position(wordPos);
+ final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
+ // Reading the flags moved the buffer position; go back so the put overwrites them.
+ mDictBuffer.position(wordPos);
+ mDictBuffer.put((byte) DynamicBinaryDictIOUtils.markAsDeleted(flags));
+ }
+ }
+
+ /**
+  * Not implemented yet: the body is empty, so calling this method has no effect.
+  */
+ @Override
+ public void insertWord(final String word, final int frequency,
+ final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts,
+ final boolean isNotAWord, final boolean isBlackListEntry)
+ throws IOException, UnsupportedFormatException {
+ // TODO: Implement this method.
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java
index ee322e91b..2abcdc7fa 100644
--- a/java/src/com/android/inputmethod/latin/settings/SettingsValues.java
+++ b/java/src/com/android/inputmethod/latin/settings/SettingsValues.java
@@ -45,8 +45,9 @@ import java.util.Locale;
*/
public final class SettingsValues {
private static final String TAG = SettingsValues.class.getSimpleName();
- // "floatNegativeInfinity" is a special marker string for Float.NEGATIVE_INFINITE
- // currently used for auto-correction
+ // "floatMaxValue" and "floatNegativeInfinity" are special marker strings for
+ // Float.MAX_VALUE and Float.NEGATIVE_INFINITY. Currently used for auto-correction settings.
+ private static final String FLOAT_MAX_VALUE_MARKER_STRING = "floatMaxValue";
private static final String FLOAT_NEGATIVE_INFINITY_MARKER_STRING = "floatNegativeInfinity";
// From resources:
@@ -343,24 +344,28 @@ public final class SettingsValues {
final String[] autoCorrectionThresholdValues = res.getStringArray(
R.array.auto_correction_threshold_values);
// When autoCorrectionThreshold is greater than 1.0, it's like auto correction is off.
- float autoCorrectionThreshold = Float.MAX_VALUE;
+ final float autoCorrectionThreshold;
try {
final int arrayIndex = Integer.valueOf(currentAutoCorrectionSetting);
if (arrayIndex >= 0 && arrayIndex < autoCorrectionThresholdValues.length) {
final String val = autoCorrectionThresholdValues[arrayIndex];
- if (FLOAT_NEGATIVE_INFINITY_MARKER_STRING.equals(val)) {
+ if (FLOAT_MAX_VALUE_MARKER_STRING.equals(val)) {
+ autoCorrectionThreshold = Float.MAX_VALUE;
+ } else if (FLOAT_NEGATIVE_INFINITY_MARKER_STRING.equals(val)) {
autoCorrectionThreshold = Float.NEGATIVE_INFINITY;
} else {
autoCorrectionThreshold = Float.parseFloat(val);
}
+ } else {
+ autoCorrectionThreshold = Float.MAX_VALUE;
}
- } catch (NumberFormatException e) {
+ } catch (final NumberFormatException e) {
// Whenever the threshold settings are correct, never come here.
- autoCorrectionThreshold = Float.MAX_VALUE;
Log.w(TAG, "Cannot load auto correction threshold setting."
+ " currentAutoCorrectionSetting: " + currentAutoCorrectionSetting
+ ", autoCorrectionThresholdValues: "
+ Arrays.toString(autoCorrectionThresholdValues), e);
+ return Float.MAX_VALUE;
}
return autoCorrectionThreshold;
}
diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
index a6829b476..5492c6070 100644
--- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h
@@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy {
virtual float getMultiWordCostMultiplier() const = 0;
+ virtual int getLastDecayedTime() const = 0;
+
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 8753c6eb0..b1170e251 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -360,13 +360,13 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
}
bool DynamicBigramListPolicy::updateProbabilityForDecay(
- BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
+ const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
int *const bigramEntryPos, bool *const outRemoved) const {
*outRemoved = false;
if (mIsDecayingDict) {
// Update bigram probability for decaying.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
- BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
// Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index b358b4ed5..0504b59d5 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -27,6 +27,7 @@
namespace latinime {
class BufferWithExtendableBuffer;
+class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
@@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy;
*/
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
- DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
+ DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const bool isDecayingDict)
- : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
+ : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
+ mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {}
@@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const bool mIsDecayingDict;
@@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
- bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
+ bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index 324b53062..a17a0acf6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@@ -29,7 +30,8 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) {
const int newProbability =
- ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
+ ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
+ mHeaderPolicy);
int writingPos = node->getProbabilityFieldPos();
// Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 463715af5..3ca2f2a01 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -29,6 +29,8 @@
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
class DynamicPatriciaTrieGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
@@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ const DictionaryHeaderStructurePolicy *const headerPolicy,
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
- : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
- mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
+ : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
+ mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
+ mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@@ -72,9 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
+ const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
- const int mIsDecayingDict;
+ const bool mIsDecayingDict;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
@@ -85,7 +90,8 @@ class DynamicPatriciaTrieGcEventListeners {
class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
- TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
+ TraversePolicyToUpdateBigramProbability(
+ DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 60d0db0c0..31e3fb42f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -42,7 +42,6 @@ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUER
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@@ -314,15 +313,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
- const bool runGCwithDecay = needsToDecay();
- DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
- &mShortcutListPolicy, runGCwithDecay);
+ const bool needsToDecay = mHeaderPolicy.isDecayingDict()
+ && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
+ DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
+ &mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
+ &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
- if (runGCwithDecay) {
- mNeedsToDecayForTesting = false;
- }
+ mNeedsToDecayForTesting = false;
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@@ -344,16 +343,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy.isDecayingDict()) {
- if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
- // Unigram count exceeds the limit.
- return true;
- } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
- // Bigram count exceeds the limit.
- return true;
- } else if (mindsBlockByGC && needsToDecay()) {
- // Time to update probabilities for decaying.
- return true;
- }
+ return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
+ mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
}
return false;
}
@@ -369,9 +360,4 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
}
}
-bool DynamicPatriciaTriePolicy::needsToDecay() const {
- return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
- || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
-}
-
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index c3bbe9977..903f65e8e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
+ mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
@@ -105,7 +105,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
- static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
@@ -115,8 +114,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
-
- bool needsToDecay() const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 70a9ee564..067c8ec98 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
- if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) {
+ if (mNeedsToDecay) {
+ ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
+ }
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
@@ -481,14 +484,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
- int *const outBigramCount) {
+ const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- this, mBuffer, mNeedsToDecay);
+ headerPolicy, this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
@@ -505,7 +508,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
-
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
@@ -524,7 +526,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
+ DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index 0caf29120..ca8664729 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
- bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
- int *const outUnigramCount, int *const outBigramCount);
+ bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount);
int getUpdatedProbability(const int originalProbability, const int newProbability);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index b502fe25d..19ca35481 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -15,10 +15,12 @@
*/
#include <cmath>
+#include <ctime>
#include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
@@ -35,8 +37,14 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
+const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
+
+// Stores the value returned by time(0) in mCurrentTime.
+void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
+ mCurrentTime = time(0);
+}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
@@ -76,19 +84,44 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
-/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
- const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
+/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
+ const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
+ int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
// TODO: Implement the decay in more proper way.
- const float currentRate = static_cast<float>(currentEncodedProbability)
- / static_cast<float>(MAX_ENCODED_PROBABILITY);
- const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
- + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
- const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
- if (thresholdToDecay < randValue) {
- return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
- } else {
- return currentEncodedProbability;
+ for (int i = 0; i < decayIterationCount; ++i) {
+ const float currentRate = static_cast<float>(currentEncodedProbability)
+ / static_cast<float>(MAX_ENCODED_PROBABILITY);
+ const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
+ + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
+ const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
+ if (thresholdToDecay < randValue) {
+ currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
+ 0);
+ }
+ }
+ return currentEncodedProbability;
+}
+
+// Decides whether the dictionary should be decayed now.
+// Returns true whenever unigramCount or bigramCount has reached its MAX_*_COUNT limit.
+// Otherwise returns false if mindsBlockByDecay is set; if it is not set, returns true only
+// when headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS is earlier than time(0).
+/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
+ const int unigramCount, const int bigramCount,
+ const DictionaryHeaderStructurePolicy *const headerPolicy) {
+ if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
+ // Unigram count exceeds the limit.
+ return true;
+ } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
+ // Bigram count exceeds the limit.
+ return true;
+ }
+ // The time-based check below is skipped for callers that pass mindsBlockByDecay.
+ if (mindsBlockByDecay) {
+ return false;
+ }
+ // Compares against time(0) directly, not against sTimeKeeper.
+ if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
+ // Time to decay.
+ return true;
}
+ return false;
}
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index d666f22aa..2ad423874 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -23,16 +23,32 @@
namespace latinime {
+class DictionaryHeaderStructurePolicy;
+
// TODO: Check the elapsed time and decrease the probability depending on the time. A time field
// needs to be introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
+ class TimeKeeper {
+ public:
+ TimeKeeper() : mCurrentTime(0) {}
+ void setCurrentTime();
+ int peekCurrentTime() const { return mCurrentTime; };
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
+
+ int mCurrentTime;
+ };
+
static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC;
+ static TimeKeeper sTimeKeeper;
+
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
@@ -41,7 +57,11 @@ class ForgettingCurveUtils {
static int isValidEncodedProbability(const int encodedProbability);
- static int getEncodedProbabilityToSave(const int encodedProbability);
+ static int getEncodedProbabilityToSave(const int encodedProbability,
+ const DictionaryHeaderStructurePolicy *const headerPolicy);
+
+ static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
+ const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
@@ -68,6 +88,7 @@ class ForgettingCurveUtils {
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY;
+ static const int DECAY_INTERVAL_SECONDS;
static const ProbabilityTable sProbabilityTable;
diff --git a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
index ecceb60d3..66637ac4b 100644
--- a/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/scoring_params.cpp
@@ -27,30 +27,30 @@ const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE = 170;
const int ScoringParams::MAX_CACHE_DIC_NODE_SIZE_FOR_SINGLE_POINT = 310;
const int ScoringParams::THRESHOLD_SHORT_WORD_LENGTH = 4;
-const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.132f;
-const float ScoringParams::PROXIMITY_COST = 0.095f;
-const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.102f;
-const float ScoringParams::FIRST_PROXIMITY_COST = 0.019f;
-const float ScoringParams::OMISSION_COST = 0.458f;
-const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.491f;
-const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.582f;
-const float ScoringParams::INSERTION_COST = 0.730f;
-const float ScoringParams::TERMINAL_INSERTION_COST = 0.93f;
-const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.586f;
-const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.70f;
-const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.623f;
-const float ScoringParams::TRANSPOSITION_COST = 0.526f;
-const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.319f;
-const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.380f;
-const float ScoringParams::SUBSTITUTION_COST = 0.383f;
-const float ScoringParams::COST_NEW_WORD = 0.042f;
-const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.25f;
-const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.123f;
-const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.545f;
-const float ScoringParams::COST_LOOKAHEAD = 0.073f;
-const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.093f;
-const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.041f;
-const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.447f;
+const float ScoringParams::DISTANCE_WEIGHT_LENGTH = 0.1524f;
+const float ScoringParams::PROXIMITY_COST = 0.0694f;
+const float ScoringParams::FIRST_CHAR_PROXIMITY_COST = 0.072f;
+const float ScoringParams::FIRST_PROXIMITY_COST = 0.07788f;
+const float ScoringParams::OMISSION_COST = 0.4676f;
+const float ScoringParams::OMISSION_COST_SAME_CHAR = 0.399f;
+const float ScoringParams::OMISSION_COST_FIRST_CHAR = 0.5256f;
+const float ScoringParams::INSERTION_COST = 0.7248f;
+const float ScoringParams::TERMINAL_INSERTION_COST = 0.9828f;
+const float ScoringParams::INSERTION_COST_SAME_CHAR = 0.5508f;
+const float ScoringParams::INSERTION_COST_PROXIMITY_CHAR = 0.674f;
+const float ScoringParams::INSERTION_COST_FIRST_CHAR = 0.639f;
+const float ScoringParams::TRANSPOSITION_COST = 0.5608f;
+const float ScoringParams::SPACE_SUBSTITUTION_COST = 0.339f;
+const float ScoringParams::ADDITIONAL_PROXIMITY_COST = 0.4576f;
+const float ScoringParams::SUBSTITUTION_COST = 0.3806f;
+const float ScoringParams::COST_NEW_WORD = 0.0292f;
+const float ScoringParams::COST_SECOND_OR_LATER_WORD_FIRST_CHAR_UPPERCASE = 0.3224f;
+const float ScoringParams::DISTANCE_WEIGHT_LANGUAGE = 1.1214f;
+const float ScoringParams::COST_FIRST_LOOKAHEAD = 0.4786f;
+const float ScoringParams::COST_LOOKAHEAD = 0.00624f;
+const float ScoringParams::HAS_PROXIMITY_TERMINAL_COST = 0.06836f;
+const float ScoringParams::HAS_EDIT_CORRECTION_TERMINAL_COST = 0.0362f;
+const float ScoringParams::HAS_MULTI_WORD_TERMINAL_COST = 0.4182f;
const float ScoringParams::TYPING_BASE_OUTPUT_SCORE = 1.0f;
const float ScoringParams::TYPING_MAX_OUTPUT_SCORE_PER_INPUT = 0.1f;
const float ScoringParams::NORMALIZED_SPATIAL_DISTANCE_THRESHOLD_FOR_EDIT = 0.045f;
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
index b2d31c21f..ded8eaa97 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -50,8 +50,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
}
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
- // Entries having low probability would be suppressed once in 2 GCs.
- final int count = 2;
+ // Entries having low probability would be suppressed once in 3 GCs.
+ final int count = 3;
for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 6a21522f9..5b8f0e977 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
+import android.text.TextUtils;
import android.util.Pair;
import com.android.inputmethod.latin.makedict.CodePointUtils;
@@ -126,7 +127,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
public void testRandomlyAddUnigramWord() {
final int wordCount = 1000;
final int codePointSetSize = 50;
- final int seed = 123456789;
+ final long seed = System.currentTimeMillis();
File dictFile = null;
try {
@@ -223,7 +224,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 50;
- final int seed = 11111;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
File dictFile = null;
try {
@@ -234,43 +236,42 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
final ArrayList<String> words = new ArrayList<String>();
- // Test a word that isn't contained within the dictionary.
- final Random random = new Random(seed);
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final int[] unigramProbabilities = new int[wordCount];
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
- unigramProbabilities[i] = unigramProbability;
+ unigramProbabilities.put(word, unigramProbability);
binaryDictionary.addUnigramWord(word, unigramProbability);
}
- final int[][] probabilities = new int[wordCount][wordCount];
-
- for (int i = 0; i < wordCount; ++i) {
- for (int j = 0; j < wordCount; ++j) {
- probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
- }
- }
-
for (int i = 0; i < bigramCount; i++) {
- final int word0Index = random.nextInt(wordCount);
- final int word1Index = random.nextInt(wordCount);
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
+ final String word0 = words.get(random.nextInt(wordCount));
+ final String word1 = words.get(random.nextInt(wordCount));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
- probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
- unigramProbabilities[word1Index], bigramProbability);
+ bigramProbabilities.put(bigram, bigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
}
- for (int i = 0; i < words.size(); i++) {
- for (int j = 0; j < words.size(); j++) {
- assertEquals(probabilities[i][j],
- binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
- }
+ for (final Pair<String, String> bigram : bigramWords) {
+ final int unigramProbability = unigramProbabilities.get(bigram.second);
+ final int bigramProbability = bigramProbabilities.get(bigram);
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(probability,
+ binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
dictFile.delete();
@@ -419,8 +420,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int wordCount = 100;
final int bigramCount = 1000;
final int codePointSetSize = 30;
- // TODO: Use various seeds such as a current timestamp to make this test more random.
- final int seed = 314159265;
+ final long seed = System.currentTimeMillis();
+ final Random random = new Random(seed);
File dictFile = null;
try {
@@ -432,35 +433,32 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
final ArrayList<String> words = new ArrayList<String>();
- // Test a word that isn't contained within the dictionary.
- final Random random = new Random(seed);
+ final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
- final int[] unigramProbabilities = new int[wordCount];
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final HashMap<Pair<String, String>, Integer> bigramProbabilities =
+ new HashMap<Pair<String, String>, Integer>();
+
for (int i = 0; i < wordCount; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
- unigramProbabilities[i] = unigramProbability;
+ unigramProbabilities.put(word, unigramProbability);
binaryDictionary.addUnigramWord(word, unigramProbability);
}
- final int[][] probabilities = new int[wordCount][wordCount];
-
- for (int i = 0; i < wordCount; ++i) {
- for (int j = 0; j < wordCount; ++j) {
- probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
- }
- }
-
for (int i = 0; i < bigramCount; i++) {
- final int word0Index = random.nextInt(wordCount);
- final int word1Index = random.nextInt(wordCount);
- final String word0 = words.get(word0Index);
- final String word1 = words.get(word1Index);
+ final String word0 = words.get(random.nextInt(wordCount));
+ final String word1 = words.get(random.nextInt(wordCount));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
+ final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
+ bigramWords.add(bigram);
final int bigramProbability = random.nextInt(0xF);
- probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
- unigramProbabilities[word1Index], bigramProbability);
+ bigramProbabilities.put(bigram, bigramProbability);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
}
@@ -470,12 +468,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- for (int i = 0; i < words.size(); i++) {
- for (int j = 0; j < words.size(); j++) {
- assertEquals(probabilities[i][j],
- binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
- }
+ for (final Pair<String, String> bigram : bigramWords) {
+ final int unigramProbability = unigramProbabilities.get(bigram.second);
+ final int bigramProbability = bigramProbabilities.get(bigram);
+ final int probability = binaryDictionary.calculateProbability(unigramProbability,
+ bigramProbability);
+ assertEquals(probability,
+ binaryDictionary.getBigramProbability(bigram.first, bigram.second));
}
+
dictFile.delete();
}
@@ -487,8 +488,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final float addBigramProb = 0.8f;
final float removeBigramProb = 0.2f;
final int codePointSetSize = 30;
- final int seed = 141421356;
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -539,6 +540,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
}
final String word0 = words.get(word0Index);
final String word1 = words.get(word1Index);
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
final int bigramProbability = random.nextInt(0xF);
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
bigramWords.add(bigram);
@@ -586,8 +590,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
public void testAddManyUnigramsAndFlushWithGC() {
final int flashWithGCIterationCount = 3;
final int codePointSetSize = 50;
- final int seed = 22360679;
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -632,8 +636,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int codePointSetSize = 50;
final int unigramCountPerIteration = 1000;
final int bigramCountPerIteration = 2000;
- final int seed = 1123581321;
-
+ final long seed = System.currentTimeMillis();
final Random random = new Random(seed);
File dictFile = null;
@@ -661,6 +664,9 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < bigramCountPerIteration; j++) {
final String word0 = words.get(random.nextInt(words.size()));
final String word1 = words.get(random.nextInt(words.size()));
+ if (TextUtils.equals(word0, word1)) {
+ continue;
+ }
bigrams.add(new Pair<String, String>(word0, word1));
final int bigramProbability = random.nextInt(0xF);
binaryDictionary.addBigramWords(word0, word1, bigramProbability);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index aa1658301..0189b3334 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -646,7 +646,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
}
- public void testDeleteWord() {
+ private void runTestDeleteWord(final FormatOptions formatOptions) {
final String dictName = "testDeleteWord";
final String dictVersion = Long.toString(System.currentTimeMillis());
final File file = setUpDictionaryFile(dictName, dictVersion);
@@ -655,10 +655,17 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
new FusionDictionary.DictionaryOptions(
new HashMap<String, String>(), false, false));
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
- timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
+ timeWritingDictToFile(file, dict, formatOptions);
- final Ver3DictUpdater dictUpdater = new Ver3DictUpdater(file,
- DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ final DictUpdater dictUpdater;
+ if (formatOptions.mVersion == 3) {
+ dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ } else if (formatOptions.mVersion == 4) {
+ dictUpdater = new Ver4DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ } else {
+ throw new RuntimeException("DictUpdater for version " + formatOptions.mVersion
+ + " doesn't exist.");
+ }
try {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
@@ -676,4 +683,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} catch (UnsupportedFormatException e) {
}
}
+
+ public void testDeleteWord() {
+ runTestDeleteWord(VERSION3_WITH_DYNAMIC_UPDATE);
+ runTestDeleteWord(VERSION4_WITH_DYNAMIC_UPDATE);
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
index acd65856c..afe5adb73 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
@@ -45,6 +45,9 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
+ private static final int VERSION3 = 3;
+ private static final int VERSION4 = 4;
+
private static final String[] CHARACTERS = {
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
@@ -183,11 +186,16 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
// Returns the amount of time it took to insert the word.
private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams,
- final ArrayList<WeightedString> shortcuts) {
+ final ArrayList<WeightedString> shortcuts, final int formatVersion) {
long amountOfTime = -1;
try {
- final Ver3DictUpdater dictUpdater = new Ver3DictUpdater(file,
- DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ final DictUpdater dictUpdater;
+ if (formatVersion == VERSION3) {
+ dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ } else {
+ throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
+ + " exist.");
+ }
if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
@@ -204,10 +212,15 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
return amountOfTime;
}
- private void deleteWord(final File file, final String word) {
+ private void deleteWord(final File file, final String word, final int formatVersion) {
try {
- final Ver3DictUpdater dictUpdater = new Ver3DictUpdater(file,
- DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ final DictUpdater dictUpdater;
+ if (formatVersion == VERSION3) {
+ dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
+ } else {
+ throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
+ + " exist.");
+ }
dictUpdater.deleteWord(word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
@@ -229,7 +242,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
}
- public void testInsertWord() {
+ private void runTestInsertWord(final int formatVersion) {
File file = null;
try {
file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION,
@@ -253,33 +266,37 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
- insertAndCheckWord(file, "abcde", 10, false, null, null);
+ insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion);
- insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null);
+ insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion);
checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
- insertAndCheckWord(file, "abcdabcd", 10, false, null, null);
+ insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion);
checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
// update the existing word.
- insertAndCheckWord(file, "abcdabcd", 15, true, null, null);
+ insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion);
// split 1
- insertAndCheckWord(file, "ab", 20, false, null, null);
+ insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion);
// split 2
- insertAndCheckWord(file, "ami", 30, false, null, null);
+ insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion);
- deleteWord(file, "ami");
+ deleteWord(file, "ami", formatVersion);
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
- insertAndCheckWord(file, "abcdabfg", 30, false, null, null);
+ insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion);
- deleteWord(file, "abcd");
+ deleteWord(file, "abcd", formatVersion);
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
}
- public void testInsertWordWithBigrams() {
+ public void testInsertWord() {
+ runTestInsertWord(VERSION3);
+ }
+
+ private void runTestInsertWordWithBigrams(final int formatVersion) {
File file = null;
try {
file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION,
@@ -306,8 +323,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
banana.add(new WeightedString("banana", 10));
- insertAndCheckWord(file, "banana", 0, false, null, null);
- insertAndCheckWord(file, "recursive", 60, true, banana, null);
+ insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion);
+ insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion);
final PtNodeInfo info = findWordFromFile(file, "recursive");
int bananaPos = getWordPosition(file, "banana");
@@ -316,7 +333,11 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
}
- public void testRandomWords() {
+ public void testInsertWordWithBigrams() {
+ runTestInsertWordWithBigrams(VERSION3);
+ }
+
+ private void runTestRandomWords(final int formatVersion) {
File file = null;
try {
file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION,
@@ -345,7 +366,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
int cnt = 0;
for (final String word : sWords) {
final long diff = insertAndCheckWord(file, word,
- cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null);
+ cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatVersion);
maxTimeToInsert = Math.max(maxTimeToInsert, diff);
minTimeToInsert = Math.min(minTimeToInsert, diff);
sum += diff;
@@ -356,8 +377,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
}
+ Log.d(TAG, "Test version " + formatVersion);
Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
}
+
+ public void testRandomWords() {
+ runTestRandomWords(VERSION3);
+ }
}