aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java30
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java36
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/DictUpdater.java50
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java104
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java7
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java4
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java82
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java20
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java144
-rw-r--r--java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java1
10 files changed, 320 insertions, 158 deletions
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 0985aae58..c79a4ff90 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -249,6 +249,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
final File file = new File(mContext.getFilesDir(), mFilename);
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
+ mBinaryDictionary = new BinaryDictionary(
+ file.getAbsolutePath(), 0 /* offset */, file.length(),
+ true /* useFullEditDistance */, null, mDictType, mIsUpdatable);
} else {
mDictionaryWriter.clear();
}
@@ -273,11 +276,26 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
lastModifiedTime);
}
+ /**
+ * Check whether GC is needed and run GC if required.
+ */
protected void runGCIfRequired(final boolean mindsBlockByGC) {
if (!ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) return;
+ getExecutor(mFilename).execute(new Runnable() {
+ @Override
+ public void run() {
+ runGCIfRequiredInternalLocked(mindsBlockByGC);
+ }
+ });
+ }
+
+ private void runGCIfRequiredInternalLocked(final boolean mindsBlockByGC) {
+ if (!ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) return;
+ // Calls to needsToRunGC() need to be serialized.
if (mBinaryDictionary.needsToRunGC(mindsBlockByGC)) {
if (setIsRegeneratingIfNotRegenerating()) {
- getExecutor(mFilename).execute(new Runnable() {
+ // Run GC after currently existing time sensitive operations.
+ getExecutor(mFilename).executePrioritized(new Runnable() {
@Override
public void run() {
try {
@@ -300,11 +318,11 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
Log.w(TAG, "addWordDynamically is called for non-updatable dictionary: " + mFilename);
return;
}
- runGCIfRequired(true /* mindsBlockByGC */);
getExecutor(mFilename).execute(new Runnable() {
@Override
public void run() {
if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) {
+ runGCIfRequiredInternalLocked(true /* mindsBlockByGC */);
mBinaryDictionary.addUnigramWord(word, frequency);
} else {
// TODO: Remove.
@@ -324,11 +342,11 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
+ mFilename);
return;
}
- runGCIfRequired(true /* mindsBlockByGC */);
getExecutor(mFilename).execute(new Runnable() {
@Override
public void run() {
if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) {
+ runGCIfRequiredInternalLocked(true /* mindsBlockByGC */);
mBinaryDictionary.addBigramWords(word0, word1, frequency);
} else {
// TODO: Remove.
@@ -348,11 +366,11 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
+ mFilename);
return;
}
- runGCIfRequired(true /* mindsBlockByGC */);
getExecutor(mFilename).execute(new Runnable() {
@Override
public void run() {
if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) {
+ runGCIfRequiredInternalLocked(true /* mindsBlockByGC */);
mBinaryDictionary.removeBigramWords(word0, word1);
} else {
// TODO: Remove.
@@ -479,8 +497,8 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
final long length = file.length();
// Build the new binary dictionary
- final BinaryDictionary newBinaryDictionary = new BinaryDictionary(filename, 0, length,
- true /* useFullEditDistance */, null, mDictType, mIsUpdatable);
+ final BinaryDictionary newBinaryDictionary = new BinaryDictionary(filename, 0 /* offset */,
+ length, true /* useFullEditDistance */, null, mDictType, mIsUpdatable);
// Ensure all threads accessing the current dictionary have finished before
// swapping in the new one.
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index e90137674..0f7d2f6c9 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -288,42 +288,6 @@ public final class BinaryDictIOUtils {
return BinaryDictEncoderUtils.getByteSize(value);
}
- // TODO: Remove this method.
- @Deprecated
- static void skipPtNode(final DictBuffer dictBuffer, final FormatOptions formatOptions) {
- final int flags = dictBuffer.readUnsignedByte();
- BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
- skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
- BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions);
- if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
- if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
- final int shortcutsSize = dictBuffer.readUnsignedShort();
- dictBuffer.position(dictBuffer.position() + shortcutsSize
- - FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
- }
- if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
- int bigramCount = 0;
- while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- final int bigramFlags = dictBuffer.readUnsignedByte();
- switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
- dictBuffer.readUnsignedByte();
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
- dictBuffer.readUnsignedShort();
- break;
- case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
- dictBuffer.readUnsignedInt24();
- break;
- }
- if ((bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT) == 0) break;
- }
- if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- throw new RuntimeException("Too many bigrams in a PtNode.");
- }
- }
- }
-
static void skipString(final DictBuffer dictBuffer,
final boolean hasMultipleChars) {
if (hasMultipleChars) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java
new file mode 100644
index 000000000..413d0301c
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/DictUpdater.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * An interface of a binary dictionary updater.
+ */
+public interface DictUpdater {
+
+ /**
+ * Deletes the word from the binary dictionary.
+ *
+ * @param word the word to be deleted.
+ */
+ public void deleteWord(final String word) throws IOException, UnsupportedFormatException;
+
+ /**
+ * Inserts a word into a binary dictionary.
+ *
+ * @param word the word to be inserted.
+ * @param frequency the frequency of the new word.
+ * @param bigramStrings bigram list, or null if none.
+ * @param shortcuts shortcut list, or null if none.
+ * @param isBlackListEntry whether this should be a blacklist entry.
+ */
+ // TODO: Support batch insertion.
+ public void insertWord(final String word, final int frequency,
+ final ArrayList<WeightedString> bigramStrings,
+ final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
+ final boolean isBlackListEntry) throws IOException, UnsupportedFormatException;
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
index 5c6994119..336277196 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
@@ -42,44 +42,22 @@ public final class DynamicBinaryDictIOUtils {
// This utility class is not publicly instantiable.
}
- private static int markAsDeleted(final int flags) {
+ /* package */ static int markAsDeleted(final int flags) {
return (flags & (~FormatSpec.MASK_CHILDREN_ADDRESS_TYPE)) | FormatSpec.FLAG_IS_DELETED;
}
/**
- * Delete the word from the binary file.
- *
- * @param dictDecoder the dict decoder.
- * @param word the word we delete
- * @throws IOException
- * @throws UnsupportedFormatException
- */
- @UsedForTesting
- public static void deleteWord(final Ver3DictDecoder dictDecoder, final String word)
- throws IOException, UnsupportedFormatException {
- final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
- dictBuffer.position(0);
- dictDecoder.readHeader();
- final int wordPosition = dictDecoder.getTerminalPosition(word);
- if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
-
- dictBuffer.position(wordPosition);
- final int flags = dictBuffer.readUnsignedByte();
- dictBuffer.position(wordPosition);
- dictBuffer.put((byte)markAsDeleted(flags));
- }
-
- /**
* Update a parent address in a PtNode that is referred to by ptNodeOriginAddress.
*
- * @param dictBuffer the DictBuffer to write.
+ * @param dictUpdater the DictUpdater to write.
* @param ptNodeOriginAddress the address of the PtNode.
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
- private static void updateParentAddress(final DictBuffer dictBuffer,
+ private static void updateParentAddress(final Ver3DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
+ final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
final int originalPosition = dictBuffer.position();
dictBuffer.position(ptNodeOriginAddress);
if (!formatOptions.mSupportsDynamicUpdate) {
@@ -104,41 +82,41 @@ public final class DynamicBinaryDictIOUtils {
/**
* Update parent addresses in a node array stored at ptNodeOriginAddress.
*
- * @param dictBuffer the DictBuffer to be modified.
+ * @param dictUpdater the DictUpdater to be modified.
* @param ptNodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
- private static void updateParentAddresses(final DictBuffer dictBuffer,
+ private static void updateParentAddresses(final Ver3DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
- final int originalPosition = dictBuffer.position();
- dictBuffer.position(ptNodeOriginAddress);
+ final int originalPosition = dictUpdater.getPosition();
+ dictUpdater.setPosition(ptNodeOriginAddress);
do {
- final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
+ final int count = dictUpdater.readPtNodeCount();
for (int i = 0; i < count; ++i) {
- updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
+ updateParentAddress(dictUpdater, dictUpdater.getPosition(), newParentAddress,
formatOptions);
- BinaryDictIOUtils.skipPtNode(dictBuffer, formatOptions);
+ dictUpdater.skipPtNode(formatOptions);
}
- final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
- dictBuffer.position(forwardLinkAddress);
- } while (formatOptions.mSupportsDynamicUpdate
- && dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
- dictBuffer.position(originalPosition);
+ if (!dictUpdater.readAndFollowForwardLink()) break;
+ if (dictUpdater.getPosition() == FormatSpec.NO_FORWARD_LINK_ADDRESS) break;
+ } while (formatOptions.mSupportsDynamicUpdate);
+ dictUpdater.setPosition(originalPosition);
}
/**
* Update a children address in a PtNode that is addressed by ptNodeOriginAddress.
*
- * @param dictBuffer the DictBuffer to write.
+ * @param dictUpdater the DictUpdater to write.
* @param ptNodeOriginAddress the address of the PtNode.
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
- private static void updateChildrenAddress(final DictBuffer dictBuffer,
+ private static void updateChildrenAddress(final Ver3DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
+ final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
final int originalPosition = dictBuffer.position();
dictBuffer.position(ptNodeOriginAddress);
final int flags = dictBuffer.readUnsignedByte();
@@ -155,31 +133,33 @@ public final class DynamicBinaryDictIOUtils {
* Helper method to move a PtNode to the tail of the file.
*/
private static int movePtNode(final OutputStream destination,
- final DictBuffer dictBuffer, final PtNodeInfo info,
+ final Ver3DictUpdater dictUpdater, final PtNodeInfo info,
final int nodeArrayOriginAddress, final int oldNodeAddress,
final FormatOptions formatOptions) throws IOException {
- updateParentAddress(dictBuffer, oldNodeAddress, dictBuffer.limit() + 1, formatOptions);
+ final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
+ updateParentAddress(dictUpdater, oldNodeAddress, dictBuffer.limit() + 1, formatOptions);
dictBuffer.position(oldNodeAddress);
final int currentFlags = dictBuffer.readUnsignedByte();
dictBuffer.position(oldNodeAddress);
dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.PTNODE_FLAGS_SIZE;
- updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
+ updateForwardLink(dictUpdater, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNodes(destination, new PtNodeInfo[] { info });
return size;
}
@SuppressWarnings("unused")
- private static void updateForwardLink(final DictBuffer dictBuffer,
+ private static void updateForwardLink(final Ver3DictUpdater dictUpdater,
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) {
- dictBuffer.position(nodeArrayOriginAddress);
+ final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
+ dictUpdater.setPosition(nodeArrayOriginAddress);
int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) {
- final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
+ final int count = dictUpdater.readPtNodeCount();
for (int i = 0; i < count; ++i) {
- BinaryDictIOUtils.skipPtNode(dictBuffer, formatOptions);
+ dictUpdater.readPtNode(dictUpdater.getPosition(), formatOptions);
}
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
@@ -207,7 +187,7 @@ public final class DynamicBinaryDictIOUtils {
* @param shortcutTargets the shortcut targets for this PtNode.
* @param bigrams the bigrams for this PtNode.
* @param destination the stream representing the tail of the file.
- * @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
+ * @param dictUpdater the DictUpdater.
* @param oldPtNodeArrayOrigin the origin of the old PtNode array this PtNode was a part of.
* @param oldPtNodeOrigin the old origin where this PtNode used to be stored.
* @param formatOptions format options for this dictionary.
@@ -218,7 +198,7 @@ public final class DynamicBinaryDictIOUtils {
final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
- final DictBuffer dictBuffer, final int oldPtNodeArrayOrigin,
+ final Ver3DictUpdater dictUpdater, final int oldPtNodeArrayOrigin,
final int oldPtNodeOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0;
final int newPtNodeOrigin = fileEndAddress + 1;
@@ -231,7 +211,7 @@ public final class DynamicBinaryDictIOUtils {
flags, writtenCharacters, frequency, parentAddress,
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams);
- movePtNode(destination, dictBuffer, newInfo, oldPtNodeArrayOrigin, oldPtNodeOrigin,
+ movePtNode(destination, dictUpdater, newInfo, oldPtNodeArrayOrigin, oldPtNodeOrigin,
formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
@@ -239,7 +219,7 @@ public final class DynamicBinaryDictIOUtils {
/**
* Insert a word into a binary dictionary.
*
- * @param dictDecoder the dict decoder.
+ * @param dictUpdater the dict updater.
* @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert.
* @param frequency the frequency of the new word.
@@ -252,17 +232,17 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting
- public static void insertWord(final Ver3DictDecoder dictDecoder,
+ public static void insertWord(final Ver3DictUpdater dictUpdater,
final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
- final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
+ final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) {
- int position = dictDecoder.getTerminalPosition(bigram.mWord);
+ int position = dictUpdater.getTerminalPosition(bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here.
} else {
@@ -277,7 +257,7 @@ public final class DynamicBinaryDictIOUtils {
// find the insert position of the word.
if (dictBuffer.position() != 0) dictBuffer.position(0);
- final FileHeader fileHeader = dictDecoder.readHeader();
+ final FileHeader fileHeader = dictUpdater.readHeader();
int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position();
final int[] codePoints = FusionDictionary.getCodePoints(word);
@@ -292,7 +272,7 @@ public final class DynamicBinaryDictIOUtils {
for (int i = 0; i < ptNodeCount; ++i) {
address = dictBuffer.position();
- final PtNodeInfo currentInfo = dictDecoder.readPtNode(address,
+ final PtNodeInfo currentInfo = dictUpdater.readPtNode(address,
fileHeader.mFormatOptions);
final boolean isMovedNode = BinaryDictIOUtils.isMovedPtNode(currentInfo.mFlags,
fileHeader.mFormatOptions);
@@ -318,12 +298,12 @@ public final class DynamicBinaryDictIOUtils {
false /* isBlackListEntry */, fileHeader.mFormatOptions);
int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p, flags,
frequency, nodeParentAddress, shortcuts, bigrams, destination,
- dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
+ dictUpdater, nodeOriginAddress, address, fileHeader.mFormatOptions);
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
- updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
+ updateParentAddresses(dictUpdater, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions);
}
final PtNodeInfo newInfo2 = new PtNodeInfo(
@@ -359,13 +339,13 @@ public final class DynamicBinaryDictIOUtils {
fileHeader.mFormatOptions);
int written = movePtNode(newNodeAddress, currentInfo.mCharacters, p,
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
- destination, dictBuffer, nodeOriginAddress, address,
+ destination, dictUpdater, nodeOriginAddress, address,
fileHeader.mFormatOptions);
final int[] suffixCharacters = Arrays.copyOfRange(
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
- updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
+ updateParentAddresses(dictUpdater, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions);
}
final int suffixFlags = BinaryDictEncoderUtils.makePtNodeFlags(
@@ -416,7 +396,7 @@ public final class DynamicBinaryDictIOUtils {
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
bigrams);
- movePtNode(destination, dictBuffer, newInfo, nodeOriginAddress, address,
+ movePtNode(destination, dictUpdater, newInfo, nodeOriginAddress, address,
fileHeader.mFormatOptions);
return;
}
@@ -435,7 +415,7 @@ public final class DynamicBinaryDictIOUtils {
* ab - cd - e
*/
final int newNodeArrayAddress = dictBuffer.limit();
- updateChildrenAddress(dictBuffer, address, newNodeArrayAddress,
+ updateChildrenAddress(dictUpdater, address, newNodeArrayAddress,
fileHeader.mFormatOptions);
final int newNodeAddress = newNodeArrayAddress + 1;
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 9481a8c14..a5516bd41 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -266,11 +266,14 @@ public final class FormatSpec {
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
static final String BIGRAM_FILE_EXTENSION = ".bigram";
- static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
- static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index";
+ static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup";
+ static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
+ static final int BIGRAM_CONTENT_COUNT = 1;
+ static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
+ static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
index bf5a28d62..b87259c38 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
@@ -53,9 +53,9 @@ public class Ver3DictDecoder extends DictDecoder {
}
}
- private final File mDictionaryBinaryFile;
+ protected final File mDictionaryBinaryFile;
private final DictionaryBufferFactory mBufferFactory;
- private DictBuffer mDictBuffer;
+ protected DictBuffer mDictBuffer;
/* package */ Ver3DictDecoder(final File file, final int factoryFlag) {
mDictionaryBinaryFile = file;
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
new file mode 100644
index 000000000..fa7ae310a
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+
+/**
+ * An implementation of DictUpdater for version 3 binary dictionary.
+ */
+@UsedForTesting
+public class Ver3DictUpdater extends Ver3DictDecoder implements DictUpdater {
+ private OutputStream mOutStream;
+
+ @UsedForTesting
+ public Ver3DictUpdater(final File dictFile, final int factoryType) {
+ // DictUpdater must have an updatable DictBuffer.
+ super(dictFile, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY)
+ ? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER);
+ mOutStream = null;
+ }
+
+ private void openStreamAndBuffer() throws FileNotFoundException, IOException {
+ super.openDictBuffer();
+ mOutStream = new FileOutputStream(mDictionaryBinaryFile, true /* append */);
+ }
+
+ private void close() throws IOException {
+ if (mOutStream != null) {
+ mOutStream.close();
+ mOutStream = null;
+ }
+ }
+
+ @Override @UsedForTesting
+ public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
+ if (mOutStream == null) openStreamAndBuffer();
+ mDictBuffer.position(0);
+ super.readHeader();
+ final int wordPos = getTerminalPosition(word);
+ if (wordPos != FormatSpec.NOT_VALID_WORD) {
+ mDictBuffer.position(wordPos);
+ final int flags = mDictBuffer.readUnsignedByte();
+ mDictBuffer.position(wordPos);
+ mDictBuffer.put((byte) DynamicBinaryDictIOUtils.markAsDeleted(flags));
+ }
+ close();
+ }
+
+ @Override @UsedForTesting
+ public void insertWord(final String word, final int frequency,
+ final ArrayList<WeightedString> bigramStrings,
+ final ArrayList<WeightedString> shortcuts,
+ final boolean isNotAWord, final boolean isBlackListEntry)
+ throws IOException, UnsupportedFormatException {
+ if (mOutStream == null) openStreamAndBuffer();
+ DynamicBinaryDictIOUtils.insertWord(this, mOutStream, word, frequency, bigramStrings,
+ shortcuts, isNotAWord, isBlackListEntry);
+ close();
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 624b2784f..5089687da 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -42,7 +42,7 @@ public class Ver4DictDecoder extends DictDecoder {
private static final int FILETYPE_TRIE = 1;
private static final int FILETYPE_FREQUENCY = 2;
private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
- private static final int FILETYPE_BIGRAM = 4;
+ private static final int FILETYPE_BIGRAM_FREQ = 4;
private final File mDictDirectory;
private final DictionaryBufferFactory mBufferFactory;
@@ -85,9 +85,10 @@ public class Ver4DictDecoder extends DictDecoder {
} else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- } else if (fileType == FILETYPE_BIGRAM) {
+ } else if (fileType == FILETYPE_BIGRAM_FREQ) {
return new File(mDictDirectory,
- mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION);
+ mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
+ + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
} else {
throw new RuntimeException("Unsupported kind of file : " + fileType);
}
@@ -99,7 +100,7 @@ public class Ver4DictDecoder extends DictDecoder {
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
- mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM));
+ mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
loadBigramAddressSparseTable();
}
@@ -126,11 +127,12 @@ public class Ver4DictDecoder extends DictDecoder {
}
private void loadBigramAddressSparseTable() throws IOException {
- final File lookupIndexFile = new File(mDictDirectory,
- mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
- final File contentFile = new File(mDictDirectory,
- mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
- mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { contentFile },
+ final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
+ mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index a403e25db..b38c33019 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -26,7 +26,6 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
@@ -44,19 +43,115 @@ public class Ver4DictEncoder implements DictEncoder {
private byte[] mTrieBuf;
private int mTriePos;
private int mHeaderSize;
- private SparseTable mBigramAddressTable;
private OutputStream mTrieOutStream;
private OutputStream mFreqOutStream;
private OutputStream mTerminalAddressTableOutStream;
- private OutputStream mBigramOutStream;
private File mDictDir;
private String mBaseFilename;
+ private BigramContentWriter mBigramWriter;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
mDictPlacedDir = dictPlacedDir;
}
+ private interface SparseTableContentWriterInterface {
+ public void write(final OutputStream outStream) throws IOException;
+ }
+
+ private static class SparseTableContentWriter {
+ private final int mContentCount;
+ private final SparseTable mSparseTable;
+ private final File mLookupTableFile;
+ protected final File mBaseDir;
+ private final File[] mAddressTableFiles;
+ private final File[] mContentFiles;
+ protected final OutputStream[] mContentOutStreams;
+
+ public SparseTableContentWriter(final String name, final int contentCount,
+ final int initialCapacity, final int blockSize, final File baseDir,
+ final String[] contentFilenames, final String[] contentIds) {
+ if (contentFilenames.length != contentIds.length) {
+ throw new RuntimeException("The length of contentFilenames and the length of"
+ + " contentIds are different " + contentFilenames.length + ", "
+ + contentIds.length);
+ }
+ mContentCount = contentCount;
+ mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount);
+ mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ mAddressTableFiles = new File[mContentCount];
+ mContentFiles = new File[mContentCount];
+ mBaseDir = baseDir;
+ for (int i = 0; i < mContentCount; ++i) {
+ mAddressTableFiles[i] = new File(mBaseDir,
+ name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
+ mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
+ }
+ mContentOutStreams = new OutputStream[mContentCount];
+ }
+
+ public void openStreams() throws FileNotFoundException {
+ for (int i = 0; i < mContentCount; ++i) {
+ mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]);
+ }
+ }
+
+ protected void write(final int contentIndex, final int index,
+ final SparseTableContentWriterInterface writer) throws IOException {
+ mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length());
+ writer.write(mContentOutStreams[contentIndex]);
+ mContentOutStreams[contentIndex].flush();
+ }
+
+ public void closeStreams() throws IOException {
+ mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles);
+ for (int i = 0; i < mContentCount; ++i) {
+ mContentOutStreams[i].close();
+ }
+ }
+ }
+
+ private static class BigramContentWriter extends SparseTableContentWriter {
+
+ public BigramContentWriter(final String name, final int initialCapacity,
+ final File baseDir) {
+ super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT,
+ initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+ new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION },
+ new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID });
+ }
+
+ public void writeBigramsForOneWord(final int terminalId,
+ final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
+ throws IOException {
+ write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+ new SparseTableContentWriterInterface() {
+ @Override
+ public void write(final OutputStream outStream) throws IOException {
+ writeBigramsForOneWordInternal(outStream, bigramIterator, dict);
+ }
+ });
+ }
+
+ private void writeBigramsForOneWordInternal(final OutputStream outStream,
+ final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
+ throws IOException {
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = bigramIterator.next();
+ final PtNode target =
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
+ final int unigramFrequencyForThisWord = target.mFrequency;
+ final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
+ bigramIterator.hasNext(), 0, bigram.mFrequency,
+ unigramFrequencyForThisWord, bigram.mWord);
+ BinaryDictEncoderUtils.writeUIntToStream(outStream, bigramFlags,
+ FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ BinaryDictEncoderUtils.writeUIntToStream(outStream, target.mTerminalId,
+ FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
+ }
+ }
+ }
+
private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
throws FileNotFoundException, IOException {
final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
@@ -66,8 +161,6 @@ public class Ver4DictEncoder implements DictEncoder {
final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
final File terminalAddressTableFile = new File(mDictDir,
mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- final File bigramFile = new File(mDictDir,
- mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION);
if (!mDictDir.isDirectory()) {
if (mDictDir.exists()) mDictDir.delete();
mDictDir.mkdirs();
@@ -78,7 +171,6 @@ public class Ver4DictEncoder implements DictEncoder {
mTrieOutStream = new FileOutputStream(trieFile);
mFreqOutStream = new FileOutputStream(freqFile);
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
- mBigramOutStream = new FileOutputStream(bigramFile);
}
private void close() throws IOException {
@@ -92,14 +184,10 @@ public class Ver4DictEncoder implements DictEncoder {
if (mTerminalAddressTableOutStream != null) {
mTerminalAddressTableOutStream.close();
}
- if (mBigramOutStream != null) {
- mBigramOutStream.close();
- }
} finally {
mTrieOutStream = null;
mFreqOutStream = null;
mTerminalAddressTableOutStream = null;
- mBigramOutStream = null;
}
}
@@ -135,10 +223,8 @@ public class Ver4DictEncoder implements DictEncoder {
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
writeTerminalData(flatNodes, terminalCount);
- mBigramAddressTable = new SparseTable(terminalCount,
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 1 /* contentTableCount */);
+ mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir);
writeBigrams(flatNodes, dict);
- writeBigramAddressSparseTable();
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
@@ -245,40 +331,16 @@ public class Ver4DictEncoder implements DictEncoder {
private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
throws IOException {
- final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream();
-
+ mBigramWriter.openStreams();
for (final PtNodeArray nodeArray : flatNodes) {
for (final PtNode ptNode : nodeArray.mData) {
if (ptNode.mBigrams != null) {
- final int startPos = bigramBuffer.size();
- mBigramAddressTable.set(0 /* contentTableIndex */, ptNode.mTerminalId,
- startPos);
- final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator();
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int unigramFrequencyForThisWord = target.mFrequency;
- final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
- bigramIterator.hasNext(), 0, bigram.mFrequency,
- unigramFrequencyForThisWord, bigram.mWord);
- BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId,
- FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
- }
+ mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId,
+ ptNode.mBigrams.iterator(), dict);
}
}
}
- bigramBuffer.writeTo(mBigramOutStream);
- }
-
- private void writeBigramAddressSparseTable() throws IOException {
- final File lookupIndexFile =
- new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
- final File contentFile =
- new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
- mBigramAddressTable.writeToFiles(lookupIndexFile, new File[] { contentFile });
+ mBigramWriter.closeStreams();
}
@Override
diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
index 266216410..c8b62b6c8 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
@@ -230,6 +230,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
mSessions.remove(session);
}
+ @UsedForTesting
public void clearAndFlushDictionary() {
// Clear the node structure on memory
clear();