about | summary | refs | log | tree | commit | diff | stats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java | 4
-rw-r--r-- java/src/com/android/inputmethod/latin/BinaryDictionary.java | 3
-rw-r--r-- java/src/com/android/inputmethod/latin/DictionaryWriter.java | 4
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java | 9
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java | 6
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java | 28
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java | 12
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java | 18
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java | 116
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 147
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java) | 19
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java) | 10
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java) | 6
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java | 3
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java | 427
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java | 3
-rw-r--r-- java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java | 2
-rw-r--r-- java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java | 3
18 files changed, 145 insertions, 675 deletions
diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
index e6fb9807e..1aee22baf 100644
--- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
@@ -21,7 +21,7 @@ import android.util.Log;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-import com.android.inputmethod.latin.makedict.Ver3DictEncoder;
+import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
import java.io.File;
import java.io.IOException;
@@ -64,7 +64,7 @@ abstract public class AbstractDictionaryWriter {
final String tempFilePath = file.getAbsolutePath() + ".temp";
final File tempFile = new File(tempFilePath);
try {
- final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile);
+ final DictEncoder dictEncoder = new Ver2DictEncoder(tempFile);
writeDictionary(dictEncoder, attributeMap);
tempFile.renameTo(file);
} catch (IOException e) {
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index db4234c63..95ac3e203 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -367,6 +367,7 @@ public final class BinaryDictionary extends Dictionary {
public static class LanguageModelParam {
public final int[] mWord0;
public final int[] mWord1;
+ // TODO: this needs to be a list of shortcuts
public final int[] mShortcutTarget;
public final int mUnigramProbability;
public final int mBigramProbability;
@@ -375,7 +376,7 @@ public final class BinaryDictionary extends Dictionary {
public final boolean mIsBlacklisted;
public final int mTimestamp;
- // Constructor for unigram.
+ // Constructor for unigram. TODO: support shortcuts
public LanguageModelParam(final String word, final int unigramProbability,
final int timestamp) {
mWord0 = null;
diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
index f960c5343..89ef96d7f 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
@@ -18,8 +18,6 @@ package com.android.inputmethod.latin;
import android.content.Context;
-import com.android.inputmethod.keyboard.ProximityInfo;
-import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
@@ -52,7 +50,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
public void clear() {
final HashMap<String, String> attributes = CollectionUtils.newHashMap();
mFusionDictionary = new FusionDictionary(new PtNodeArray(),
- new FusionDictionary.DictionaryOptions(attributes, false, false));
+ new FusionDictionary.DictionaryOptions(attributes));
}
/**
diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
index bbbb8e461..f8fa68f45 100644
--- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
@@ -58,12 +58,9 @@ public abstract class AbstractDictDecoder implements DictDecoder {
headerSize);
final FileHeader header = new FileHeader(headerSize,
- new FusionDictionary.DictionaryOptions(attributes,
- 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
- 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
- new FormatOptions(version,
- 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE),
- 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
+ new FusionDictionary.DictionaryOptions(attributes),
+ new FormatOptions(version,
+ 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
return header;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 83ee7d685..7f0aa777f 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -330,7 +330,7 @@ public final class BinaryDictDecoderUtils {
static int readChildrenAddress(final DictBuffer dictBuffer,
final int optionFlags, final FormatOptions options) {
- if (options.mSupportsDynamicUpdate) {
+ if (options.supportsDynamicUpdate()) {
final int address = dictBuffer.readUnsignedInt24();
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
if ((address & FormatSpec.MSB24) != 0) {
@@ -540,11 +540,11 @@ public final class BinaryDictDecoderUtils {
}
// reach the end of the array.
- if (options.mSupportsDynamicUpdate) {
+ if (options.supportsDynamicUpdate()) {
final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
if (!hasValidForwardLink) break;
}
- } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
+ } while (options.supportsDynamicUpdate() && dictDecoder.hasNextPtNodeArray());
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOriginPos;
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index c0dad3db2..8ba0797de 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -20,7 +20,6 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncodin
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@@ -161,7 +160,7 @@ public class BinaryDictEncoderUtils {
node.mCachedSize = nodeSize;
size += nodeSize;
}
- if (options.mSupportsDynamicUpdate) {
+ if (options.supportsDynamicUpdate()) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
ptNodeArray.mCachedSize = size;
@@ -398,7 +397,7 @@ public class BinaryDictEncoderUtils {
nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
}
}
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else if (null != ptNode.mChildren) {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
@@ -418,7 +417,7 @@ public class BinaryDictEncoderUtils {
ptNode.mCachedSize = nodeSize;
size += nodeSize;
}
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
if (ptNodeArray.mCachedSize != size) {
@@ -534,7 +533,7 @@ public class BinaryDictEncoderUtils {
if (passes > MAX_PASSES) throw new RuntimeException("Too many passes - probably a bug");
} while (changesDone);
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
computeParentAddresses(flatNodes);
}
final PtNodeArray lastPtNodeArray = flatNodes.get(flatNodes.size() - 1);
@@ -643,7 +642,7 @@ public class BinaryDictEncoderUtils {
byte flags = 0;
if (hasMultipleChars) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
if (isTerminal) flags |= FormatSpec.FLAG_IS_TERMINAL;
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
flags |= FormatSpec.FLAG_IS_NOT_MOVED;
} else if (true) {
switch (childrenAddressSize) {
@@ -755,16 +754,11 @@ public class BinaryDictEncoderUtils {
}
/**
- * Makes the 2-byte value for options flags.
+ * Makes the 2-byte value for options flags. Unused at the moment, and always 0.
*/
- private static final int makeOptionsValue(final FusionDictionary dictionary,
- final FormatOptions formatOptions) {
- final DictionaryOptions options = dictionary.mOptions;
- final boolean hasBigrams = dictionary.hasBigrams();
- return (options.mFrenchLigatureProcessing ? FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG : 0)
- + (options.mGermanUmlautProcessing ? FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG : 0)
- + (hasBigrams ? FormatSpec.CONTAINS_BIGRAMS_FLAG : 0)
- + (formatOptions.mSupportsDynamicUpdate ? FormatSpec.SUPPORTS_DYNAMIC_UPDATE : 0);
+ private static final int makeOptionsValue(final FormatOptions formatOptions) {
+ // TODO: why doesn't this handle CONTAINS_TIMESTAMP_FLAG?
+ return 0;
}
/**
@@ -852,7 +846,7 @@ public class BinaryDictEncoderUtils {
}
dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict);
}
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
dictEncoder.writeForwardLinkAddress(FormatSpec.NO_FORWARD_LINK_ADDRESS);
}
if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate
@@ -953,7 +947,7 @@ public class BinaryDictEncoderUtils {
headerBuffer.write((byte) (0xFF & version));
// Options flags
- final int options = makeOptionsValue(dict, formatOptions);
+ final int options = makeOptionsValue(formatOptions);
headerBuffer.write((byte) (0xFF & (options >> 8)));
headerBuffer.write((byte) (0xFF & options));
final int headerSizeOffset = headerBuffer.size();
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 86ebf5844..640d778bb 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -112,7 +112,7 @@ public final class BinaryDictIOUtils {
}
if (p.mPosition == p.mNumOfPtNode) {
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
final boolean hasValidForwardLinkAddress =
dictDecoder.readAndFollowForwardLink();
if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
@@ -228,7 +228,7 @@ public final class BinaryDictIOUtils {
// a forward link address that we need to consult and possibly resume
// search on the next node array in the linked list.
if (foundNextPtNode) break;
- if (!header.mFormatOptions.mSupportsDynamicUpdate) {
+ if (!header.mFormatOptions.supportsDynamicUpdate()) {
return FormatSpec.NOT_VALID_WORD;
}
@@ -507,7 +507,7 @@ public final class BinaryDictIOUtils {
* Helper method to check whether the node is moved.
*/
public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
- return options.mSupportsDynamicUpdate
+ return options.supportsDynamicUpdate()
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
}
@@ -516,14 +516,14 @@ public final class BinaryDictIOUtils {
*/
public static boolean supportsDynamicUpdate(final FormatOptions options) {
return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
- && options.mSupportsDynamicUpdate;
+ && options.supportsDynamicUpdate();
}
/**
* Helper method to check whether the node is deleted.
*/
public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
- return formatOptions.mSupportsDynamicUpdate
+ return formatOptions.supportsDynamicUpdate()
&& ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
}
@@ -546,7 +546,7 @@ public final class BinaryDictIOUtils {
static int getChildrenAddressSize(final int optionFlags,
final FormatOptions formatOptions) {
- if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
+ if (formatOptions.supportsDynamicUpdate()) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
return 1;
diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
index 971b4ff9f..97ad667a6 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
@@ -55,13 +55,13 @@ public final class DynamicBinaryDictIOUtils {
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
- private static void updateParentAddress(final Ver3DictUpdater dictUpdater,
+ private static void updateParentAddress(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
final int originalPosition = dictBuffer.position();
dictBuffer.position(ptNodeOriginAddress);
- if (!formatOptions.mSupportsDynamicUpdate) {
+ if (!formatOptions.supportsDynamicUpdate()) {
throw new RuntimeException("this file format does not support parent addresses");
}
final int flags = dictBuffer.readUnsignedByte();
@@ -88,7 +88,7 @@ public final class DynamicBinaryDictIOUtils {
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
- private static void updateParentAddresses(final Ver3DictUpdater dictUpdater,
+ private static void updateParentAddresses(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictUpdater.getPosition();
@@ -102,7 +102,7 @@ public final class DynamicBinaryDictIOUtils {
}
if (!dictUpdater.readAndFollowForwardLink()) break;
if (dictUpdater.getPosition() == FormatSpec.NO_FORWARD_LINK_ADDRESS) break;
- } while (formatOptions.mSupportsDynamicUpdate);
+ } while (formatOptions.supportsDynamicUpdate());
dictUpdater.setPosition(originalPosition);
}
@@ -114,7 +114,7 @@ public final class DynamicBinaryDictIOUtils {
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
- private static void updateChildrenAddress(final Ver3DictUpdater dictUpdater,
+ private static void updateChildrenAddress(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -134,7 +134,7 @@ public final class DynamicBinaryDictIOUtils {
* Helper method to move a PtNode to the tail of the file.
*/
private static int movePtNode(final OutputStream destination,
- final Ver3DictUpdater dictUpdater, final PtNodeInfo info,
+ final Ver2DictUpdater dictUpdater, final PtNodeInfo info,
final int nodeArrayOriginAddress, final int oldNodeAddress,
final FormatOptions formatOptions) throws IOException {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -151,7 +151,7 @@ public final class DynamicBinaryDictIOUtils {
}
@SuppressWarnings("unused")
- private static void updateForwardLink(final Ver3DictUpdater dictUpdater,
+ private static void updateForwardLink(final Ver2DictUpdater dictUpdater,
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -199,7 +199,7 @@ public final class DynamicBinaryDictIOUtils {
final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
- final Ver3DictUpdater dictUpdater, final int oldPtNodeArrayOrigin,
+ final Ver2DictUpdater dictUpdater, final int oldPtNodeArrayOrigin,
final int oldPtNodeOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0;
final int newPtNodeOrigin = fileEndAddress + 1;
@@ -252,7 +252,7 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting
- public static void insertWord(final Ver3DictUpdater dictUpdater,
+ public static void insertWord(final Ver2DictUpdater dictUpdater,
final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 846aacf11..f23fe4656 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -40,12 +40,8 @@ public final class FormatSpec {
* p | not used 3 bits
* t | each unigram and bigram entry has a time stamp?
* i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG
- * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG
- * n | FRENCH_LIGATURE_PROCESSING_FLAG
- * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE
- * l | GERMAN_UMLAUT_PROCESSING_FLAG
- * a |
- * gs
+ * o |
+ * nflags
*
* h |
* e | size of the file header, 4bytes
@@ -82,45 +78,36 @@ public final class FormatSpec {
* s
*
* f |
- * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
- * r | forward link address, 3byte
- * w | 1 byte = bbbbbbbb match
- * a | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
- * r | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
- * d |
- * linkaddress
+ * o | forward link address, 3byte
+ * r | 1 byte = bbbbbbbb match
+ * w | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
+ * a | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
+ * r |
+ * dlinkaddress
*/
/* Node (FusionDictionary.PtNode) layout is as follows:
- * | IF !SUPPORTS_DYNAMIC_UPDATE
- * | addressType xx : mask with MASK_CHILDREN_ADDRESS_TYPE
- * | 2 bits, 00 = no children : FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS
- * f | 01 = 1 byte : FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE
- * l | 10 = 2 bytes : FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES
- * a | 11 = 3 bytes : FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
- * g | ELSE
- * s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
- * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
- * | 01 = yes : FLAG_IS_MOVED
- * | the new address is stored in the same place as the parent address
- * | is deleted? 10 = yes : FLAG_IS_DELETED
- * | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
- * | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
- * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
+ * | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED
+ * | This must be the same as FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES
+ * | 01 = yes : FLAG_IS_MOVED
+ * f | the new address is stored in the same place as the parent address
+ * l | is deleted? 10 = yes : FLAG_IS_DELETED
+ * a | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS
+ * g | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL
+ * s | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS
* | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS
* | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD
* | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED
*
* p |
- * a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
- * r | parent address, 3byte
- * e | 1 byte = bbbbbbbb match
- * n | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
- * t | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
- * a | This address is relative to the head of the PtNode.
- * d | If the node doesn't have a parent, this field is set to 0.
+ * a | parent address, 3byte
+ * r | 1 byte = bbbbbbbb match
+ * e | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
+ * n | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
+ * t | This address is relative to the head of the PtNode.
+ * a | If the node doesn't have a parent, this field is set to 0.
* d |
- * ress
+ * dress
*
* c | IF FLAG_HAS_MULTIPLE_CHARS
* h | char, char, char, char n * (1 or 3 bytes) : use PtNodeInfo for i/o helpers
@@ -134,23 +121,16 @@ public final class FormatSpec {
* e | frequency 1 byte
* q |
*
- * c | IF SUPPORTS_DYNAMIC_UPDATE
- * h | children address, 3 bytes
- * i | 1 byte = bbbbbbbb match
- * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
- * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
- * r | if this node doesn't have children, this field is set to 0.
- * e | (see BinaryDictEncoderUtils#writeVariableSignedAddress)
- * n | ELSIF 00 = FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS == addressType
- * a | // nothing
- * d | ELSIF 01 = FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE == addressType
- * d | children address, 1 byte
- * r | ELSIF 10 = FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES == addressType
- * e | children address, 2 bytes
- * s | ELSE // 11 = FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES = addressType
- * s | children address, 3 bytes
- * | END
- * | This address is relative to the position of this field.
+ * c |
+ * h | children address, 3 bytes
+ * i | 1 byte = bbbbbbbb match
+ * l | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
+ * d | otherwise => (bbbbbbbb<<16) + (next byte << 8) + next byte
+ * r | if this node doesn't have children, this field is set to 0.
+ * e | (see BinaryDictEncoderUtils#writeVariableSignedAddress)
+ * n | This address is relative to the position of this field.
+ * a |
+ * ddress
*
* | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
* | shortcut string list
@@ -214,11 +194,7 @@ public final class FormatSpec {
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
// These options need to be the same numeric values as the one in the native reading code.
- static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
// TODO: Make the native reading code read this variable.
- static final int SUPPORTS_DYNAMIC_UPDATE = 0x2;
- static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
- static final int CONTAINS_BIGRAMS_FLAG = 0x8;
static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
// TODO: Make this value adaptative to content data, store it in the header, and
@@ -339,30 +315,23 @@ public final class FormatSpec {
*/
public static final class FormatOptions {
public final int mVersion;
- public final boolean mSupportsDynamicUpdate;
public final boolean mHasTerminalId;
public final boolean mHasTimestamp;
- @UsedForTesting
- public FormatOptions(final int version) {
- this(version, false);
- }
@UsedForTesting
- public FormatOptions(final int version, final boolean supportsDynamicUpdate) {
- this(version, supportsDynamicUpdate, false /* hasTimestamp */);
+ public FormatOptions(final int version) {
+ this(version, false /* hasTimestamp */);
}
- public FormatOptions(final int version, final boolean supportsDynamicUpdate,
- final boolean hasTimestamp) {
+ public FormatOptions(final int version, final boolean hasTimestamp) {
mVersion = version;
- if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) {
- throw new RuntimeException("Dynamic updates are only supported with versions "
- + FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior.");
- }
- mSupportsDynamicUpdate = supportsDynamicUpdate;
mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
mHasTimestamp = hasTimestamp;
}
+
+ public boolean supportsDynamicUpdate() {
+ return mVersion >= FIRST_VERSION_WITH_DYNAMIC_UPDATE;
+ }
}
/**
@@ -374,7 +343,6 @@ public final class FormatSpec {
public final FormatOptions mFormatOptions;
// Note that these are corresponding definitions in native code in latinime::HeaderPolicy
// and latinime::HeaderReadWriteUtils.
- public static final String SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE = "SUPPORTS_DYNAMIC_UPDATE";
public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE";
public static final String HAS_HISTORICAL_INFO_ATTRIBUTE = "HAS_HISTORICAL_INFO";
public static final String ATTRIBUTE_VALUE_TRUE = "1";
@@ -433,7 +401,7 @@ public final class FormatSpec {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile, bufferType);
} else if (dictFile.isFile()) {
- return new Ver3DictDecoder(dictFile, bufferType);
+ return new Ver2DictDecoder(dictFile, bufferType);
}
return null;
}
@@ -443,7 +411,7 @@ public final class FormatSpec {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile, factory);
} else if (dictFile.isFile()) {
- return new Ver3DictDecoder(dictFile, factory);
+ return new Ver2DictDecoder(dictFile, factory);
}
return null;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 3bb218bea..fdf2ae7b5 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -303,14 +303,9 @@ public final class FusionDictionary implements Iterable<Word> {
* Options global to the dictionary.
*/
public static final class DictionaryOptions {
- public final boolean mGermanUmlautProcessing;
- public final boolean mFrenchLigatureProcessing;
public final HashMap<String, String> mAttributes;
- public DictionaryOptions(final HashMap<String, String> attributes,
- final boolean germanUmlautProcessing, final boolean frenchLigatureProcessing) {
+ public DictionaryOptions(final HashMap<String, String> attributes) {
mAttributes = attributes;
- mGermanUmlautProcessing = germanUmlautProcessing;
- mFrenchLigatureProcessing = frenchLigatureProcessing;
}
@Override
public String toString() { // Convenience method
@@ -339,14 +334,6 @@ public final class FusionDictionary implements Iterable<Word> {
}
s.append("\n");
}
- if (mGermanUmlautProcessing) {
- s.append(indent);
- s.append("Needs German umlaut processing\n");
- }
- if (mFrenchLigatureProcessing) {
- s.append(indent);
- s.append("Needs French ligature processing\n");
- }
return s.toString();
}
}
@@ -701,138 +688,6 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
- * Recursively count the number of nodes in a given branch of the trie.
- *
- * @param nodeArray the node array to count.
- * @return the number of nodes in this branch.
- */
- public static int countNodeArrays(final PtNodeArray nodeArray) {
- int size = 1;
- for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
- PtNode ptNode = nodeArray.mData.get(i);
- if (null != ptNode.mChildren)
- size += countNodeArrays(ptNode.mChildren);
- }
- return size;
- }
-
- // Recursively find out whether there are any bigrams.
- // This can be pretty expensive especially if there aren't any (we return as soon
- // as we find one, so it's much cheaper if there are bigrams)
- private static boolean hasBigramsInternal(final PtNodeArray nodeArray) {
- if (null == nodeArray) return false;
- for (int i = nodeArray.mData.size() - 1; i >= 0; --i) {
- PtNode ptNode = nodeArray.mData.get(i);
- if (null != ptNode.mBigrams) return true;
- if (hasBigramsInternal(ptNode.mChildren)) return true;
- }
- return false;
- }
-
- /**
- * Finds out whether there are any bigrams in this dictionary.
- *
- * @return true if there is any bigram, false otherwise.
- */
- // TODO: this is expensive especially for large dictionaries without any bigram.
- // The up side is, this is always accurate and correct and uses no memory. We should
- // find a more efficient way of doing this, without compromising too much on memory
- // and ease of use.
- public boolean hasBigrams() {
- return hasBigramsInternal(mRootNodeArray);
- }
-
- // Historically, the tails of the words were going to be merged to save space.
- // However, that would prevent the code to search for a specific address in log(n)
- // time so this was abandoned.
- // The code is still of interest as it does add some compression to any dictionary
- // that has no need for attributes. Implementations that does not read attributes should be
- // able to read a dictionary with merged tails.
- // Also, the following code does support frequencies, as in, it will only merges
- // tails that share the same frequency. Though it would result in the above loss of
- // performance while searching by address, it is still technically possible to merge
- // tails that contain attributes, but this code does not take that into account - it does
- // not compare attributes and will merge terminals with different attributes regardless.
- public void mergeTails() {
- MakedictLog.i("Do not merge tails");
- return;
-
-// MakedictLog.i("Merging PtNodes. Number of PtNodes : " + countPtNodes(root));
-// MakedictLog.i("Number of PtNodes : " + countPtNodes(root));
-//
-// final HashMap<String, ArrayList<PtNodeArray>> repository =
-// new HashMap<String, ArrayList<PtNodeArray>>();
-// mergeTailsInner(repository, root);
-//
-// MakedictLog.i("Number of different pseudohashes : " + repository.size());
-// int size = 0;
-// for (ArrayList<PtNodeArray> a : repository.values()) {
-// size += a.size();
-// }
-// MakedictLog.i("Number of nodes after merge : " + (1 + size));
-// MakedictLog.i("Recursively seen nodes : " + countNodes(root));
- }
-
- // The following methods are used by the deactivated mergeTails()
-// private static boolean isEqual(PtNodeArray a, PtNodeArray b) {
-// if (null == a && null == b) return true;
-// if (null == a || null == b) return false;
-// if (a.data.size() != b.data.size()) return false;
-// final int size = a.data.size();
-// for (int i = size - 1; i >= 0; --i) {
-// PtNode aPtNode = a.data.get(i);
-// PtNode bPtNode = b.data.get(i);
-// if (aPtNode.frequency != bPtNode.frequency) return false;
-// if (aPtNode.alternates == null && bPtNode.alternates != null) return false;
-// if (aPtNode.alternates != null && !aPtNode.equals(bPtNode.alternates)) return false;
-// if (!Arrays.equals(aPtNode.chars, bPtNode.chars)) return false;
-// if (!isEqual(aPtNode.children, bPtNode.children)) return false;
-// }
-// return true;
-// }
-
-// static private HashMap<String, ArrayList<PtNodeArray>> mergeTailsInner(
-// final HashMap<String, ArrayList<PtNodeArray>> map, final PtNodeArray nodeArray) {
-// final ArrayList<PtNode> branches = nodeArray.data;
-// final int nodeSize = branches.size();
-// for (int i = 0; i < nodeSize; ++i) {
-// PtNode ptNode = branches.get(i);
-// if (null != ptNode.children) {
-// String pseudoHash = getPseudoHash(ptNode.children);
-// ArrayList<PtNodeArray> similarList = map.get(pseudoHash);
-// if (null == similarList) {
-// similarList = new ArrayList<PtNodeArray>();
-// map.put(pseudoHash, similarList);
-// }
-// boolean merged = false;
-// for (PtNodeArray similar : similarList) {
-// if (isEqual(ptNode.children, similar)) {
-// ptNode.children = similar;
-// merged = true;
-// break;
-// }
-// }
-// if (!merged) {
-// similarList.add(ptNode.children);
-// }
-// mergeTailsInner(map, ptNode.children);
-// }
-// }
-// return map;
-// }
-
-// private static String getPseudoHash(final PtNodeArray nodeArray) {
-// StringBuilder s = new StringBuilder();
-// for (PtNode ptNode : nodeArray.data) {
-// s.append(ptNode.frequency);
-// for (int ch : ptNode.chars) {
-// s.append(Character.toChars(ch));
-// }
-// }
-// return s.toString();
-// }
-
- /**
* Iterator to walk through a dictionary.
*
* This is purely for convenience.
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index acab4f8a5..e9667ab0b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -34,18 +34,11 @@ import java.util.ArrayList;
import java.util.Arrays;
/**
- * An implementation of DictDecoder for version 3 binary dictionary.
+ * An implementation of DictDecoder for version 2 binary dictionary.
*/
@UsedForTesting
-public class Ver3DictDecoder extends AbstractDictDecoder {
- private static final String TAG = Ver3DictDecoder.class.getSimpleName();
-
- static {
- JniUtils.loadNativeLibrary();
- }
-
- // TODO: implement something sensical instead of just a phony method
- private static native int doNothing();
+public class Ver2DictDecoder extends AbstractDictDecoder {
+ private static final String TAG = Ver2DictDecoder.class.getSimpleName();
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
private static int readFrequency(final DictBuffer dictBuffer) {
@@ -57,7 +50,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
private final DictionaryBufferFactory mBufferFactory;
protected DictBuffer mDictBuffer;
- /* package */ Ver3DictDecoder(final File file, final int factoryFlag) {
+ /* package */ Ver2DictDecoder(final File file, final int factoryFlag) {
mDictionaryBinaryFile = file;
mDictBuffer = null;
@@ -72,7 +65,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
}
}
- /* package */ Ver3DictDecoder(final File file, final DictionaryBufferFactory factory) {
+ /* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) {
mDictionaryBinaryFile = file;
mBufferFactory = factory;
}
@@ -166,7 +159,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
+ addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
addressPointer);
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index 5da34534e..665544228 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
@@ -31,16 +31,16 @@ import java.util.ArrayList;
import java.util.Iterator;
/**
- * An implementation of DictEncoder for version 3 binary dictionary.
+ * An implementation of DictEncoder for version 2 binary dictionary.
*/
-public class Ver3DictEncoder implements DictEncoder {
+public class Ver2DictEncoder implements DictEncoder {
private final File mDictFile;
private OutputStream mOutStream;
private byte[] mBuffer;
private int mPosition;
- public Ver3DictEncoder(final File dictFile) {
+ public Ver2DictEncoder(final File dictFile) {
mDictFile = dictFile;
mOutStream = null;
mBuffer = null;
@@ -49,7 +49,7 @@ public class Ver3DictEncoder implements DictEncoder {
// This constructor is used only by BinaryDictOffdeviceUtilsTests.
// If you want to use this in the production code, you should consider keeping consistency of
// the interface of Ver3DictDecoder by using factory.
- public Ver3DictEncoder(final OutputStream outStream) {
+ public Ver2DictEncoder(final OutputStream outStream) {
mDictFile = null;
mOutStream = outStream;
}
@@ -169,7 +169,7 @@ public class Ver3DictEncoder implements DictEncoder {
private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
- if (formatOptions.mSupportsDynamicUpdate) {
+ if (formatOptions.supportsDynamicUpdate()) {
mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
childrenPos);
} else {
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java
index 07adda625..6419340ff 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java
@@ -27,14 +27,14 @@ import java.io.OutputStream;
import java.util.ArrayList;
/**
- * An implementation of DictUpdater for version 3 binary dictionary.
+ * An implementation of DictUpdater for version 2 binary dictionary.
*/
@UsedForTesting
-public class Ver3DictUpdater extends Ver3DictDecoder implements DictUpdater {
+public class Ver2DictUpdater extends Ver2DictDecoder implements DictUpdater {
private OutputStream mOutStream;
@UsedForTesting
- public Ver3DictUpdater(final File dictFile, final int factoryType) {
+ public Ver2DictUpdater(final File dictFile, final int factoryType) {
// DictUpdater must have an updatable DictBuffer.
super(dictFile, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY)
? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER);
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 07522b54b..3be62f066 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -183,14 +183,11 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
* An auxiliary class for reading bigrams.
*/
protected static class BigramContentReader extends SparseTableContentReader {
- private final boolean mHasTimestamp;
-
public BigramContentReader(final String name, final File baseDir,
final DictionaryBufferFactory factory, final boolean hasTimestamp) {
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
- mHasTimestamp = hasTimestamp;
}
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 1a245b6db..a746f9945 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -1,5 +1,4 @@
/*
-/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,25 +17,15 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.utils.CollectionUtils;
-import com.android.inputmethod.latin.utils.FileUtils;
+import com.android.inputmethod.latin.utils.LocaleUtils;
import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
/**
* An implementation of DictEncoder for version 4 binary dictionary.
@@ -44,197 +33,19 @@ import java.util.Iterator;
@UsedForTesting
public class Ver4DictEncoder implements DictEncoder {
private final File mDictPlacedDir;
- private byte[] mTrieBuf;
- private int mTriePos;
- private OutputStream mTrieOutStream;
- private OutputStream mHeaderOutStream;
- private OutputStream mFreqOutStream;
- private OutputStream mUnigramTimestampOutStream;
- private OutputStream mTerminalAddressTableOutStream;
- private File mDictDir;
- private String mBaseFilename;
- private BigramContentWriter mBigramWriter;
- private ShortcutContentWriter mShortcutWriter;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
mDictPlacedDir = dictPlacedDir;
}
- private static class BigramContentWriter extends SparseTableContentWriter {
- private final boolean mWriteTimestamp;
-
- public BigramContentWriter(final String name, final int initialCapacity,
- final File baseDir, final boolean writeTimestamp) {
- super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity,
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp));
- mWriteTimestamp = writeTimestamp;
- }
-
- private static String[] getContentFilenames(final String name,
- final boolean writeTimestamp) {
- final String[] contentFilenames;
- if (writeTimestamp) {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
- name + FormatSpec.BIGRAM_FILE_EXTENSION };
- } else {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
- }
- return contentFilenames;
- }
-
- private static String[] getContentIds(final boolean writeTimestamp) {
- final String[] contentIds;
- if (writeTimestamp) {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
- FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
- } else {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
- }
- return contentIds;
- }
-
- public void writeBigramsForOneWord(final int terminalId, final int bigramCount,
- final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
- throws IOException {
- write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- writeBigramsForOneWordInternal(outStream, bigramIterator, dict);
- }});
- if (mWriteTimestamp) {
- write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream,
- bigramCount);
- }});
- }
- }
-
- private void writeBigramsForOneWordInternal(final OutputStream outStream,
- final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
- throws IOException {
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int unigramFrequencyForThisWord = target.mFrequency;
- final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
- bigramIterator.hasNext(), 0, bigram.mFrequency,
- unigramFrequencyForThisWord, bigram.mWord);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, bigramFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, target.mTerminalId,
- FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
- }
- }
-
- private void initBigramTimestampsCountersAndLevelsForOneWordInternal(
- final OutputStream outStream, final int bigramCount) throws IOException {
- for (int i = 0; i < bigramCount; ++i) {
- // TODO: Figure out what initial values should be.
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_TIMESTAMP_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_COUNTER_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_LEVEL_SIZE);
- }
- }
- }
-
- private static class ShortcutContentWriter extends SparseTableContentWriter {
- public ShortcutContentWriter(final String name, final int initialCapacity,
- final File baseDir) {
- super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity,
- FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
- new String[] { FormatSpec.SHORTCUT_CONTENT_ID });
- }
-
- public void writeShortcutForOneWord(final int terminalId,
- final Iterator<WeightedString> shortcutIterator) throws IOException {
- write(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- writeShortcutForOneWordInternal(outStream, shortcutIterator);
- }
- });
- }
-
- private void writeShortcutForOneWordInternal(final OutputStream outStream,
- final Iterator<WeightedString> shortcutIterator) throws IOException {
- while (shortcutIterator.hasNext()) {
- final WeightedString target = shortcutIterator.next();
- final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
- shortcutIterator.hasNext(), target.mFrequency);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, shortcutFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- CharEncoding.writeString(outStream, target.mWord);
- }
- }
- }
-
- private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
- throws FileNotFoundException, IOException {
- final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
- mBaseFilename = header.getId() + "." + header.getVersion();
- mDictDir = new File(mDictPlacedDir, mBaseFilename);
- final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
- final File headerFile = new File(mDictDir,
- mBaseFilename + FormatSpec.HEADER_FILE_EXTENSION);
- final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
- final File timestampFile = new File(mDictDir,
- mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
- final File terminalAddressTableFile = new File(mDictDir,
- mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- if (!mDictDir.isDirectory()) {
- if (mDictDir.exists()) {
- FileUtils.deleteRecursively(mDictDir);
- }
- mDictDir.mkdirs();
- }
- mTrieOutStream = new FileOutputStream(trieFile);
- mHeaderOutStream = new FileOutputStream(headerFile);
- mFreqOutStream = new FileOutputStream(freqFile);
- mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
- if (formatOptions.mHasTimestamp) {
- mUnigramTimestampOutStream = new FileOutputStream(timestampFile);
- }
- }
-
- private void close() throws IOException {
- try {
- if (mTrieOutStream != null) {
- mTrieOutStream.close();
- }
- if (mHeaderOutStream != null) {
- mHeaderOutStream.close();
- }
- if (mFreqOutStream != null) {
- mFreqOutStream.close();
- }
- if (mTerminalAddressTableOutStream != null) {
- mTerminalAddressTableOutStream.close();
- }
- if (mUnigramTimestampOutStream != null) {
- mUnigramTimestampOutStream.close();
- }
- } finally {
- mTrieOutStream = null;
- mHeaderOutStream = null;
- mFreqOutStream = null;
- mTerminalAddressTableOutStream = null;
- }
- }
-
+ // TODO: This builds a FusionDictionary first and iterates it to add words to the binary
+ // dictionary. However, it is possible to just add words directly to the binary dictionary
+ // instead.
+ // In the long run, when we stop supporting version 2, FusionDictionary will become deprecated
+ // and we can remove it. Then we'll be able to just call BinaryDictionary directly.
@Override
- public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
+ public void writeDictionary(FusionDictionary dict, FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
if (formatOptions.mVersion != FormatSpec.VERSION4) {
throw new UnsupportedFormatException("File header has a wrong version number : "
@@ -243,208 +54,70 @@ public class Ver4DictEncoder implements DictEncoder {
if (!mDictPlacedDir.isDirectory()) {
throw new UnsupportedFormatException("Given path is not a directory.");
}
-
- if (mTrieOutStream == null) {
- openStreams(formatOptions, dict.mOptions);
- }
-
- BinaryDictEncoderUtils.writeDictionaryHeader(mHeaderOutStream, dict, formatOptions);
-
- MakedictLog.i("Flattening the tree...");
- ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
- int terminalCount = 0;
- final ArrayList<PtNode> nodes = CollectionUtils.newArrayList();
- for (final PtNodeArray array : flatNodes) {
- for (final PtNode node : array.mData) {
- if (node.isTerminal()) {
- nodes.add(node);
- node.mTerminalId = terminalCount++;
+ if (!BinaryDictionary.createEmptyDictFile(mDictPlacedDir.getAbsolutePath(),
+ FormatSpec.VERSION4, dict.mOptions.mAttributes)) {
+ throw new IOException("Cannot create dictionary file");
+ }
+ final BinaryDictionary binaryDict = new BinaryDictionary(mDictPlacedDir.getAbsolutePath(),
+ 0l, mDictPlacedDir.length(), true /* useFullEditDistance */,
+ LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get(
+ FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE)),
+ Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */,
+ true /* isUpdatable */);
+ if (!binaryDict.isValidDictionary()) {
+ // Somehow createEmptyDictFile returned true, but the file was not created correctly
+ throw new IOException("Cannot create dictionary file");
+ }
+ for (final Word word : dict) {
+ // TODO: switch to addMultipleDictionaryEntries when they support shortcuts
+ if (null == word.mShortcutTargets || word.mShortcutTargets.isEmpty()) {
+ binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ null /* shortcutTarget */, 0 /* shortcutProbability */,
+ word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
+ } else {
+ for (final WeightedString shortcutTarget : word.mShortcutTargets) {
+ binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ shortcutTarget.mWord, shortcutTarget.mFrequency,
+ word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
}
}
- }
- Collections.sort(nodes, new Comparator<PtNode>() {
- @Override
- public int compare(final PtNode lhs, final PtNode rhs) {
- if (lhs.mFrequency != rhs.mFrequency) {
- return lhs.mFrequency < rhs.mFrequency ? -1 : 1;
- }
- if (lhs.mTerminalId < rhs.mTerminalId) return -1;
- if (lhs.mTerminalId > rhs.mTerminalId) return 1;
- return 0;
+ if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDict.flushWithGC();
}
- });
- int count = 0;
- for (final PtNode node : nodes) {
- node.mTerminalId = count++;
- }
-
- MakedictLog.i("Computing addresses...");
- BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
- if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
-
- writeTerminalData(flatNodes, terminalCount);
- if (formatOptions.mHasTimestamp) {
- initUnigramTimestamps(terminalCount);
- }
- mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir,
- formatOptions.mHasTimestamp);
- writeBigrams(flatNodes, dict);
- mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir);
- writeShortcuts(flatNodes);
-
- final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
- final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
- mTrieBuf = new byte[bufferSize];
-
- MakedictLog.i("Writing file...");
- for (PtNodeArray nodeArray : flatNodes) {
- BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
}
- if (MakedictLog.DBG) {
- BinaryDictEncoderUtils.showStatistics(flatNodes);
- MakedictLog.i("has " + terminalCount + " terminals.");
+ for (final Word word0 : dict) {
+ if (null == word0.mBigrams) continue;
+ for (final WeightedString word1 : word0.mBigrams) {
+ binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency,
+ 0 /* timestamp */);
+ }
+ if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDict.flushWithGC();
+ }
}
- mTrieOutStream.write(mTrieBuf);
-
- MakedictLog.i("Done");
- close();
+ binaryDict.flushWithGC();
+ binaryDict.close();
}
@Override
public void setPosition(int position) {
- if (mTrieBuf == null || position < 0 || position > mTrieBuf.length) return;
- mTriePos = position;
}
@Override
public int getPosition() {
- return mTriePos;
+ return 0;
}
@Override
public void writePtNodeCount(int ptNodeCount) {
- final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
- // ptNodeCount must fit on one byte or two bytes.
- // Please see comments in FormatSpec
- if (countSize != 1 && countSize != 2) {
- throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
- }
- final int encodedPtNodeCount = (countSize == 2) ?
- (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, encodedPtNodeCount,
- countSize);
- }
-
- private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
- BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
- FormatSpec.PTNODE_FLAGS_SIZE);
- }
-
- private void writeParentPosition(int parentPos, final PtNode ptNode,
- final FormatOptions formatOptions) {
- if (parentPos != FormatSpec.NO_PARENT_ADDRESS) {
- parentPos -= ptNode.mCachedAddressAfterUpdate;
- }
- mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos,
- formatOptions);
- }
-
- private void writeCharacters(final int[] characters, final boolean hasSeveralChars) {
- mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos);
- if (hasSeveralChars) {
- mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
- }
- }
-
- private void writeTerminalId(final int terminalId) {
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId,
- FormatSpec.PTNODE_TERMINAL_ID_SIZE);
- }
-
- private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
- if (formatOptions.mSupportsDynamicUpdate) {
- mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
- mTriePos, childrenPos);
- } else {
- mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
- mTriePos, childrenPos);
- }
- }
-
- private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
- throws IOException {
- mBigramWriter.openStreams();
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.mBigrams != null) {
- mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(),
- ptNode.mBigrams.iterator(), dict);
- }
- }
- }
- mBigramWriter.closeStreams();
- }
-
- private void writeShortcuts(final ArrayList<PtNodeArray> flatNodes) throws IOException {
- mShortcutWriter.openStreams();
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.mShortcutTargets != null && !ptNode.mShortcutTargets.isEmpty()) {
- mShortcutWriter.writeShortcutForOneWord(ptNode.mTerminalId,
- ptNode.mShortcutTargets.iterator());
- }
- }
- }
- mShortcutWriter.closeStreams();
}
@Override
public void writeForwardLinkAddress(int forwardLinkAddress) {
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
- forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
}
@Override
- public void writePtNode(final PtNode ptNode, final int parentPosition,
- final FormatOptions formatOptions, final FusionDictionary dict) {
- writePtNodeFlags(ptNode, formatOptions);
- writeParentPosition(parentPosition, ptNode, formatOptions);
- writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
- if (ptNode.isTerminal()) {
- writeTerminalId(ptNode.mTerminalId);
- }
- writeChildrenPosition(ptNode, formatOptions);
- }
-
- private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,
- final int terminalCount) throws IOException {
- final byte[] freqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE];
- final byte[] terminalAddressTableBuf =
- new byte[terminalCount * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE];
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.isTerminal()) {
- BinaryDictEncoderUtils.writeUIntToBuffer(freqBuf,
- ptNode.mTerminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE,
- ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf,
- ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
- ptNode.mCachedAddressAfterUpdate,
- FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- }
- }
- }
- mFreqOutStream.write(freqBuf);
- mTerminalAddressTableOutStream.write(terminalAddressTableBuf);
- }
-
- private void initUnigramTimestamps(final int terminalCount) throws IOException {
- // Initial value of time stamps for each word is 0.
- final byte[] unigramTimestampBuf =
- new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE];
- mUnigramTimestampOutStream.write(unigramTimestampBuf);
+ public void writePtNode(
+ PtNode ptNode, int parentPosition, FormatOptions formatOptions, FusionDictionary dict) {
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
index 91d9cf345..c46bc36bb 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
@@ -54,8 +54,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
}
private static class BigramContentUpdater extends SparseTableContentUpdater {
- private final boolean mHasTimestamp;
-
public BigramContentUpdater(final String name, final File baseDir,
final boolean hasTimestamp) {
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
@@ -63,7 +61,6 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
BigramContentReader.getContentFilenames(name, hasTimestamp),
BigramContentReader.getContentIds(hasTimestamp),
new DictionaryBufferFromWritableByteBufferFactory());
- mHasTimestamp = hasTimestamp;
}
public void insertBigramEntries(final int terminalId, final int frequency,
diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
index 296733fad..9b573b4b8 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
@@ -95,8 +95,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
@Override
protected Map<String, String> getHeaderAttributeMap() {
HashMap<String, String> attributeMap = new HashMap<String, String>();
- attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
- FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
attributeMap.put(FormatSpec.FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE,
diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
index a2c3ed44d..db628fe18 100644
--- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java
@@ -95,8 +95,7 @@ public final class UserHistoryDictIOUtils {
static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
- new FusionDictionary.DictionaryOptions(options, false,
- false));
+ new FusionDictionary.DictionaryOptions(options));
int profTotal = 0;
for (final String word1 : bigrams.keySet()) {
final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);