aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java19
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java14
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java6
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java4
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java269
5 files changed, 305 insertions, 7 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 21e9811ef..f333b0d86 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -126,8 +126,14 @@ public class BinaryDictEncoderUtils {
*/
private static int getPtNodeMaximumSize(final PtNode ptNode, final FormatOptions options) {
int size = getNodeHeaderSize(ptNode, options);
- // If terminal, one byte for the frequency
- if (ptNode.isTerminal()) size += FormatSpec.PTNODE_FREQUENCY_SIZE;
+ if (ptNode.isTerminal()) {
+ // If terminal, one byte for the frequency or four bytes for the terminal id.
+ if (options.mHasTerminalId) {
+ size += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
+ } else {
+ size += FormatSpec.PTNODE_FREQUENCY_SIZE;
+ }
+ }
size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
size += getShortcutListSize(ptNode.mShortcutTargets);
if (null != ptNode.mBigrams) {
@@ -345,7 +351,13 @@ public class BinaryDictEncoderUtils {
changed = true;
}
int nodeSize = getNodeHeaderSize(ptNode, formatOptions);
- if (ptNode.isTerminal()) nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
+ if (ptNode.isTerminal()) {
+ if (formatOptions.mHasTerminalId) {
+ nodeSize += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
+ } else {
+ nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
+ }
+ }
if (formatOptions.mSupportsDynamicUpdate) {
nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
} else if (null != ptNode.mChildren) {
@@ -787,7 +799,6 @@ public class BinaryDictEncoderUtils {
+ FormatSpec.MAX_TERMINAL_FREQUENCY
+ " : " + ptNode.mFrequency);
}
-
dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict);
}
if (formatOptions.mSupportsDynamicUpdate) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 44ae33de1..96ccd8e49 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -198,9 +198,12 @@ public final class FormatSpec {
public static final int MAGIC_NUMBER = 0x9BC13AFE;
static final int MINIMUM_SUPPORTED_VERSION = 2;
- static final int MAXIMUM_SUPPORTED_VERSION = 3;
+ static final int MAXIMUM_SUPPORTED_VERSION = 4;
static final int NOT_A_VERSION_NUMBER = -1;
static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3;
+ static final int FIRST_VERSION_WITH_TERMINAL_ID = 4;
+ static final int VERSION3 = 3;
+ static final int VERSION4 = 4;
// These options need to be the same numeric values as the one in the native reading code.
static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
@@ -251,11 +254,17 @@ public final class FormatSpec {
static final int PTNODE_TERMINATOR_SIZE = 1;
static final int PTNODE_FLAGS_SIZE = 1;
static final int PTNODE_FREQUENCY_SIZE = 1;
+ static final int PTNODE_TERMINAL_ID_SIZE = 4;
static final int PTNODE_MAX_ADDRESS_SIZE = 3;
static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1;
static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2;
+ // These values are used only by version 4 or later.
+ static final String TRIE_FILE_EXTENSION = ".trie";
+ static final String FREQ_FILE_EXTENSION = ".freq";
+ static final int FREQUENCY_AND_FLAGS_SIZE = 2;
+
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
static final int NO_FORWARD_LINK_ADDRESS = 0;
@@ -264,6 +273,7 @@ public final class FormatSpec {
static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127
static final int MAX_PTNODES_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767
static final int MAX_BIGRAMS_IN_A_PTNODE = 10000;
+ static final int MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE = 0xFFFF;
static final int MAX_TERMINAL_FREQUENCY = 255;
static final int MAX_BIGRAM_FREQUENCY = 15;
@@ -287,6 +297,7 @@ public final class FormatSpec {
public static final class FormatOptions {
public final int mVersion;
public final boolean mSupportsDynamicUpdate;
+ public final boolean mHasTerminalId;
@UsedForTesting
public FormatOptions(final int version) {
this(version, false);
@@ -300,6 +311,7 @@ public final class FormatSpec {
+ FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior.");
}
mSupportsDynamicUpdate = supportsDynamicUpdate;
+ mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 3e685a3d7..be653feec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -111,6 +111,7 @@ public final class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mShortcutTargets;
ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
+ int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal.
PtNodeArray mChildren;
boolean mIsNotAWord; // Only a shortcut
boolean mIsBlacklistEntry;
@@ -129,6 +130,7 @@ public final class FusionDictionary implements Iterable<Word> {
final boolean isNotAWord, final boolean isBlacklistEntry) {
mChars = chars;
mFrequency = frequency;
+ mTerminalId = frequency;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mChildren = null;
@@ -156,6 +158,10 @@ public final class FusionDictionary implements Iterable<Word> {
mChildren.mData.add(n);
}
+ public int getTerminalId() {
+ return mTerminalId;
+ }
+
public boolean isTerminal() {
return NOT_A_TERMINAL != mFrequency;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java
index 48a823d43..222a0f474 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java
@@ -68,7 +68,7 @@ public class Ver3DictEncoder implements DictEncoder {
@Override
public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
- if (formatOptions.mVersion > 3) {
+ if (formatOptions.mVersion > FormatSpec.VERSION3) {
throw new UnsupportedFormatException(
"The given format options has wrong version number : "
+ formatOptions.mVersion);
@@ -200,7 +200,7 @@ public class Ver3DictEncoder implements DictEncoder {
mPosition += shortcutShift;
}
final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
- if (shortcutByteSize > 0xFFFF) {
+ if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
throw new RuntimeException("Shortcut list too large");
}
BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
new file mode 100644
index 000000000..75b75ae2e
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -0,0 +1,269 @@
+/*
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+/**
+ * An implementation of DictEncoder for version 4 binary dictionary.
+ */
+@UsedForTesting
+public class Ver4DictEncoder implements DictEncoder {
+ private final File mDictPlacedDir;
+ private byte[] mTrieBuf;
+ private byte[] mFreqBuf;
+ private int mTriePos;
+ private OutputStream mTrieOutStream;
+ private OutputStream mFreqOutStream;
+
+ @UsedForTesting
+ public Ver4DictEncoder(final File dictPlacedDir) {
+ mDictPlacedDir = dictPlacedDir;
+ }
+
+ private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
+ throws FileNotFoundException, IOException {
+ final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
+ final String filename = header.getId() + "." + header.getVersion();
+ final File mDictDir = new File(mDictPlacedDir, filename);
+ final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION);
+ final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION);
+ if (!mDictDir.isDirectory()) {
+ if (mDictDir.exists()) mDictDir.delete();
+ mDictDir.mkdirs();
+ }
+ if (!trieFile.exists()) trieFile.createNewFile();
+ if (!freqFile.exists()) freqFile.createNewFile();
+ mTrieOutStream = new FileOutputStream(trieFile);
+ mFreqOutStream = new FileOutputStream(freqFile);
+ }
+
+ private void close() throws IOException {
+ try {
+ if (mTrieOutStream != null) {
+ mTrieOutStream.close();
+ }
+ if (mFreqOutStream != null) {
+ mFreqOutStream.close();
+ }
+ } finally {
+ mTrieOutStream = null;
+ mFreqOutStream = null;
+ }
+ }
+
+ @Override
+ public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
+ throws IOException, UnsupportedFormatException {
+ if (formatOptions.mVersion != FormatSpec.VERSION4) {
+ throw new UnsupportedFormatException("File header has a wrong version number : "
+ + formatOptions.mVersion);
+ }
+ if (!mDictPlacedDir.isDirectory()) {
+ throw new UnsupportedFormatException("Given path is not a directory.");
+ }
+
+ if (mTrieOutStream == null) {
+ openStreams(formatOptions, dict.mOptions);
+ }
+
+ BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions);
+
+ MakedictLog.i("Flattening the tree...");
+ ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
+ int terminalCount = 0;
+ for (final PtNodeArray array : flatNodes) {
+ for (final PtNode node : array.mData) {
+ if (node.isTerminal()) node.mTerminalId = terminalCount++;
+ }
+ }
+
+ MakedictLog.i("Computing addresses...");
+ BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
+ if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
+
+ final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
+ final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
+ mTrieBuf = new byte[bufferSize];
+ mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE];
+
+ MakedictLog.i("Writing file...");
+ for (PtNodeArray nodeArray : flatNodes) {
+ BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
+ }
+ if (MakedictLog.DBG) {
+ BinaryDictEncoderUtils.showStatistics(flatNodes);
+ MakedictLog.i("has " + terminalCount + " terminals.");
+ }
+ mTrieOutStream.write(mTrieBuf);
+ mFreqOutStream.write(mFreqBuf);
+
+ MakedictLog.i("Done");
+ close();
+ }
+
+ @Override
+ public void setPosition(int position) {
+ if (mTrieBuf == null || position < 0 || position >- mTrieBuf.length) return;
+ mTriePos = position;
+ }
+
+ @Override
+ public int getPosition() {
+ return mTriePos;
+ }
+
+ @Override
+ public void writePtNodeCount(int ptNodeCount) {
+ final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
+ // ptNodeCount must fit on one byte or two bytes.
+ // Please see comments in FormatSpec
+ if (countSize != 1 && countSize != 2) {
+ throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
+ }
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, ptNodeCount,
+ countSize);
+ }
+
+ private void writePtNodeFlags(final PtNode ptNode, final int parentAddress,
+ final FormatOptions formatOptions) {
+ final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
+ BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mTriePos, childrenPos,
+ formatOptions),
+ FormatSpec.PTNODE_FLAGS_SIZE);
+ }
+
+ private void writeParentPosition(int parentPos, final PtNode ptNode,
+ final FormatOptions formatOptions) {
+ if (parentPos != FormatSpec.NO_PARENT_ADDRESS) {
+ parentPos -= ptNode.mCachedAddressAfterUpdate;
+ }
+ mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos,
+ formatOptions);
+ }
+
+ private void writeCharacters(final int[] characters, final boolean hasSeveralChars) {
+ mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos);
+ if (hasSeveralChars) {
+ mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
+ }
+ }
+
+ private void writeTerminalId(final int terminalId) {
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId,
+ FormatSpec.PTNODE_TERMINAL_ID_SIZE);
+ }
+
+ private void writeFrequency(final int frequency, final int terminalId) {
+ final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE;
+ BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency,
+ FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
+ }
+
+ private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
+ final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
+ if (formatOptions.mSupportsDynamicUpdate) {
+ mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
+ mTriePos, childrenPos);
+ } else {
+ mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
+ mTriePos, childrenPos);
+ }
+ }
+
+ private void writeShortcuts(ArrayList<WeightedString> shortcuts) {
+ if (null == shortcuts || shortcuts.isEmpty()) return;
+
+ final int indexOfShortcutByteSize = mTriePos;
+ mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
+ final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = shortcutIterator.next();
+ final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
+ shortcutIterator.hasNext(),
+ target.mFrequency);
+ mTrieBuf[mTriePos++] = (byte)shortcutFlags;
+ final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos,
+ target.mWord);
+ mTriePos += shortcutShift;
+ }
+ final int shortcutByteSize = mTriePos - indexOfShortcutByteSize;
+ if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
+ throw new RuntimeException("Shortcut list too large : " + shortcutByteSize);
+ }
+ BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize,
+ shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
+ }
+
+ private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) {
+ if (bigrams == null) return;
+
+ final Iterator<WeightedString> bigramIterator = bigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = bigramIterator.next();
+ final PtNode target =
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
+ final int addressOfBigram = target.mCachedAddressAfterUpdate;
+ final int unigramFrequencyForThisWord = target.mFrequency;
+ final int offset = addressOfBigram
+ - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
+ offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
+ mTrieBuf[mTriePos++] = (byte) bigramFlags;
+ mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
+ mTriePos, Math.abs(offset));
+ }
+ }
+
+ @Override
+ public void writeForwardLinkAddress(int forwardLinkAddress) {
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
+ forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
+ }
+
+ @Override
+ public void writePtNode(final PtNode ptNode, final int parentPosition,
+ final FormatOptions formatOptions, final FusionDictionary dict) {
+ writePtNodeFlags(ptNode, parentPosition, formatOptions);
+ writeParentPosition(parentPosition, ptNode, formatOptions);
+ writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
+ if (ptNode.isTerminal()) {
+ writeTerminalId(ptNode.mTerminalId);
+ writeFrequency(ptNode.mFrequency, ptNode.mTerminalId);
+ }
+ writeChildrenPosition(ptNode, formatOptions);
+ writeShortcuts(ptNode.mShortcutTargets);
+ writeBigrams(ptNode.mBigrams, dict);
+ }
+}