aboutsummaryrefslogtreecommitdiffstats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java8
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java2
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java13
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java48
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java73
5 files changed, 107 insertions, 37 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 2c3d1346f..216492b4d 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -23,11 +23,11 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
@@ -219,14 +219,14 @@ public final class BinaryDictDecoderUtils {
}
/**
- * Writes a string with our character format to a ByteArrayOutputStream.
+ * Writes a string with our character format to an OutputStream.
*
* This will also write the terminator byte.
*
- * @param buffer the ByteArrayOutputStream to write to.
+ * @param buffer the OutputStream to write to.
* @param word the string to write.
*/
- static void writeString(final ByteArrayOutputStream buffer, final String word) {
+ static void writeString(final OutputStream buffer, final String word) throws IOException {
final int length = word.length();
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
final int codePoint = word.codePointAt(i);
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index b6024243f..f761829de 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -383,8 +383,8 @@ public class BinaryDictEncoderUtils {
nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
nodeSize + size, ptNode.mChildren));
}
- nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) {
+ nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
if (null != ptNode.mBigrams) {
for (WeightedString bigram : ptNode.mBigrams) {
final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index a5516bd41..5a5d7af6b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -266,15 +266,28 @@ public final class FormatSpec {
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
static final String BIGRAM_FILE_EXTENSION = ".bigram";
+ static final String SHORTCUT_FILE_EXTENSION = ".shortcut";
static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup";
static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+
+ // With the English main dictionary as of October 2013, the size of bigram address table is
+ // 584KB with the block size being 4.
+ // This is 91% of that of full address table.
static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
static final int BIGRAM_CONTENT_COUNT = 1;
static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
+ static final int SHORTCUT_CONTENT_COUNT = 1;
+ static final int SHORTCUT_CONTENT_INDEX = 0;
+ // With the English main dictionary as of October 2013, the size of shortcut address table is
+ // 29KB with the block size being 64.
+ // This is only 4.4% of that of full address table.
+ static final int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
+ static final String SHORTCUT_CONTENT_ID = "_shortcut";
+
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
static final int NO_FORWARD_LINK_ADDRESS = 0;
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 5089687da..2d2da5fe0 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -23,6 +23,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CollectionUtils;
import android.util.Log;
@@ -43,6 +44,7 @@ public class Ver4DictDecoder extends DictDecoder {
private static final int FILETYPE_FREQUENCY = 2;
private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
private static final int FILETYPE_BIGRAM_FREQ = 4;
+ private static final int FILETYPE_SHORTCUT = 5;
private final File mDictDirectory;
private final DictionaryBufferFactory mBufferFactory;
@@ -50,7 +52,9 @@ public class Ver4DictDecoder extends DictDecoder {
private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer;
private DictBuffer mBigramBuffer;
+ private DictBuffer mShortcutBuffer;
private SparseTable mBigramAddressTable;
+ private SparseTable mShortcutAddressTable;
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
@@ -89,6 +93,10 @@ public class Ver4DictDecoder extends DictDecoder {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
+ FormatSpec.BIGRAM_FREQ_CONTENT_ID);
+ } else if (fileType == FILETYPE_SHORTCUT) {
+ return new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
+ + FormatSpec.SHORTCUT_CONTENT_ID);
} else {
throw new RuntimeException("Unsupported kind of file : " + fileType);
}
@@ -102,6 +110,8 @@ public class Ver4DictDecoder extends DictDecoder {
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
loadBigramAddressSparseTable();
+ mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
+ loadShortcutAddressSparseTable();
}
@Override
@@ -136,6 +146,18 @@ public class Ver4DictDecoder extends DictDecoder {
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
}
+ // TODO: Let's have something like SparseTableContentsReader in this class.
+    /**
+     * Loads the sparse table that maps a terminal id to the position of its shortcut list.
+     *
+     * The table is read from the lookup file and the single content file found in the
+     * dictionary directory, and stored in mShortcutAddressTable.
+     *
+     * @throws IOException propagated from SparseTable.readFromFiles.
+     */
+    private void loadShortcutAddressSparseTable() throws IOException {
+        // Both table files share the "<dictionary directory name>.shortcut" prefix.
+        final String shortcutFilePrefix = mDictDirectory.getName()
+                + FormatSpec.SHORTCUT_FILE_EXTENSION;
+        final File lookupTable = new File(mDictDirectory,
+                shortcutFilePrefix + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+        final File addressContent = new File(mDictDirectory, shortcutFilePrefix
+                + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + FormatSpec.SHORTCUT_CONTENT_ID);
+        final File[] contentFiles = { addressContent };
+        mShortcutAddressTable = SparseTable.readFromFiles(lookupTable, contentFiles,
+                FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
+    }
+
+
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
@@ -147,6 +169,23 @@ public class Ver4DictDecoder extends DictDecoder {
}
}
+    /**
+     * Reads the shortcut targets attached to a terminal.
+     *
+     * The position of the shortcut list is looked up in mShortcutAddressTable, then entries
+     * are read from mShortcutBuffer until one is found without the "has next" flag.
+     *
+     * @param terminalId the id of the terminal whose shortcuts are wanted.
+     * @return the shortcut targets, or null if the address table has no entry for the terminal.
+     */
+    private ArrayList<WeightedString> readShortcuts(final int terminalId) {
+        // Look the position up once, through the named content index instead of a literal 0,
+        // so the existence check and the read can never address different content tables.
+        final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
+                terminalId);
+        if (posOfShortcuts == SparseTable.NOT_EXIST) return null;
+
+        final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
+        mShortcutBuffer.position(posOfShortcuts);
+        while (true) {
+            // Each entry is one flags byte followed by the target string.
+            final int flags = mShortcutBuffer.readUnsignedByte();
+            final String word = CharEncoding.readString(mShortcutBuffer);
+            ret.add(new WeightedString(word,
+                    flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
+            if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+        }
+        return ret;
+    }
+
// TODO: Make this buffer thread safe.
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
@@ -197,14 +236,7 @@ public class Ver4DictDecoder extends DictDecoder {
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
- final ArrayList<WeightedString> shortcutTargets;
- if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
- // readShortcut will add shortcuts to shortcutTargets.
- shortcutTargets = new ArrayList<WeightedString>();
- addressPointer += PtNodeReader.readShortcut(mDictBuffer, shortcutTargets);
- } else {
- shortcutTargets = null;
- }
+ final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index b38c33019..f9dcacf77 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -49,6 +49,7 @@ public class Ver4DictEncoder implements DictEncoder {
private File mDictDir;
private String mBaseFilename;
private BigramContentWriter mBigramWriter;
+ private ShortcutContentWriter mShortcutWriter;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
@@ -152,6 +153,39 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
+    /**
+     * A SparseTableContentWriter that stores the shortcut list of each terminal.
+     *
+     * Every shortcut is written as its flags followed by the target word.
+     */
+    private static class ShortcutContentWriter extends SparseTableContentWriter {
+        /**
+         * @param name the base name; FormatSpec.SHORTCUT_FILE_EXTENSION is appended to it.
+         * @param initialCapacity passed through to SparseTableContentWriter.
+         * @param baseDir passed through to SparseTableContentWriter.
+         */
+        public ShortcutContentWriter(final String name, final int initialCapacity,
+                final File baseDir) {
+            super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
+                    FormatSpec.SHORTCUT_CONTENT_COUNT,
+                    initialCapacity,
+                    FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                    baseDir,
+                    new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
+                    new String[] { FormatSpec.SHORTCUT_CONTENT_ID });
+        }
+
+        /**
+         * Writes all the shortcuts of one terminal as the content for that terminal id.
+         *
+         * @param terminalId the id of the terminal owning the shortcuts.
+         * @param shortcutIterator iterator over the shortcut targets to write.
+         * @throws IOException if writing to the underlying stream fails.
+         */
+        public void writeShortcutForOneWord(final int terminalId,
+                final Iterator<WeightedString> shortcutIterator) throws IOException {
+            // The callback receives the stream the shortcut list must be written to.
+            final SparseTableContentWriterInterface shortcutListWriter =
+                    new SparseTableContentWriterInterface() {
+                        @Override
+                        public void write(final OutputStream outStream) throws IOException {
+                            writeShortcutForOneWordInternal(outStream, shortcutIterator);
+                        }
+                    };
+            write(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, shortcutListWriter);
+        }
+
+        /**
+         * Writes every remaining shortcut of the iterator to the stream, flags first.
+         */
+        private void writeShortcutForOneWordInternal(final OutputStream outStream,
+                final Iterator<WeightedString> shortcutIterator) throws IOException {
+            while (shortcutIterator.hasNext()) {
+                final WeightedString shortcut = shortcutIterator.next();
+                // The flags record whether another shortcut follows this one.
+                final boolean hasMoreShortcuts = shortcutIterator.hasNext();
+                final int flags = BinaryDictEncoderUtils.makeShortcutFlags(hasMoreShortcuts,
+                        shortcut.mFrequency);
+                BinaryDictEncoderUtils.writeUIntToStream(outStream, flags,
+                        FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+                CharEncoding.writeString(outStream, shortcut.mWord);
+            }
+        }
+    }
+
private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
throws FileNotFoundException, IOException {
final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
@@ -225,6 +259,8 @@ public class Ver4DictEncoder implements DictEncoder {
writeTerminalData(flatNodes, terminalCount);
mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir);
writeBigrams(flatNodes, dict);
+ mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir);
+ writeShortcuts(flatNodes);
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
@@ -306,29 +342,6 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
- private void writeShortcuts(ArrayList<WeightedString> shortcuts) {
- if (null == shortcuts || shortcuts.isEmpty()) return;
-
- final int indexOfShortcutByteSize = mTriePos;
- mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
- final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
- while (shortcutIterator.hasNext()) {
- final WeightedString target = shortcutIterator.next();
- final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
- shortcutIterator.hasNext(), target.mFrequency);
- mTrieBuf[mTriePos++] = (byte)shortcutFlags;
- final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos,
- target.mWord);
- mTriePos += shortcutShift;
- }
- final int shortcutByteSize = mTriePos - indexOfShortcutByteSize;
- if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
- throw new RuntimeException("Shortcut list too large : " + shortcutByteSize);
- }
- BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize,
- shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
- }
-
private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
throws IOException {
mBigramWriter.openStreams();
@@ -343,6 +356,19 @@ public class Ver4DictEncoder implements DictEncoder {
mBigramWriter.closeStreams();
}
+ private void writeShortcuts(final ArrayList<PtNodeArray> flatNodes) throws IOException {
+ mShortcutWriter.openStreams();
+ for (final PtNodeArray nodeArray : flatNodes) {
+ for (final PtNode ptNode : nodeArray.mData) {
+ if (ptNode.mShortcutTargets != null && !ptNode.mShortcutTargets.isEmpty()) {
+ mShortcutWriter.writeShortcutForOneWord(ptNode.mTerminalId,
+ ptNode.mShortcutTargets.iterator());
+ }
+ }
+ }
+ mShortcutWriter.closeStreams();
+ }
+
@Override
public void writeForwardLinkAddress(int forwardLinkAddress) {
mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
@@ -359,7 +385,6 @@ public class Ver4DictEncoder implements DictEncoder {
writeTerminalId(ptNode.mTerminalId);
}
writeChildrenPosition(ptNode, formatOptions);
- writeShortcuts(ptNode.mShortcutTargets);
}
private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,