about | summary | refs | log | tree | commit | diff | stats
path: root/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java')
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java | 340
1 file changed, 116 insertions, 224 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 3be62f066..734223ec2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -40,52 +40,26 @@ import java.util.Arrays;
public class Ver4DictDecoder extends AbstractDictDecoder {
private static final String TAG = Ver4DictDecoder.class.getSimpleName();
- protected static final int FILETYPE_TRIE = 1;
- protected static final int FILETYPE_FREQUENCY = 2;
- protected static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
- protected static final int FILETYPE_BIGRAM_FREQ = 4;
- protected static final int FILETYPE_SHORTCUT = 5;
- protected static final int FILETYPE_HEADER = 6;
-
- protected final File mDictDirectory;
- protected final DictionaryBufferFactory mBufferFactory;
+ private static final int FILETYPE_TRIE = 1;
+ private static final int FILETYPE_FREQUENCY = 2;
+ private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
+ private static final int FILETYPE_BIGRAM_FREQ = 4;
+ private static final int FILETYPE_SHORTCUT = 5;
+
+ private final File mDictDirectory;
+ private final DictionaryBufferFactory mBufferFactory;
protected DictBuffer mDictBuffer;
- protected DictBuffer mHeaderBuffer;
- protected DictBuffer mFrequencyBuffer;
- protected DictBuffer mTerminalAddressTableBuffer;
- private BigramContentReader mBigramReader;
- private ShortcutContentReader mShortcutReader;
-
- /**
- * Raw PtNode info straight out of a trie file in version 4 dictionary.
- */
- protected static final class Ver4PtNodeInfo {
- public final int mFlags;
- public final int[] mCharacters;
- public final int mTerminalId;
- public final int mChildrenPos;
- public final int mParentPos;
- public final int mNodeSize;
- public int mStartIndexOfCharacters;
- public int mEndIndexOfCharacters; // exclusive
-
- public Ver4PtNodeInfo(final int flags, final int[] characters, final int terminalId,
- final int childrenPos, final int parentPos, final int nodeSize) {
- mFlags = flags;
- mCharacters = characters;
- mTerminalId = terminalId;
- mChildrenPos = childrenPos;
- mParentPos = parentPos;
- mNodeSize = nodeSize;
- mStartIndexOfCharacters = 0;
- mEndIndexOfCharacters = characters.length;
- }
- }
+ private DictBuffer mFrequencyBuffer;
+ private DictBuffer mTerminalAddressTableBuffer;
+ private DictBuffer mBigramBuffer;
+ private DictBuffer mShortcutBuffer;
+ private SparseTable mBigramAddressTable;
+ private SparseTable mShortcutAddressTable;
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
mDictDirectory = dictDirectory;
- mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
+ mDictBuffer = mFrequencyBuffer = null;
if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
@@ -102,16 +76,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
/* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
mDictDirectory = dictDirectory;
mBufferFactory = factory;
- mDictBuffer = mHeaderBuffer = mFrequencyBuffer = null;
+ mDictBuffer = mFrequencyBuffer = null;
}
- protected File getFile(final int fileType) throws UnsupportedFormatException {
+ private File getFile(final int fileType) {
if (fileType == FILETYPE_TRIE) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
- } else if (fileType == FILETYPE_HEADER) {
- return new File(mDictDirectory,
- mDictDirectory.getName() + FormatSpec.HEADER_FILE_EXTENSION);
} else if (fileType == FILETYPE_FREQUENCY) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
@@ -127,27 +98,20 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
+ FormatSpec.SHORTCUT_CONTENT_ID);
} else {
- throw new UnsupportedFormatException("Unsupported kind of file : " + fileType);
+ throw new RuntimeException("Unsupported kind of file : " + fileType);
}
}
@Override
- public void openDictBuffer() throws FileNotFoundException, IOException,
- UnsupportedFormatException {
- if (!mDictDirectory.isDirectory()) {
- throw new UnsupportedFormatException("Format 4 dictionary needs a directory");
- }
- mHeaderBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_HEADER));
+ public void openDictBuffer() throws FileNotFoundException, IOException {
mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
- mBigramReader = new BigramContentReader(mDictDirectory.getName(),
- mDictDirectory, mBufferFactory, false);
- mBigramReader.openBuffers();
- mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory,
- mBufferFactory);
- mShortcutReader.openBuffers();
+ mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
+ loadBigramAddressSparseTable();
+ mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
+ loadShortcutAddressSparseTable();
}
@Override
@@ -155,134 +119,46 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
return mDictBuffer != null;
}
- @UsedForTesting
- /* package */ DictBuffer getHeaderBuffer() {
- return mHeaderBuffer;
- }
-
- @UsedForTesting
/* package */ DictBuffer getDictBuffer() {
return mDictBuffer;
}
@Override
public FileHeader readHeader() throws IOException, UnsupportedFormatException {
- if (mHeaderBuffer == null) {
+ if (mDictBuffer == null) {
openDictBuffer();
}
- mHeaderBuffer.position(0);
- final FileHeader header = super.readHeader(mHeaderBuffer);
+ final FileHeader header = super.readHeader(mDictBuffer);
final int version = header.mFormatOptions.mVersion;
- if (version != FormatSpec.VERSION4) {
+ if (version != 4) {
throw new UnsupportedFormatException("File header has a wrong version : " + version);
}
return header;
}
- /**
- * An auxiliary class for reading bigrams.
- */
- protected static class BigramContentReader extends SparseTableContentReader {
- public BigramContentReader(final String name, final File baseDir,
- final DictionaryBufferFactory factory, final boolean hasTimestamp) {
- super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
- }
-
- // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
- protected static String[] getContentFilenames(final String name,
- final boolean hasTimestamp) {
- final String[] contentFilenames;
- if (hasTimestamp) {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
- name + FormatSpec.BIGRAM_FILE_EXTENSION };
- } else {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
- }
- return contentFilenames;
- }
-
- // TODO: Consolidate this method and BigramContentWriter.getContentIds.
- protected static String[] getContentIds(final boolean hasTimestamp) {
- final String[] contentIds;
- if (hasTimestamp) {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
- FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
- } else {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
- }
- return contentIds;
- }
-
- public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
- final DictBuffer terminalAddressTableBuffer) {
- final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
- read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
- new SparseTableContentReaderInterface() {
- @Override
- public void read(final DictBuffer buffer) {
- while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
- // remaining bigram entries are ignored.
- final int bigramFlags = buffer.readUnsignedByte();
- final int targetTerminalId = buffer.readUnsignedInt24();
- terminalAddressTableBuffer.position(targetTerminalId
- * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- final int targetAddress =
- terminalAddressTableBuffer.readUnsignedInt24();
- bigrams.add(new PendingAttribute(bigramFlags
- & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
- targetAddress));
- if (0 == (bigramFlags
- & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
- break;
- }
- }
- if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- throw new RuntimeException("Too many bigrams in a PtNode ("
- + bigrams.size() + " but max is "
- + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
- }
- }
- });
- if (bigrams.isEmpty()) return null;
- return bigrams;
- }
+ private void loadBigramAddressSparseTable() throws IOException {
+ final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
+ mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
}
- /**
- * An auxiliary class for reading shortcuts.
- */
- protected static class ShortcutContentReader extends SparseTableContentReader {
- public ShortcutContentReader(final String name, final File baseDir,
- final DictionaryBufferFactory factory) {
- super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
- FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
- new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, factory);
- }
-
- public ArrayList<WeightedString> readShortcuts(final int terminalId) {
- final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
- read(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
- new SparseTableContentReaderInterface() {
- @Override
- public void read(final DictBuffer buffer) {
- while (true) {
- final int flags = buffer.readUnsignedByte();
- final String word = CharEncoding.readString(buffer);
- shortcuts.add(new WeightedString(word,
- flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
- if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
- break;
- }
- }
- }
- });
- if (shortcuts.isEmpty()) return null;
- return shortcuts;
- }
+ // TODO: Let's have something like SparseTableContentsReader in this class.
+ private void loadShortcutAddressSparseTable() throws IOException {
+ final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.SHORTCUT_CONTENT_ID);
+ final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.SHORTCUT_CONTENT_ID);
+ mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
+ new File[] { contentFile, timestampsFile },
+ FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
}
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
@@ -296,82 +172,102 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
}
- private final int[] mCharacterBufferForReadingVer4PtNodeInfo
- = new int[FormatSpec.MAX_WORD_LENGTH];
+ private ArrayList<WeightedString> readShortcuts(final int terminalId) {
+ if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null;
+
+ final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
+ final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
+ terminalId);
+ mShortcutBuffer.position(posOfShortcuts);
+ while (true) {
+ final int flags = mShortcutBuffer.readUnsignedByte();
+ final String word = CharEncoding.readString(mShortcutBuffer);
+ ret.add(new WeightedString(word,
+ flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
+ if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
+ return ret;
+ }
- /**
- * Reads PtNode from ptNodePos in the trie file and returns Ver4PtNodeInfo.
- *
- * @param ptNodePos the position of PtNode.
- * @param options the format options.
- * @return Ver4PtNodeInfo.
- */
// TODO: Make this buffer thread safe.
// TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
- protected Ver4PtNodeInfo readVer4PtNodeInfo(final int ptNodePos, final FormatOptions options) {
- int readingPos = ptNodePos;
+ private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
+ @Override
+ public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
+ int addressPointer = ptNodePos;
final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
- readingPos += FormatSpec.PTNODE_FLAGS_SIZE;
+ addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
- final int parentPos = PtNodeReader.readParentAddress(mDictBuffer, options);
+ final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
- readingPos += FormatSpec.PARENT_ADDRESS_SIZE;
+ addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
}
final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0;
int character = CharEncoding.readChar(mDictBuffer);
- readingPos += CharEncoding.getCharSize(character);
+ addressPointer += CharEncoding.getCharSize(character);
while (FormatSpec.INVALID_CHARACTER != character
&& index < FormatSpec.MAX_WORD_LENGTH) {
- mCharacterBufferForReadingVer4PtNodeInfo[index++] = character;
+ mCharacterBuffer[index++] = character;
character = CharEncoding.readChar(mDictBuffer);
- readingPos += CharEncoding.getCharSize(character);
+ addressPointer += CharEncoding.getCharSize(character);
}
- characters = Arrays.copyOfRange(mCharacterBufferForReadingVer4PtNodeInfo, 0, index);
+ characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
} else {
final int character = CharEncoding.readChar(mDictBuffer);
- readingPos += CharEncoding.getCharSize(character);
+ addressPointer += CharEncoding.getCharSize(character);
characters = new int[] { character };
}
final int terminalId;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
terminalId = PtNodeReader.readTerminalId(mDictBuffer);
- readingPos += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
+ addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
} else {
terminalId = PtNode.NOT_A_TERMINAL;
}
- int childrenPos = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
- if (childrenPos != FormatSpec.NO_CHILDREN_ADDRESS) {
- childrenPos += readingPos;
- }
- readingPos += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
-
- return new Ver4PtNodeInfo(flags, characters, terminalId, childrenPos, parentPos,
- readingPos - ptNodePos);
- }
-
- @Override
- public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
- final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options);
-
final int frequency;
- if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) {
- frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId);
+ if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
+ frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId);
} else {
frequency = PtNode.NOT_A_TERMINAL;
}
-
- final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(
- nodeInfo.mTerminalId);
- final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies(
- nodeInfo.mTerminalId, mTerminalAddressTableBuffer);
-
- return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags,
- nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos,
- shortcutTargets, bigrams);
+ int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
+ if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
+ childrenAddress += addressPointer;
+ }
+ addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
+ final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
+
+ final ArrayList<PendingAttribute> bigrams;
+ if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
+ bigrams = new ArrayList<PendingAttribute>();
+ final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
+ mBigramBuffer.position(posOfBigrams);
+ while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
+ // remaining bigram entries are ignored.
+ final int bigramFlags = mBigramBuffer.readUnsignedByte();
+ final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
+ mTerminalAddressTableBuffer.position(
+ targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+ final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
+ bigrams.add(new PendingAttribute(
+ bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
+ targetAddress));
+ if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
+ if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
+ + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
+ }
+ } else {
+ bigrams = null;
+ }
+ return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
+ parentAddress, childrenAddress, shortcutTargets, bigrams);
}
private void deleteDictFiles() {
@@ -422,14 +318,10 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
@Override
public boolean readAndFollowForwardLink() {
- final int forwardLinkPos = mDictBuffer.position();
- int nextRelativePos = BinaryDictDecoderUtils.readSInt24(mDictBuffer);
- if (nextRelativePos != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
- final int nextPos = forwardLinkPos + nextRelativePos;
- if (nextPos >= 0 && nextPos < mDictBuffer.limit()) {
- mDictBuffer.position(nextPos);
- return true;
- }
+ final int nextAddress = mDictBuffer.readUnsignedInt24();
+ if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
+ mDictBuffer.position(nextAddress);
+ return true;
}
return false;
}