about | summary | refs | log | tree | commit | diff | stats
path: root/java
diff options
context:
space:
mode:
author: Yuichiro Hanada <yhanada@google.com> 2013-10-14 10:36:33 +0900
committer: Yuichiro Hanada <yhanada@google.com> 2013-10-16 17:24:59 +0900
commit: 098639d99f12a2910b4f942551fcdf7278d7616c (patch)
tree: 3d643d24b639fb7007f917d8cb6824fac7a6dd32 /java
parent: 7b5f2b71f5ce8af82b4c0faad38437bca750cffe (diff)
download: latinime-098639d99f12a2910b4f942551fcdf7278d7616c.tar.gz
latinime-098639d99f12a2910b4f942551fcdf7278d7616c.tar.xz
latinime-098639d99f12a2910b4f942551fcdf7278d7616c.zip
Add SparseContentTableReader.
Change-Id: I976afc5d320bc65835d659bae1d10d2cdc68262b
Diffstat (limited to 'java')
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java 120
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java 117
2 files changed, 198 insertions, 39 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java
new file mode 100644
index 000000000..00f401ea7
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+/**
+ * An auxiliary class for reading SparseTable and data written by SparseTableContentWriter.
+ */
+public class SparseTableContentReader {
+
+ /**
+ * A callback passed to SparseTableContentReader.read; it receives the positioned content buffer.
+ */
+ public interface SparseTableContentReaderInterface {
+ /**
+ * Reads data. Invoked by SparseTableContentReader.read only when the requested entry exists.
+ *
+ * @param buffer the DictBuffer. The position of the buffer is set to the head of data.
+ */
+ public void read(final DictBuffer buffer);
+ }
+
+ private final int mContentCount;
+ private final int mBlockSize;
+ protected final File mBaseDir;
+ private final File mLookupTableFile;
+ private final File[] mAddressTableFiles;
+ private final File[] mContentFiles;
+ private DictBuffer mLookupTableBuffer;
+ private final DictBuffer[] mAddressTableBuffers;
+ private final DictBuffer[] mContentBuffers;
+ private final DictionaryBufferFactory mFactory;
+
+ /**
+ * Sole constructor of SparseTableContentReader. No file is opened here; see openBuffers.
+ *
+ * @param name the name of SparseTable; prefix of the lookup and address table file names.
+ * @param blockSize the block size of the content table.
+ * @param baseDir the directory which contains the files of the content table.
+ * @param contentFilenames the file names of content files; same length as contentIds.
+ * @param contentIds the ids of contents. These ids are used for a suffix of a name of
+ * address files and content files.
+ * @param factory the DictionaryBufferFactory which is used for opening the files.
+ */
+ public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
+ final String[] contentFilenames, final String[] contentIds,
+ final DictionaryBufferFactory factory) {
+ if (contentFilenames.length != contentIds.length) {
+ throw new RuntimeException("The length of contentFilenames and the length of"
+ + " contentIds are different " + contentFilenames.length + ", "
+ + contentIds.length);
+ }
+ mBlockSize = blockSize;
+ mBaseDir = baseDir;
+ mFactory = factory;
+ mContentCount = contentFilenames.length;
+ mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
+ mAddressTableFiles = new File[mContentCount];
+ mContentFiles = new File[mContentCount];
+ for (int i = 0; i < mContentCount; ++i) {
+ mAddressTableFiles[i] = new File(mBaseDir,
+ name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
+ mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
+ }
+ mAddressTableBuffers = new DictBuffer[mContentCount];
+ mContentBuffers = new DictBuffer[mContentCount];
+ }
+ /** Opens the lookup table buffer and every address table and content buffer via the factory. */
+ public void openBuffers() throws FileNotFoundException, IOException {
+ mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
+ for (int i = 0; i < mContentCount; ++i) {
+ mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
+ mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]);
+ }
+ }
+ /** Passes the content buffer, positioned at the entry for index, to reader if the entry exists. */
+ protected void read(final int contentIndex, final int index,
+ final SparseTableContentReaderInterface reader) {
+ if (index < 0 || (index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
+ >= mLookupTableBuffer.limit()) {
+ return; // index is negative or lies beyond the end of the lookup table
+ }
+
+ mLookupTableBuffer.position((index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
+ final int posInAddressTable = mLookupTableBuffer.readInt();
+ if (posInAddressTable == SparseTable.NOT_EXIST) {
+ return; // the lookup table marks the block containing index as absent
+ }
+
+ mAddressTableBuffers[contentIndex].position(
+ (posInAddressTable + index % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
+ final int address = mAddressTableBuffers[contentIndex].readInt();
+ if (address == SparseTable.NOT_EXIST) {
+ return; // the address table holds no content address for index
+ }
+
+ mContentBuffers[contentIndex].position(address);
+ reader.read(mContentBuffers[contentIndex]);
+ }
+} \ No newline at end of file
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index a94e04a8e..64538c102 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -51,9 +51,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
protected DictBuffer mDictBuffer;
private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer;
- private DictBuffer mBigramBuffer;
private DictBuffer mShortcutBuffer;
- private SparseTable mBigramAddressTable;
+ private BigramContentReader mBigramReader;
private SparseTable mShortcutAddressTable;
@UsedForTesting
@@ -108,8 +107,9 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
- mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
- loadBigramAddressSparseTable();
+ mBigramReader = new BigramContentReader(mDictDirectory.getName(),
+ mDictDirectory, mBufferFactory, false);
+ mBigramReader.openBuffers();
mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
loadShortcutAddressSparseTable();
}
@@ -136,16 +136,6 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
return header;
}
- private void loadBigramAddressSparseTable() throws IOException {
- final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
- + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
- final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
- + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
- + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
- mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
- }
-
// TODO: Let's have something like SparseTableContentsReader in this class.
private void loadShortcutAddressSparseTable() throws IOException {
final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
@@ -161,6 +151,77 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
}
+ /**
+ * An auxiliary class for reading bigrams.
+ */
+ protected static class BigramContentReader extends SparseTableContentReader {
+ private final boolean mHasTimestamp;
+
+ public BigramContentReader(final String name, final File baseDir,
+ final DictionaryBufferFactory factory, final boolean hasTimestamp) {
+ super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+ getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
+ mHasTimestamp = hasTimestamp;
+ }
+
+ // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
+ private static String[] getContentFilenames(final String name, final boolean hasTimestamp) {
+ final String[] contentFilenames;
+ if (hasTimestamp) {
+ contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
+ name + FormatSpec.BIGRAM_FILE_EXTENSION };
+ } else {
+ contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
+ }
+ return contentFilenames;
+ }
+
+ // TODO: Consolidate this method and BigramContentWriter.getContentIds.
+ private static String[] getContentIds(final boolean hasTimestamp) {
+ final String[] contentIds;
+ if (hasTimestamp) {
+ contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
+ FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
+ } else {
+ contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
+ }
+ return contentIds;
+ }
+ /** Reads the bigrams of terminalId, resolving target addresses; returns null if none are read. */
+ public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
+ final DictBuffer terminalAddressTableBuffer) {
+ final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
+ read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+ new SparseTableContentReaderInterface() {
+ @Override
+ public void read(final DictBuffer buffer) {
+ while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ // The loop stops after FormatSpec.MAX_BIGRAMS_IN_A_PTNODE entries; reaching
+ // that count makes the check after the loop throw instead of dropping entries.
+ final int bigramFlags = buffer.readUnsignedByte();
+ final int targetTerminalId = buffer.readUnsignedInt24();
+ terminalAddressTableBuffer.position(
+ targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+ final int targetAddress = terminalAddressTableBuffer.readUnsignedInt24();
+ bigrams.add(new PendingAttribute(
+ bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
+ targetAddress));
+ if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
+ break;
+ }
+ }
+ if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
+ + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
+ }
+ }
+ });
+ if (bigrams.isEmpty()) return null;
+ return bigrams;
+ }
+ }
+
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
@@ -240,32 +301,10 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
+ final ArrayList<PendingAttribute> bigrams =
+ mBigramReader.readTargetsAndFrequencies(terminalId,
+ mTerminalAddressTableBuffer);
- final ArrayList<PendingAttribute> bigrams;
- if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
- bigrams = new ArrayList<PendingAttribute>();
- final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
- mBigramBuffer.position(posOfBigrams);
- while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
- // remaining bigram entries are ignored.
- final int bigramFlags = mBigramBuffer.readUnsignedByte();
- final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
- mTerminalAddressTableBuffer.position(
- targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
- bigrams.add(new PendingAttribute(
- bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
- targetAddress));
- if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
- }
- if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
- throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
- + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
- }
- } else {
- bigrams = null;
- }
return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
}