about | summary | refs | log | tree | commit | diff | stats
path: root/java/src
diff options
context:
space:
mode:
Diffstat (limited to 'java/src')
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java 3
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java 32
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java 6
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java 80
4 files changed, 94 insertions, 27 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
index 9f7f502ea..fda97dafc 100644
--- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
@@ -60,7 +60,8 @@ public abstract class AbstractDictDecoder implements DictDecoder {
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
new FormatOptions(version,
- 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
+ 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE),
+ 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
return header;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 5a5d7af6b..605930ab4 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -37,13 +37,15 @@ public final class FormatSpec {
* sion
*
* o |
- * p | not used 4 bits
- * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG
- * i | FRENCH_LIGATURE_PROCESSING_FLAG
- * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE
- * n | GERMAN_UMLAUT_PROCESSING_FLAG
- * f |
- * lags
+ * p | not used 3 bits
+ * t | each unigram and bigram entry has a time stamp?
+ * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG
+ * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG
+ * n | FRENCH_LIGATURE_PROCESSING_FLAG
+ * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE
+ * l | GERMAN_UMLAUT_PROCESSING_FLAG
+ * a |
+ * gs
*
* h |
* e | size of the file header, 4bytes
@@ -211,6 +213,8 @@ public final class FormatSpec {
static final int SUPPORTS_DYNAMIC_UPDATE = 0x2;
static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
static final int CONTAINS_BIGRAMS_FLAG = 0x8;
+ // TODO: Implement timestamps for unigram.
+ static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
// TODO: Make this value adaptative to content data, store it in the header, and
// use it in the reading code.
@@ -276,9 +280,14 @@ public final class FormatSpec {
// is 584KB with the block size being 4.
// This is 91% of that of full address table.
static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
- static final int BIGRAM_CONTENT_COUNT = 1;
+ static final int BIGRAM_CONTENT_COUNT = 2;
static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
+ static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1;
static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
+ static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp";
+ static final int BIGRAM_TIMESTAMP_SIZE = 4;
+ static final int BIGRAM_COUNTER_SIZE = 1;
+ static final int BIGRAM_LEVEL_SIZE = 1;
static final int SHORTCUT_CONTENT_COUNT = 1;
static final int SHORTCUT_CONTENT_INDEX = 0;
@@ -321,6 +330,7 @@ public final class FormatSpec {
public final int mVersion;
public final boolean mSupportsDynamicUpdate;
public final boolean mHasTerminalId;
+ public final boolean mHasTimestamp;
@UsedForTesting
public FormatOptions(final int version) {
this(version, false);
@@ -328,6 +338,11 @@ public final class FormatSpec {
@UsedForTesting
public FormatOptions(final int version, final boolean supportsDynamicUpdate) {
+ this(version, supportsDynamicUpdate, false /* hasTimestamp */);
+ }
+
+ public FormatOptions(final int version, final boolean supportsDynamicUpdate,
+ final boolean hasTimestamp) {
mVersion = version;
if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) {
throw new RuntimeException("Dynamic updates are only supported with versions "
@@ -335,6 +350,7 @@ public final class FormatSpec {
}
mSupportsDynamicUpdate = supportsDynamicUpdate;
mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID);
+ mHasTimestamp = hasTimestamp;
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 53729075f..734223ec2 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -153,8 +153,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
+ FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ FormatSpec.SHORTCUT_CONTENT_ID);
+ final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName()
+ + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
+ + FormatSpec.SHORTCUT_CONTENT_ID);
mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
- new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
+ new File[] { contentFile, timestampsFile },
+ FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
}
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index f9dcacf77..fe9894246 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -69,16 +69,16 @@ public class Ver4DictEncoder implements DictEncoder {
private final File[] mContentFiles;
protected final OutputStream[] mContentOutStreams;
- public SparseTableContentWriter(final String name, final int contentCount,
- final int initialCapacity, final int blockSize, final File baseDir,
- final String[] contentFilenames, final String[] contentIds) {
+ public SparseTableContentWriter(final String name, final int initialCapacity,
+ final int blockSize, final File baseDir, final String[] contentFilenames,
+ final String[] contentIds) {
if (contentFilenames.length != contentIds.length) {
throw new RuntimeException("The length of contentFilenames and the length of"
+ " contentIds are different " + contentFilenames.length + ", "
+ contentIds.length);
}
- mContentCount = contentCount;
- mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount);
+ mContentCount = contentFilenames.length;
+ mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount);
mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
mAddressTableFiles = new File[mContentCount];
mContentFiles = new File[mContentCount];
@@ -113,16 +113,40 @@ public class Ver4DictEncoder implements DictEncoder {
}
private static class BigramContentWriter extends SparseTableContentWriter {
+ private final boolean mWriteTimestamp;
public BigramContentWriter(final String name, final int initialCapacity,
- final File baseDir) {
- super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT,
- initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION },
- new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID });
+ final File baseDir, final boolean writeTimestamp) {
+ super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity,
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+ getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp));
+ mWriteTimestamp = writeTimestamp;
+ }
+
+ /**
+  * Builds the base filenames of the bigram content files, one entry per content.
+  *
+  * Every content uses the same base filename; the array only grows to two entries
+  * when a timestamp content is written alongside the frequency content.
+  *
+  * @param name the base name of the dictionary files.
+  * @param writeTimestamp whether a timestamp content is written in addition to
+  *        the frequency content.
+  * @return an array holding one base filename per content.
+  */
+ private static String[] getContentFilenames(final String name,
+         final boolean writeTimestamp) {
+     final String bigramFilename = name + FormatSpec.BIGRAM_FILE_EXTENSION;
+     return writeTimestamp
+             ? new String[] { bigramFilename, bigramFilename }
+             : new String[] { bigramFilename };
+ }
+
+ /**
+  * Builds the ids of the bigram contents, in content-index order.
+  *
+  * The frequency id always comes first; the timestamp id follows it only when
+  * timestamps are written.
+  *
+  * @param writeTimestamp whether a timestamp content is written in addition to
+  *        the frequency content.
+  * @return an array holding one content id per content.
+  */
+ private static String[] getContentIds(final boolean writeTimestamp) {
+     if (!writeTimestamp) {
+         return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
+     }
+     return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
+             FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
}
- public void writeBigramsForOneWord(final int terminalId,
+ public void writeBigramsForOneWord(final int terminalId, final int bigramCount,
final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
throws IOException {
write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
@@ -130,8 +154,16 @@ public class Ver4DictEncoder implements DictEncoder {
@Override
public void write(final OutputStream outStream) throws IOException {
writeBigramsForOneWordInternal(outStream, bigramIterator, dict);
- }
- });
+ }});
+ if (mWriteTimestamp) {
+ write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId,
+ new SparseTableContentWriterInterface() {
+ @Override
+ public void write(final OutputStream outStream) throws IOException {
+ initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream,
+ bigramCount);
+ }});
+ }
}
private void writeBigramsForOneWordInternal(final OutputStream outStream,
@@ -151,13 +183,26 @@ public class Ver4DictEncoder implements DictEncoder {
FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
}
}
+
+ /**
+  * Writes zero-initialized timestamp, counter and level fields for each bigram
+  * of one word.
+  *
+  * For every bigram the fields are emitted in this order: timestamp
+  * (BIGRAM_TIMESTAMP_SIZE bytes), counter (BIGRAM_COUNTER_SIZE bytes), level
+  * (BIGRAM_LEVEL_SIZE bytes). Nothing is written when bigramCount is not positive.
+  *
+  * @param outStream the stream to write the fields to.
+  * @param bigramCount the number of bigrams the word has.
+  * @throws IOException if writing to the stream fails.
+  */
+ private void initBigramTimestampsCountersAndLevelsForOneWordInternal(
+         final OutputStream outStream, final int bigramCount) throws IOException {
+     // TODO: Figure out what initial values should be.
+     final int initialValue = 0;
+     // Field sizes in the order they are laid out for each bigram entry.
+     final int[] fieldSizes = {
+         FormatSpec.BIGRAM_TIMESTAMP_SIZE,
+         FormatSpec.BIGRAM_COUNTER_SIZE,
+         FormatSpec.BIGRAM_LEVEL_SIZE,
+     };
+     for (int remaining = bigramCount; remaining > 0; --remaining) {
+         for (final int fieldSize : fieldSizes) {
+             BinaryDictEncoderUtils.writeUIntToStream(outStream, initialValue, fieldSize);
+         }
+     }
+ }
}
private static class ShortcutContentWriter extends SparseTableContentWriter {
public ShortcutContentWriter(final String name, final int initialCapacity,
final File baseDir) {
- super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT,
- initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+ super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity,
+ FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
new String[] { FormatSpec.SHORTCUT_CONTENT_ID });
}
@@ -257,7 +302,8 @@ public class Ver4DictEncoder implements DictEncoder {
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
writeTerminalData(flatNodes, terminalCount);
- mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir);
+ mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir,
+ formatOptions.mHasTimestamp);
writeBigrams(flatNodes, dict);
mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir);
writeShortcuts(flatNodes);
@@ -348,7 +394,7 @@ public class Ver4DictEncoder implements DictEncoder {
for (final PtNodeArray nodeArray : flatNodes) {
for (final PtNode ptNode : nodeArray.mData) {
if (ptNode.mBigrams != null) {
- mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId,
+ mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(),
ptNode.mBigrams.iterator(), dict);
}
}