diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java')
-rw-r--r-- | java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java | 106 |
1 files changed, 85 insertions, 21 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index f9dcacf77..8d5b48a9b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -45,6 +45,7 @@ public class Ver4DictEncoder implements DictEncoder { private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mUnigramTimestampOutStream; private OutputStream mTerminalAddressTableOutStream; private File mDictDir; private String mBaseFilename; @@ -69,16 +70,16 @@ public class Ver4DictEncoder implements DictEncoder { private final File[] mContentFiles; protected final OutputStream[] mContentOutStreams; - public SparseTableContentWriter(final String name, final int contentCount, - final int initialCapacity, final int blockSize, final File baseDir, - final String[] contentFilenames, final String[] contentIds) { + public SparseTableContentWriter(final String name, final int initialCapacity, + final int blockSize, final File baseDir, final String[] contentFilenames, + final String[] contentIds) { if (contentFilenames.length != contentIds.length) { throw new RuntimeException("The length of contentFilenames and the length of" + " contentIds are different " + contentFilenames.length + ", " + contentIds.length); } - mContentCount = contentCount; - mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount); + mContentCount = contentFilenames.length; + mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); mAddressTableFiles = new File[mContentCount]; mContentFiles = new File[mContentCount]; @@ -113,16 +114,40 @@ public class Ver4DictEncoder implements DictEncoder { } private static class BigramContentWriter extends SparseTableContentWriter { + private final boolean mWriteTimestamp; public BigramContentWriter(final String name, final int initialCapacity, - final File baseDir) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT, - initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }, - new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }); + final File baseDir, final boolean writeTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp)); + mWriteTimestamp = writeTimestamp; } - public void writeBigramsForOneWord(final int terminalId, + private static String[] getContentFilenames(final String name, + final boolean writeTimestamp) { + final String[] contentFilenames; + if (writeTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } + return contentFilenames; + } + + private static String[] getContentIds(final boolean writeTimestamp) { + final String[] contentIds; + if (writeTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; + } + return contentIds; + } + + public void writeBigramsForOneWord(final int terminalId, final int bigramCount, final Iterator<WeightedString> bigramIterator, final FusionDictionary dict) throws IOException { write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, @@ -130,8 +155,16 @@ public class Ver4DictEncoder implements DictEncoder { @Override public void write(final OutputStream outStream) throws IOException { writeBigramsForOneWordInternal(outStream, bigramIterator, dict); - } - }); + }}); + if (mWriteTimestamp) { + write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId, + new SparseTableContentWriterInterface() { + @Override + public void write(final OutputStream outStream) throws IOException { + initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream, + bigramCount); + }}); + } } private void writeBigramsForOneWordInternal(final OutputStream outStream, @@ -151,13 +184,26 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } + + private void initBigramTimestampsCountersAndLevelsForOneWordInternal( + final OutputStream outStream, final int bigramCount) throws IOException { + for (int i = 0; i < bigramCount; ++i) { + // TODO: Figure out what initial values should be. + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_TIMESTAMP_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_COUNTER_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_LEVEL_SIZE); + } + } } private static class ShortcutContentWriter extends SparseTableContentWriter { public ShortcutContentWriter(final String name, final int initialCapacity, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT, - initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, new String[] { FormatSpec.SHORTCUT_CONTENT_ID }); } @@ -193,18 +239,20 @@ public class Ver4DictEncoder implements DictEncoder { mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); + final File timestampFile = new File(mDictDir, + mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } - if (!trieFile.exists()) trieFile.createNewFile(); - if (!freqFile.exists()) freqFile.createNewFile(); - if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); + if (formatOptions.mHasTimestamp) { + mUnigramTimestampOutStream = new FileOutputStream(timestampFile); + } } private void close() throws IOException { @@ -218,6 +266,9 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } + if (mUnigramTimestampOutStream != null) { + mUnigramTimestampOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; @@ -257,7 +308,11 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); - mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir); + if (formatOptions.mHasTimestamp) { + initUnigramTimestamps(terminalCount); + } + mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, + formatOptions.mHasTimestamp); writeBigrams(flatNodes, dict); mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir); writeShortcuts(flatNodes); @@ -299,7 +354,9 @@ public class Ver4DictEncoder implements DictEncoder { if (countSize != 1 && countSize != 2) { throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize); } - mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, ptNodeCount, + final int encodedPtNodeCount = (countSize == 2) ? + (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount; + mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, encodedPtNodeCount, countSize); } @@ -348,7 +405,7 @@ public class Ver4DictEncoder implements DictEncoder { for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { - mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, + mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(), ptNode.mBigrams.iterator(), dict); } } @@ -408,4 +465,11 @@ public class Ver4DictEncoder implements DictEncoder { mFreqOutStream.write(freqBuf); mTerminalAddressTableOutStream.write(terminalAddressTableBuf); } + + private void initUnigramTimestamps(final int terminalCount) throws IOException { + // Initial value of time stamps for each word is 0. + final byte[] unigramTimestampBuf = + new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE]; + mUnigramTimestampOutStream.write(unigramTimestampBuf); + } } |