aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java')
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java269
1 files changed, 269 insertions, 0 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
new file mode 100644
index 000000000..75b75ae2e
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -0,0 +1,269 @@
+/*
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.annotations.UsedForTesting;
+import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
+import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
+import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
+import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+/**
+ * An implementation of DictEncoder for version 4 binary dictionary.
+ */
+@UsedForTesting
+public class Ver4DictEncoder implements DictEncoder {
+ private final File mDictPlacedDir;
+ private byte[] mTrieBuf;
+ private byte[] mFreqBuf;
+ private int mTriePos;
+ private OutputStream mTrieOutStream;
+ private OutputStream mFreqOutStream;
+
+ @UsedForTesting
+ public Ver4DictEncoder(final File dictPlacedDir) {
+ mDictPlacedDir = dictPlacedDir;
+ }
+
+ private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
+ throws FileNotFoundException, IOException {
+ final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
+ final String filename = header.getId() + "." + header.getVersion();
+ final File mDictDir = new File(mDictPlacedDir, filename);
+ final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION);
+ final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION);
+ if (!mDictDir.isDirectory()) {
+ if (mDictDir.exists()) mDictDir.delete();
+ mDictDir.mkdirs();
+ }
+ if (!trieFile.exists()) trieFile.createNewFile();
+ if (!freqFile.exists()) freqFile.createNewFile();
+ mTrieOutStream = new FileOutputStream(trieFile);
+ mFreqOutStream = new FileOutputStream(freqFile);
+ }
+
+ private void close() throws IOException {
+ try {
+ if (mTrieOutStream != null) {
+ mTrieOutStream.close();
+ }
+ if (mFreqOutStream != null) {
+ mFreqOutStream.close();
+ }
+ } finally {
+ mTrieOutStream = null;
+ mFreqOutStream = null;
+ }
+ }
+
+ @Override
+ public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
+ throws IOException, UnsupportedFormatException {
+ if (formatOptions.mVersion != FormatSpec.VERSION4) {
+ throw new UnsupportedFormatException("File header has a wrong version number : "
+ + formatOptions.mVersion);
+ }
+ if (!mDictPlacedDir.isDirectory()) {
+ throw new UnsupportedFormatException("Given path is not a directory.");
+ }
+
+ if (mTrieOutStream == null) {
+ openStreams(formatOptions, dict.mOptions);
+ }
+
+ BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions);
+
+ MakedictLog.i("Flattening the tree...");
+ ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
+ int terminalCount = 0;
+ for (final PtNodeArray array : flatNodes) {
+ for (final PtNode node : array.mData) {
+ if (node.isTerminal()) node.mTerminalId = terminalCount++;
+ }
+ }
+
+ MakedictLog.i("Computing addresses...");
+ BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
+ if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
+
+ final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
+ final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
+ mTrieBuf = new byte[bufferSize];
+ mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE];
+
+ MakedictLog.i("Writing file...");
+ for (PtNodeArray nodeArray : flatNodes) {
+ BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
+ }
+ if (MakedictLog.DBG) {
+ BinaryDictEncoderUtils.showStatistics(flatNodes);
+ MakedictLog.i("has " + terminalCount + " terminals.");
+ }
+ mTrieOutStream.write(mTrieBuf);
+ mFreqOutStream.write(mFreqBuf);
+
+ MakedictLog.i("Done");
+ close();
+ }
+
+ @Override
+ public void setPosition(int position) {
+ if (mTrieBuf == null || position < 0 || position >- mTrieBuf.length) return;
+ mTriePos = position;
+ }
+
+ @Override
+ public int getPosition() {
+ return mTriePos;
+ }
+
+ @Override
+ public void writePtNodeCount(int ptNodeCount) {
+ final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
+ // ptNodeCount must fit on one byte or two bytes.
+ // Please see comments in FormatSpec
+ if (countSize != 1 && countSize != 2) {
+ throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
+ }
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, ptNodeCount,
+ countSize);
+ }
+
+ private void writePtNodeFlags(final PtNode ptNode, final int parentAddress,
+ final FormatOptions formatOptions) {
+ final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
+ BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mTriePos, childrenPos,
+ formatOptions),
+ FormatSpec.PTNODE_FLAGS_SIZE);
+ }
+
+ private void writeParentPosition(int parentPos, final PtNode ptNode,
+ final FormatOptions formatOptions) {
+ if (parentPos != FormatSpec.NO_PARENT_ADDRESS) {
+ parentPos -= ptNode.mCachedAddressAfterUpdate;
+ }
+ mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos,
+ formatOptions);
+ }
+
+ private void writeCharacters(final int[] characters, final boolean hasSeveralChars) {
+ mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos);
+ if (hasSeveralChars) {
+ mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
+ }
+ }
+
+ private void writeTerminalId(final int terminalId) {
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId,
+ FormatSpec.PTNODE_TERMINAL_ID_SIZE);
+ }
+
+ private void writeFrequency(final int frequency, final int terminalId) {
+ final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE;
+ BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency,
+ FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
+ }
+
+ private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
+ final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
+ if (formatOptions.mSupportsDynamicUpdate) {
+ mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
+ mTriePos, childrenPos);
+ } else {
+ mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
+ mTriePos, childrenPos);
+ }
+ }
+
+ private void writeShortcuts(ArrayList<WeightedString> shortcuts) {
+ if (null == shortcuts || shortcuts.isEmpty()) return;
+
+ final int indexOfShortcutByteSize = mTriePos;
+ mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
+ final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
+ while (shortcutIterator.hasNext()) {
+ final WeightedString target = shortcutIterator.next();
+ final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
+ shortcutIterator.hasNext(),
+ target.mFrequency);
+ mTrieBuf[mTriePos++] = (byte)shortcutFlags;
+ final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos,
+ target.mWord);
+ mTriePos += shortcutShift;
+ }
+ final int shortcutByteSize = mTriePos - indexOfShortcutByteSize;
+ if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
+ throw new RuntimeException("Shortcut list too large : " + shortcutByteSize);
+ }
+ BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize,
+ shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
+ }
+
+ private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) {
+ if (bigrams == null) return;
+
+ final Iterator<WeightedString> bigramIterator = bigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = bigramIterator.next();
+ final PtNode target =
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
+ final int addressOfBigram = target.mCachedAddressAfterUpdate;
+ final int unigramFrequencyForThisWord = target.mFrequency;
+ final int offset = addressOfBigram
+ - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
+ offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
+ mTrieBuf[mTriePos++] = (byte) bigramFlags;
+ mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
+ mTriePos, Math.abs(offset));
+ }
+ }
+
+ @Override
+ public void writeForwardLinkAddress(int forwardLinkAddress) {
+ mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
+ forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
+ }
+
+ @Override
+ public void writePtNode(final PtNode ptNode, final int parentPosition,
+ final FormatOptions formatOptions, final FusionDictionary dict) {
+ writePtNodeFlags(ptNode, parentPosition, formatOptions);
+ writeParentPosition(parentPosition, ptNode, formatOptions);
+ writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
+ if (ptNode.isTerminal()) {
+ writeTerminalId(ptNode.mTerminalId);
+ writeFrequency(ptNode.mFrequency, ptNode.mTerminalId);
+ }
+ writeChildrenPosition(ptNode, formatOptions);
+ writeShortcuts(ptNode.mShortcutTargets);
+ writeBigrams(ptNode.mBigrams, dict);
+ }
+}