aboutsummaryrefslogtreecommitdiffstats
path: root/java/src/com
diff options
context:
space:
mode:
Diffstat (limited to 'java/src/com')
-rw-r--r--java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java4
-rw-r--r--java/src/com/android/inputmethod/latin/BinaryDictionary.java3
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java14
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/FormatSpec.java4
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java)19
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java)8
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java (renamed from java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java)6
-rw-r--r--java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java427
8 files changed, 76 insertions, 409 deletions
diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
index e6fb9807e..1aee22baf 100644
--- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
@@ -21,7 +21,7 @@ import android.util.Log;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
-import com.android.inputmethod.latin.makedict.Ver3DictEncoder;
+import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
import java.io.File;
import java.io.IOException;
@@ -64,7 +64,7 @@ abstract public class AbstractDictionaryWriter {
final String tempFilePath = file.getAbsolutePath() + ".temp";
final File tempFile = new File(tempFilePath);
try {
- final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile);
+ final DictEncoder dictEncoder = new Ver2DictEncoder(tempFile);
writeDictionary(dictEncoder, attributeMap);
tempFile.renameTo(file);
} catch (IOException e) {
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index db4234c63..95ac3e203 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -367,6 +367,7 @@ public final class BinaryDictionary extends Dictionary {
public static class LanguageModelParam {
public final int[] mWord0;
public final int[] mWord1;
+ // TODO: this needs to be a list of shortcuts
public final int[] mShortcutTarget;
public final int mUnigramProbability;
public final int mBigramProbability;
@@ -375,7 +376,7 @@ public final class BinaryDictionary extends Dictionary {
public final boolean mIsBlacklisted;
public final int mTimestamp;
- // Constructor for unigram.
+ // Constructor for unigram. TODO: support shortcuts
public LanguageModelParam(final String word, final int unigramProbability,
final int timestamp) {
mWord0 = null;
diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
index ff03190a3..97ad667a6 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
@@ -55,7 +55,7 @@ public final class DynamicBinaryDictIOUtils {
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
- private static void updateParentAddress(final Ver3DictUpdater dictUpdater,
+ private static void updateParentAddress(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -88,7 +88,7 @@ public final class DynamicBinaryDictIOUtils {
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
- private static void updateParentAddresses(final Ver3DictUpdater dictUpdater,
+ private static void updateParentAddresses(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictUpdater.getPosition();
@@ -114,7 +114,7 @@ public final class DynamicBinaryDictIOUtils {
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
- private static void updateChildrenAddress(final Ver3DictUpdater dictUpdater,
+ private static void updateChildrenAddress(final Ver2DictUpdater dictUpdater,
final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -134,7 +134,7 @@ public final class DynamicBinaryDictIOUtils {
* Helper method to move a PtNode to the tail of the file.
*/
private static int movePtNode(final OutputStream destination,
- final Ver3DictUpdater dictUpdater, final PtNodeInfo info,
+ final Ver2DictUpdater dictUpdater, final PtNodeInfo info,
final int nodeArrayOriginAddress, final int oldNodeAddress,
final FormatOptions formatOptions) throws IOException {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -151,7 +151,7 @@ public final class DynamicBinaryDictIOUtils {
}
@SuppressWarnings("unused")
- private static void updateForwardLink(final Ver3DictUpdater dictUpdater,
+ private static void updateForwardLink(final Ver2DictUpdater dictUpdater,
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) {
final DictBuffer dictBuffer = dictUpdater.getDictBuffer();
@@ -199,7 +199,7 @@ public final class DynamicBinaryDictIOUtils {
final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
- final Ver3DictUpdater dictUpdater, final int oldPtNodeArrayOrigin,
+ final Ver2DictUpdater dictUpdater, final int oldPtNodeArrayOrigin,
final int oldPtNodeOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0;
final int newPtNodeOrigin = fileEndAddress + 1;
@@ -252,7 +252,7 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting
- public static void insertWord(final Ver3DictUpdater dictUpdater,
+ public static void insertWord(final Ver2DictUpdater dictUpdater,
final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 20ddba836..f23fe4656 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -401,7 +401,7 @@ public final class FormatSpec {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile, bufferType);
} else if (dictFile.isFile()) {
- return new Ver3DictDecoder(dictFile, bufferType);
+ return new Ver2DictDecoder(dictFile, bufferType);
}
return null;
}
@@ -411,7 +411,7 @@ public final class FormatSpec {
if (dictFile.isDirectory()) {
return new Ver4DictDecoder(dictFile, factory);
} else if (dictFile.isFile()) {
- return new Ver3DictDecoder(dictFile, factory);
+ return new Ver2DictDecoder(dictFile, factory);
}
return null;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index acab4f8a5..e9667ab0b 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -34,18 +34,11 @@ import java.util.ArrayList;
import java.util.Arrays;
/**
- * An implementation of DictDecoder for version 3 binary dictionary.
+ * An implementation of DictDecoder for version 2 binary dictionary.
*/
@UsedForTesting
-public class Ver3DictDecoder extends AbstractDictDecoder {
- private static final String TAG = Ver3DictDecoder.class.getSimpleName();
-
- static {
- JniUtils.loadNativeLibrary();
- }
-
- // TODO: implement something sensical instead of just a phony method
- private static native int doNothing();
+public class Ver2DictDecoder extends AbstractDictDecoder {
+ private static final String TAG = Ver2DictDecoder.class.getSimpleName();
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
private static int readFrequency(final DictBuffer dictBuffer) {
@@ -57,7 +50,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
private final DictionaryBufferFactory mBufferFactory;
protected DictBuffer mDictBuffer;
- /* package */ Ver3DictDecoder(final File file, final int factoryFlag) {
+ /* package */ Ver2DictDecoder(final File file, final int factoryFlag) {
mDictionaryBinaryFile = file;
mDictBuffer = null;
@@ -72,7 +65,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
}
}
- /* package */ Ver3DictDecoder(final File file, final DictionaryBufferFactory factory) {
+ /* package */ Ver2DictDecoder(final File file, final DictionaryBufferFactory factory) {
mDictionaryBinaryFile = file;
mBufferFactory = factory;
}
@@ -166,7 +159,7 @@ public class Ver3DictDecoder extends AbstractDictDecoder {
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
+ addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
addressPointer);
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index 92eb861d6..665544228 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
@@ -31,16 +31,16 @@ import java.util.ArrayList;
import java.util.Iterator;
/**
- * An implementation of DictEncoder for version 3 binary dictionary.
+ * An implementation of DictEncoder for version 2 binary dictionary.
*/
-public class Ver3DictEncoder implements DictEncoder {
+public class Ver2DictEncoder implements DictEncoder {
private final File mDictFile;
private OutputStream mOutStream;
private byte[] mBuffer;
private int mPosition;
- public Ver3DictEncoder(final File dictFile) {
+ public Ver2DictEncoder(final File dictFile) {
mDictFile = dictFile;
mOutStream = null;
mBuffer = null;
@@ -49,7 +49,7 @@ public class Ver3DictEncoder implements DictEncoder {
// This constructor is used only by BinaryDictOffdeviceUtilsTests.
// If you want to use this in the production code, you should consider keeping consistency of
// the interface of Ver3DictDecoder by using factory.
- public Ver3DictEncoder(final OutputStream outStream) {
+ public Ver2DictEncoder(final OutputStream outStream) {
mDictFile = null;
mOutStream = outStream;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java
index 07adda625..6419340ff 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictUpdater.java
@@ -27,14 +27,14 @@ import java.io.OutputStream;
import java.util.ArrayList;
/**
- * An implementation of DictUpdater for version 3 binary dictionary.
+ * An implementation of DictUpdater for version 2 binary dictionary.
*/
@UsedForTesting
-public class Ver3DictUpdater extends Ver3DictDecoder implements DictUpdater {
+public class Ver2DictUpdater extends Ver2DictDecoder implements DictUpdater {
private OutputStream mOutStream;
@UsedForTesting
- public Ver3DictUpdater(final File dictFile, final int factoryType) {
+ public Ver2DictUpdater(final File dictFile, final int factoryType) {
// DictUpdater must have an updatable DictBuffer.
super(dictFile, ((factoryType & MASK_DICTBUFFER) == USE_BYTEARRAY)
? USE_BYTEARRAY : USE_WRITABLE_BYTEBUFFER);
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 8b80ebe63..a746f9945 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -1,5 +1,4 @@
/*
-/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,25 +17,15 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
-import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
-import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
+import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
-import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
-import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
-import com.android.inputmethod.latin.utils.CollectionUtils;
-import com.android.inputmethod.latin.utils.FileUtils;
+import com.android.inputmethod.latin.utils.LocaleUtils;
import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
/**
* An implementation of DictEncoder for version 4 binary dictionary.
@@ -44,197 +33,19 @@ import java.util.Iterator;
@UsedForTesting
public class Ver4DictEncoder implements DictEncoder {
private final File mDictPlacedDir;
- private byte[] mTrieBuf;
- private int mTriePos;
- private OutputStream mTrieOutStream;
- private OutputStream mHeaderOutStream;
- private OutputStream mFreqOutStream;
- private OutputStream mUnigramTimestampOutStream;
- private OutputStream mTerminalAddressTableOutStream;
- private File mDictDir;
- private String mBaseFilename;
- private BigramContentWriter mBigramWriter;
- private ShortcutContentWriter mShortcutWriter;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
mDictPlacedDir = dictPlacedDir;
}
- private static class BigramContentWriter extends SparseTableContentWriter {
- private final boolean mWriteTimestamp;
-
- public BigramContentWriter(final String name, final int initialCapacity,
- final File baseDir, final boolean writeTimestamp) {
- super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity,
- FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp));
- mWriteTimestamp = writeTimestamp;
- }
-
- private static String[] getContentFilenames(final String name,
- final boolean writeTimestamp) {
- final String[] contentFilenames;
- if (writeTimestamp) {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
- name + FormatSpec.BIGRAM_FILE_EXTENSION };
- } else {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
- }
- return contentFilenames;
- }
-
- private static String[] getContentIds(final boolean writeTimestamp) {
- final String[] contentIds;
- if (writeTimestamp) {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
- FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
- } else {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
- }
- return contentIds;
- }
-
- public void writeBigramsForOneWord(final int terminalId, final int bigramCount,
- final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
- throws IOException {
- write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- writeBigramsForOneWordInternal(outStream, bigramIterator, dict);
- }});
- if (mWriteTimestamp) {
- write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream,
- bigramCount);
- }});
- }
- }
-
- private void writeBigramsForOneWordInternal(final OutputStream outStream,
- final Iterator<WeightedString> bigramIterator, final FusionDictionary dict)
- throws IOException {
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int unigramFrequencyForThisWord = target.mFrequency;
- final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
- bigramIterator.hasNext(), 0, bigram.mFrequency,
- unigramFrequencyForThisWord, bigram.mWord);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, bigramFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, target.mTerminalId,
- FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
- }
- }
-
- private void initBigramTimestampsCountersAndLevelsForOneWordInternal(
- final OutputStream outStream, final int bigramCount) throws IOException {
- for (int i = 0; i < bigramCount; ++i) {
- // TODO: Figure out what initial values should be.
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_TIMESTAMP_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_COUNTER_SIZE);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */,
- FormatSpec.BIGRAM_LEVEL_SIZE);
- }
- }
- }
-
- private static class ShortcutContentWriter extends SparseTableContentWriter {
- public ShortcutContentWriter(final String name, final int initialCapacity,
- final File baseDir) {
- super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity,
- FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
- new String[] { FormatSpec.SHORTCUT_CONTENT_ID });
- }
-
- public void writeShortcutForOneWord(final int terminalId,
- final Iterator<WeightedString> shortcutIterator) throws IOException {
- write(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
- new SparseTableContentWriterInterface() {
- @Override
- public void write(final OutputStream outStream) throws IOException {
- writeShortcutForOneWordInternal(outStream, shortcutIterator);
- }
- });
- }
-
- private void writeShortcutForOneWordInternal(final OutputStream outStream,
- final Iterator<WeightedString> shortcutIterator) throws IOException {
- while (shortcutIterator.hasNext()) {
- final WeightedString target = shortcutIterator.next();
- final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
- shortcutIterator.hasNext(), target.mFrequency);
- BinaryDictEncoderUtils.writeUIntToStream(outStream, shortcutFlags,
- FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- CharEncoding.writeString(outStream, target.mWord);
- }
- }
- }
-
- private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
- throws FileNotFoundException, IOException {
- final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
- mBaseFilename = header.getId() + "." + header.getVersion();
- mDictDir = new File(mDictPlacedDir, mBaseFilename);
- final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
- final File headerFile = new File(mDictDir,
- mBaseFilename + FormatSpec.HEADER_FILE_EXTENSION);
- final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
- final File timestampFile = new File(mDictDir,
- mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION);
- final File terminalAddressTableFile = new File(mDictDir,
- mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
- if (!mDictDir.isDirectory()) {
- if (mDictDir.exists()) {
- FileUtils.deleteRecursively(mDictDir);
- }
- mDictDir.mkdirs();
- }
- mTrieOutStream = new FileOutputStream(trieFile);
- mHeaderOutStream = new FileOutputStream(headerFile);
- mFreqOutStream = new FileOutputStream(freqFile);
- mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
- if (formatOptions.mHasTimestamp) {
- mUnigramTimestampOutStream = new FileOutputStream(timestampFile);
- }
- }
-
- private void close() throws IOException {
- try {
- if (mTrieOutStream != null) {
- mTrieOutStream.close();
- }
- if (mHeaderOutStream != null) {
- mHeaderOutStream.close();
- }
- if (mFreqOutStream != null) {
- mFreqOutStream.close();
- }
- if (mTerminalAddressTableOutStream != null) {
- mTerminalAddressTableOutStream.close();
- }
- if (mUnigramTimestampOutStream != null) {
- mUnigramTimestampOutStream.close();
- }
- } finally {
- mTrieOutStream = null;
- mHeaderOutStream = null;
- mFreqOutStream = null;
- mTerminalAddressTableOutStream = null;
- }
- }
-
+ // TODO: This builds a FusionDictionary first and iterates it to add words to the binary
+ // dictionary. However, it is possible to just add words directly to the binary dictionary
+ // instead.
+ // In the long run, when we stop supporting version 2, FusionDictionary will become deprecated
+ // and we can remove it. Then we'll be able to just call BinaryDictionary directly.
@Override
- public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
+ public void writeDictionary(FusionDictionary dict, FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
if (formatOptions.mVersion != FormatSpec.VERSION4) {
throw new UnsupportedFormatException("File header has a wrong version number : "
@@ -243,208 +54,70 @@ public class Ver4DictEncoder implements DictEncoder {
if (!mDictPlacedDir.isDirectory()) {
throw new UnsupportedFormatException("Given path is not a directory.");
}
-
- if (mTrieOutStream == null) {
- openStreams(formatOptions, dict.mOptions);
- }
-
- BinaryDictEncoderUtils.writeDictionaryHeader(mHeaderOutStream, dict, formatOptions);
-
- MakedictLog.i("Flattening the tree...");
- ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
- int terminalCount = 0;
- final ArrayList<PtNode> nodes = CollectionUtils.newArrayList();
- for (final PtNodeArray array : flatNodes) {
- for (final PtNode node : array.mData) {
- if (node.isTerminal()) {
- nodes.add(node);
- node.mTerminalId = terminalCount++;
+ if (!BinaryDictionary.createEmptyDictFile(mDictPlacedDir.getAbsolutePath(),
+ FormatSpec.VERSION4, dict.mOptions.mAttributes)) {
+ throw new IOException("Cannot create dictionary file");
+ }
+ final BinaryDictionary binaryDict = new BinaryDictionary(mDictPlacedDir.getAbsolutePath(),
+ 0l, mDictPlacedDir.length(), true /* useFullEditDistance */,
+ LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get(
+ FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE)),
+ Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */,
+ true /* isUpdatable */);
+ if (!binaryDict.isValidDictionary()) {
+ // Somehow createEmptyDictFile returned true, but the file was not created correctly
+ throw new IOException("Cannot create dictionary file");
+ }
+ for (final Word word : dict) {
+ // TODO: switch to addMultipleDictionaryEntries when they support shortcuts
+ if (null == word.mShortcutTargets || word.mShortcutTargets.isEmpty()) {
+ binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ null /* shortcutTarget */, 0 /* shortcutProbability */,
+ word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
+ } else {
+ for (final WeightedString shortcutTarget : word.mShortcutTargets) {
+ binaryDict.addUnigramWord(word.mWord, word.mFrequency,
+ shortcutTarget.mWord, shortcutTarget.mFrequency,
+ word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
}
}
- }
- Collections.sort(nodes, new Comparator<PtNode>() {
- @Override
- public int compare(final PtNode lhs, final PtNode rhs) {
- if (lhs.mFrequency != rhs.mFrequency) {
- return lhs.mFrequency < rhs.mFrequency ? -1 : 1;
- }
- if (lhs.mTerminalId < rhs.mTerminalId) return -1;
- if (lhs.mTerminalId > rhs.mTerminalId) return 1;
- return 0;
+ if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDict.flushWithGC();
}
- });
- int count = 0;
- for (final PtNode node : nodes) {
- node.mTerminalId = count++;
- }
-
- MakedictLog.i("Computing addresses...");
- BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
- if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
-
- writeTerminalData(flatNodes, terminalCount);
- if (formatOptions.mHasTimestamp) {
- initUnigramTimestamps(terminalCount);
- }
- mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir,
- formatOptions.mHasTimestamp);
- writeBigrams(flatNodes, dict);
- mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir);
- writeShortcuts(flatNodes);
-
- final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
- final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
- mTrieBuf = new byte[bufferSize];
-
- MakedictLog.i("Writing file...");
- for (PtNodeArray nodeArray : flatNodes) {
- BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
}
- if (MakedictLog.DBG) {
- BinaryDictEncoderUtils.showStatistics(flatNodes);
- MakedictLog.i("has " + terminalCount + " terminals.");
+ for (final Word word0 : dict) {
+ if (null == word0.mBigrams) continue;
+ for (final WeightedString word1 : word0.mBigrams) {
+ binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency,
+ 0 /* timestamp */);
+ }
+ if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
+ binaryDict.flushWithGC();
+ }
}
- mTrieOutStream.write(mTrieBuf);
-
- MakedictLog.i("Done");
- close();
+ binaryDict.flushWithGC();
+ binaryDict.close();
}
@Override
public void setPosition(int position) {
- if (mTrieBuf == null || position < 0 || position > mTrieBuf.length) return;
- mTriePos = position;
}
@Override
public int getPosition() {
- return mTriePos;
+ return 0;
}
@Override
public void writePtNodeCount(int ptNodeCount) {
- final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
- // ptNodeCount must fit on one byte or two bytes.
- // Please see comments in FormatSpec
- if (countSize != 1 && countSize != 2) {
- throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
- }
- final int encodedPtNodeCount = (countSize == 2) ?
- (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, encodedPtNodeCount,
- countSize);
- }
-
- private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
- BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
- FormatSpec.PTNODE_FLAGS_SIZE);
- }
-
- private void writeParentPosition(int parentPos, final PtNode ptNode,
- final FormatOptions formatOptions) {
- if (parentPos != FormatSpec.NO_PARENT_ADDRESS) {
- parentPos -= ptNode.mCachedAddressAfterUpdate;
- }
- mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos,
- formatOptions);
- }
-
- private void writeCharacters(final int[] characters, final boolean hasSeveralChars) {
- mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos);
- if (hasSeveralChars) {
- mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
- }
- }
-
- private void writeTerminalId(final int terminalId) {
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId,
- FormatSpec.PTNODE_TERMINAL_ID_SIZE);
- }
-
- private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) {
- final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
- if (formatOptions.supportsDynamicUpdate()) {
- mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf,
- mTriePos, childrenPos);
- } else {
- mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
- mTriePos, childrenPos);
- }
- }
-
- private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
- throws IOException {
- mBigramWriter.openStreams();
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.mBigrams != null) {
- mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(),
- ptNode.mBigrams.iterator(), dict);
- }
- }
- }
- mBigramWriter.closeStreams();
- }
-
- private void writeShortcuts(final ArrayList<PtNodeArray> flatNodes) throws IOException {
- mShortcutWriter.openStreams();
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.mShortcutTargets != null && !ptNode.mShortcutTargets.isEmpty()) {
- mShortcutWriter.writeShortcutForOneWord(ptNode.mTerminalId,
- ptNode.mShortcutTargets.iterator());
- }
- }
- }
- mShortcutWriter.closeStreams();
}
@Override
public void writeForwardLinkAddress(int forwardLinkAddress) {
- mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos,
- forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
}
@Override
- public void writePtNode(final PtNode ptNode, final int parentPosition,
- final FormatOptions formatOptions, final FusionDictionary dict) {
- writePtNodeFlags(ptNode, formatOptions);
- writeParentPosition(parentPosition, ptNode, formatOptions);
- writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
- if (ptNode.isTerminal()) {
- writeTerminalId(ptNode.mTerminalId);
- }
- writeChildrenPosition(ptNode, formatOptions);
- }
-
- private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,
- final int terminalCount) throws IOException {
- final byte[] freqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE];
- final byte[] terminalAddressTableBuf =
- new byte[terminalCount * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE];
- for (final PtNodeArray nodeArray : flatNodes) {
- for (final PtNode ptNode : nodeArray.mData) {
- if (ptNode.isTerminal()) {
- BinaryDictEncoderUtils.writeUIntToBuffer(freqBuf,
- ptNode.mTerminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE,
- ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
- BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf,
- ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
- ptNode.mCachedAddressAfterUpdate,
- FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
- }
- }
- }
- mFreqOutStream.write(freqBuf);
- mTerminalAddressTableOutStream.write(terminalAddressTableBuf);
- }
-
- private void initUnigramTimestamps(final int terminalCount) throws IOException {
- // Initial value of time stamps for each word is 0.
- final byte[] unigramTimestampBuf =
- new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE];
- mUnigramTimestampOutStream.write(unigramTimestampBuf);
+ public void writePtNode(
+ PtNode ptNode, int parentPosition, FormatOptions formatOptions, FusionDictionary dict) {
}
}