aboutsummaryrefslogtreecommitdiffstats
path: root/java
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-01-29 20:19:24 +0900
committer Keisuke Kuroyanagi <ksk@google.com> 2014-01-29 20:19:24 +0900
commit 26bd46095a05843e7574dfcf7db53406f215525d (patch)
tree d3b36dd577a46bb2f907358eba927cbfb51df2aa /java
parent a454a7b85c513fb7b38e70e4e199958e0060c667 (diff)
download latinime-26bd46095a05843e7574dfcf7db53406f215525d.tar.gz
latinime-26bd46095a05843e7574dfcf7db53406f215525d.tar.xz
latinime-26bd46095a05843e7574dfcf7db53406f215525d.zip
Reading dictionary containing timestamps in Java Side.

Just skipping historical information fields.

Bug: 11281877
Change-Id: I43d2adaa576b7da11ed3ca54990265dbb6f53b08
Diffstat (limited to 'java')
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java | 6
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java | 1
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java | 11
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FormatSpec.java | 12
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java | 1
-rw-r--r-- java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java | 69
6 files changed, 48 insertions, 52 deletions
diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
index f8fa68f45..370782b33 100644
--- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java
@@ -48,7 +48,7 @@ public abstract class AbstractDictDecoder implements DictDecoder {
throw new UnsupportedFormatException("Unsupported version : " + version);
}
// TODO: Remove this field.
- final int optionsFlags = HeaderReader.readOptionFlags(headerBuffer);
+ HeaderReader.readOptionFlags(headerBuffer);
final int headerSize = HeaderReader.readHeaderSize(headerBuffer);
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
@@ -59,8 +59,8 @@ public abstract class AbstractDictDecoder implements DictDecoder {
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes),
- new FormatOptions(version,
- 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
+ new FormatOptions(version, FileHeader.ATTRIBUTE_VALUE_TRUE.equals(
+ attributes.get(FileHeader.HAS_HISTORICAL_INFO_ATTRIBUTE))));
return header;
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 9a24c47af..31747155e 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -499,7 +499,6 @@ public final class BinaryDictDecoderUtils {
final int nodeArrayOriginPos = dictDecoder.getPosition();
do { // Scan the linked-list node.
- final int nodeArrayHeadPos = dictDecoder.getPosition();
final int count = dictDecoder.readPtNodeCount();
int groupPos = dictDecoder.getPosition();
for (int i = count; i > 0; --i) { // Scan the array of PtNode.
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index bb40e0dd5..eff8fc375 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -756,14 +756,6 @@ public class BinaryDictEncoderUtils {
}
/**
- * Makes the 2-byte value for options flags. Unused at the moment, and always 0.
- */
- private static final int makeOptionsValue(final FormatOptions formatOptions) {
- // TODO: why doesn't this handle CONTAINS_TIMESTAMP_FLAG?
- return 0;
- }
-
- /**
* Makes the flag value for a shortcut.
*
* @param more whether there are more attributes after this one.
@@ -949,7 +941,8 @@ public class BinaryDictEncoderUtils {
headerBuffer.write((byte) (0xFF & version));
// Options flags
- final int options = makeOptionsValue(formatOptions);
+ // TODO: Remove this field.
+ final int options = 0;
headerBuffer.write((byte) (0xFF & (options >> 8)));
headerBuffer.write((byte) (0xFF & options));
final int headerSizeOffset = headerBuffer.size();
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 437fa942b..61c17fc46 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -192,10 +192,6 @@ public final class FormatSpec {
static final int MINIMUM_SUPPORTED_VERSION = VERSION2;
static final int MAXIMUM_SUPPORTED_VERSION = VERSION4;
- // These options need to be the same numeric values as the one in the native reading code.
- // TODO: Make the native reading code read this variable.
- static final int CONTAINS_TIMESTAMP_FLAG = 0x10;
-
// TODO: Make this value adaptative to content data, store it in the header, and
// use it in the reading code.
static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
@@ -249,26 +245,26 @@ public final class FormatSpec {
static final String TRIE_FILE_EXTENSION = ".trie";
public static final String HEADER_FILE_EXTENSION = ".header";
static final String FREQ_FILE_EXTENSION = ".freq";
- static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp";
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
static final String BIGRAM_FILE_EXTENSION = ".bigram";
static final String SHORTCUT_FILE_EXTENSION = ".shortcut";
static final String LOOKUP_TABLE_FILE_SUFFIX = "_lookup";
static final String CONTENT_TABLE_FILE_SUFFIX = "_index";
+ static final int FLAGS_IN_FREQ_FILE_SIZE = 1;
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
static final int UNIGRAM_TIMESTAMP_SIZE = 4;
+ static final int UNIGRAM_COUNTER_SIZE = 1;
+ static final int UNIGRAM_LEVEL_SIZE = 1;
// With the English main dictionary as of October 2013, the size of bigram address table is
// is 345KB with the block size being 16.
// This is 54% of that of full address table.
static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
- static final int BIGRAM_CONTENT_COUNT = 2;
+ static final int BIGRAM_CONTENT_COUNT = 1;
static final int BIGRAM_FREQ_CONTENT_INDEX = 0;
- static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1;
static final String BIGRAM_FREQ_CONTENT_ID = "_freq";
- static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp";
static final int BIGRAM_TIMESTAMP_SIZE = 4;
static final int BIGRAM_COUNTER_SIZE = 1;
static final int BIGRAM_LEVEL_SIZE = 1;
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index fdf2ae7b5..5b0e8399a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -61,6 +61,7 @@ public final class FusionDictionary implements Iterable<Word> {
mData = new ArrayList<PtNode>();
}
public PtNodeArray(ArrayList<PtNode> data) {
+ Collections.sort(data, PTNODE_COMPARATOR);
mData = data;
}
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 7071893d2..f23022992 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -143,7 +143,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
mBigramReader = new BigramContentReader(mDictDirectory.getName(),
- mDictDirectory, mBufferFactory, false);
+ mDictDirectory, mBufferFactory);
mBigramReader.openBuffers();
mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory,
mBufferFactory);
@@ -184,39 +184,24 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
*/
protected static class BigramContentReader extends SparseTableContentReader {
public BigramContentReader(final String name, final File baseDir,
- final DictionaryBufferFactory factory, final boolean hasTimestamp) {
+ final DictionaryBufferFactory factory) {
super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
- getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory);
+ getContentFilenames(name), getContentIds(), factory);
}
// TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
- protected static String[] getContentFilenames(final String name,
- final boolean hasTimestamp) {
- final String[] contentFilenames;
- if (hasTimestamp) {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
- name + FormatSpec.BIGRAM_FILE_EXTENSION };
- } else {
- contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
- }
- return contentFilenames;
+ protected static String[] getContentFilenames(final String name) {
+ return new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION };
}
// TODO: Consolidate this method and BigramContentWriter.getContentIds.
- protected static String[] getContentIds(final boolean hasTimestamp) {
- final String[] contentIds;
- if (hasTimestamp) {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
- FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID };
- } else {
- contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
- }
- return contentIds;
+ protected static String[] getContentIds() {
+ return new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID };
}
public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId,
- final DictBuffer terminalAddressTableBuffer) {
+ final DictBuffer terminalAddressTableBuffer, final FormatOptions options) {
final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
new SparseTableContentReaderInterface() {
@@ -226,14 +211,25 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
// If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
// remaining bigram entries are ignored.
final int bigramFlags = buffer.readUnsignedByte();
+ final int probability;
+
+ if (options.mHasTimestamp) {
+ probability = buffer.readUnsignedByte();
+ final int pos = buffer.position();
+ // Skip historical info.
+ buffer.position(pos + FormatSpec.BIGRAM_TIMESTAMP_SIZE
+ + FormatSpec.BIGRAM_LEVEL_SIZE
+ + FormatSpec.BIGRAM_COUNTER_SIZE);
+ } else {
+ probability = bigramFlags
+ & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
+ }
final int targetTerminalId = buffer.readUnsignedInt24();
terminalAddressTableBuffer.position(targetTerminalId
* FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
final int targetAddress =
terminalAddressTableBuffer.readUnsignedInt24();
- bigrams.add(new PendingAttribute(bigramFlags
- & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
- targetAddress));
+ bigrams.add(new PendingAttribute(probability, targetAddress));
if (0 == (bigramFlags
& FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
break;
@@ -286,8 +282,19 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
- protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
- frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
+ protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId,
+ final FormatOptions formatOptions) {
+ final int readingPos;
+ if (formatOptions.mHasTimestamp) {
+ final int entrySize = FormatSpec.FREQUENCY_AND_FLAGS_SIZE
+ + FormatSpec.UNIGRAM_TIMESTAMP_SIZE + FormatSpec.UNIGRAM_LEVEL_SIZE
+ + FormatSpec.UNIGRAM_COUNTER_SIZE;
+ readingPos = terminalId * entrySize + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE;
+ } else {
+ readingPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE
+ + FormatSpec.FLAGS_IN_FREQ_FILE_SIZE;
+ }
+ frequencyBuffer.position(readingPos);
return frequencyBuffer.readUnsignedByte();
}
@@ -354,12 +361,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
@Override
- public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
+ public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options);
final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) {
- frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId);
+ frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId, options);
} else {
frequency = PtNode.NOT_A_TERMINAL;
}
@@ -367,7 +374,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts(
nodeInfo.mTerminalId);
final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies(
- nodeInfo.mTerminalId, mTerminalAddressTableBuffer);
+ nodeInfo.mTerminalId, mTerminalAddressTableBuffer, options);
return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags,
nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos,