diff options
Diffstat (limited to 'java/src/com/android/inputmethod/latin')
13 files changed, 542 insertions, 245 deletions
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index c4f96016c..9a9653094 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -174,6 +174,7 @@ public final class Constants { public static final int CODE_SLASH = '/'; public static final int CODE_COMMERCIAL_AT = '@'; public static final int CODE_PLUS = '+'; + public static final int CODE_PERCENT = '%'; public static final int CODE_CLOSING_PARENTHESIS = ')'; public static final int CODE_CLOSING_SQUARE_BRACKET = ']'; public static final int CODE_CLOSING_CURLY_BRACKET = '}'; diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 10f643c49..65ff95458 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -196,9 +196,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen private int mLastSelectionStart = NOT_A_CURSOR_POSITION; private int mLastSelectionEnd = NOT_A_CURSOR_POSITION; - // Whether we are expecting an onUpdateSelection event to fire. If it does when we don't - // "expect" it, it means the user actually moved the cursor. - private boolean mExpectingUpdateSelection; private int mDeleteCount; private long mLastKeyTime; private final TreeSet<Long> mCurrentlyPressedHardwareKeys = CollectionUtils.newTreeSet(); @@ -1081,16 +1078,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen + ", ce=" + composingSpanEnd); } if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { - final boolean expectingUpdateSelectionFromLogger = - ResearchLogger.getAndClearLatinIMEExpectingUpdateSelection(); ResearchLogger.latinIME_onUpdateSelection(mLastSelectionStart, mLastSelectionEnd, oldSelStart, oldSelEnd, newSelStart, newSelEnd, composingSpanStart, - composingSpanEnd, mExpectingUpdateSelection, - expectingUpdateSelectionFromLogger, mConnection); - if (expectingUpdateSelectionFromLogger) { - // TODO: Investigate. Quitting now sounds wrong - we won't do the resetting work - return; - } + composingSpanEnd, mConnection); } final boolean selectionChanged = mLastSelectionStart != newSelStart @@ -1109,14 +1099,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // TODO: revisit this when LatinIME supports hardware keyboards. // NOTE: the test harness subclasses LatinIME and overrides isInputViewShown(). // TODO: find a better way to simulate actual execution. - if (isInputViewShown() && !mExpectingUpdateSelection + if (isInputViewShown() && !mConnection.isBelatedExpectedUpdate(oldSelStart, newSelStart)) { - // TAKE CARE: there is a race condition when we enter this test even when the user - // did not explicitly move the cursor. This happens when typing fast, where two keys - // turn this flag on in succession and both onUpdateSelection() calls arrive after - // the second one - the first call successfully avoids this test, but the second one - // enters. For the moment we rely on noComposingSpan to further reduce the impact. - // TODO: the following is probably better done in resetEntireInputState(). // it should only happen when the cursor moved, and the very purpose of the // test below is to narrow down whether this happened or not. Likewise with @@ -1161,7 +1145,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mRecapitalizeStatus.deactivate(); mKeyboardSwitcher.updateShiftState(); } - mExpectingUpdateSelection = false; // Make a note of the cursor position mLastSelectionStart = newSelStart; @@ -1502,6 +1485,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen || codePoint == Constants.CODE_CLOSING_CURLY_BRACKET || codePoint == Constants.CODE_CLOSING_ANGLE_BRACKET || codePoint == Constants.CODE_PLUS + || codePoint == Constants.CODE_PERCENT || Character.getType(codePoint) == Character.OTHER_SYMBOL; } @@ -1739,7 +1723,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } handleCharacter(primaryCode, keyX, keyY, spaceState); } - mExpectingUpdateSelection = true; return didAutoCorrect; } @@ -1805,7 +1788,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } else { commitTyped(LastComposedWord.NOT_A_SEPARATOR); } - mExpectingUpdateSelection = true; } final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (Character.isLetterOrDigit(codePointBeforeCursor) @@ -1995,7 +1977,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mWordComposer.setBatchInputWord(batchInputText); mConnection.setComposingText(batchInputText, 1); } - mExpectingUpdateSelection = true; mConnection.endBatchEdit(); if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0, suggestedWords); @@ -2049,9 +2030,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } private void handleBackspace(final int spaceState) { - // We revert these in this method if the deletion doesn't happen. mDeleteCount++; - mExpectingUpdateSelection = true; // In many cases, we may have to put the keyboard in auto-shift state again. However // we want to wait a few milliseconds before doing it to avoid the keyboard flashing @@ -2144,10 +2123,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } final int codePointBeforeCursor = mConnection.getCodePointBeforeCursor(); if (codePointBeforeCursor == Constants.NOT_A_CODE) { - // Nothing to delete before the cursor. We have to revert the deletion states - // that were updated at the beginning of this method. - mDeleteCount--; - mExpectingUpdateSelection = false; + // Nothing to delete before the cursor. return; } final int lengthToDelete = @@ -2318,9 +2294,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen if (!mRecapitalizeStatus.isSetAt(mLastSelectionStart, mLastSelectionEnd)) { mLastSelectionStart = mRecapitalizeStatus.getNewCursorStart(); mLastSelectionEnd = mRecapitalizeStatus.getNewCursorEnd(); - mConnection.setSelection(mLastSelectionStart, mLastSelectionEnd); } } + mConnection.finishComposingText(); mRecapitalizeStatus.rotate(); final int numCharsDeleted = mLastSelectionEnd - mLastSelectionStart; mConnection.setSelection(mLastSelectionEnd, mLastSelectionEnd); @@ -2656,7 +2632,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection, separator, mWordComposer.isBatchMode(), suggestedWords); } - mExpectingUpdateSelection = true; commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD, separator); if (!typedWord.equals(autoCorrection)) { @@ -2727,7 +2702,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen // typed word. final String replacedWord = mWordComposer.getTypedWord(); LatinImeLogger.logOnManualSuggestion(replacedWord, suggestion, index, suggestedWords); - mExpectingUpdateSelection = true; commitChosenWord(suggestion, LastComposedWord.COMMIT_TYPE_MANUAL_PICK, LastComposedWord.NOT_A_SEPARATOR); if (ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS) { diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index 9f7f502ea..fda97dafc 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -60,7 +60,8 @@ public abstract class AbstractDictDecoder implements DictDecoder { 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE), + 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 216492b4d..8109321b6 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -225,20 +225,26 @@ public final class BinaryDictDecoderUtils { * * @param buffer the OutputStream to write to. * @param word the string to write. + * @return the size written, in bytes. */ - static void writeString(final OutputStream buffer, final String word) throws IOException { + static int writeString(final OutputStream buffer, final String word) throws IOException { final int length = word.length(); + int written = 0; for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { + final int charSize = getCharSize(codePoint); + if (1 == charSize) { buffer.write((byte) codePoint); } else { buffer.write((byte) (0xFF & (codePoint >> 16))); buffer.write((byte) (0xFF & (codePoint >> 8))); buffer.write((byte) (0xFF & codePoint)); } + written += charSize; } buffer.write(FormatSpec.PTNODE_CHARACTERS_TERMINATOR); + written += FormatSpec.PTNODE_TERMINATOR_SIZE; + return written; } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index 0f7d2f6c9..9a28629b1 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -301,35 +301,6 @@ public final class BinaryDictIOUtils { } /** - * Write a string to a stream. - * - * @param destination the stream to write. - * @param word the string to be written. - * @return the size written, in bytes. - * @throws IOException - */ - private static int writeString(final OutputStream destination, final String word) - throws IOException { - int size = 0; - final int length = word.length(); - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (CharEncoding.getCharSize(codePoint) == 1) { - destination.write((byte)codePoint); - size++; - } else { - destination.write((byte)(0xFF & (codePoint >> 16))); - destination.write((byte)(0xFF & (codePoint >> 8))); - destination.write((byte)(0xFF & codePoint)); - size += 3; - } - } - destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR); - size += FormatSpec.PTNODE_TERMINATOR_SIZE; - return size; - } - - /** * Write a PtNode to an output stream from a PtNodeInfo. * A PtNode is an in-memory representation of a node in the patricia trie. * A PtNode info is a container for low-level information about how the @@ -387,7 +358,7 @@ public final class BinaryDictIOUtils { destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency)); size++; - size += writeString(destination, target.mWord); + size += CharEncoding.writeString(destination, target.mWord); } } diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index 3dbeee099..b8636eecd 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -43,7 +43,7 @@ public interface DictDecoder { public FileHeader readHeader() throws IOException, UnsupportedFormatException; /** - * Reads PtNode from nodeAddress. + * Reads PtNode from ptNodePos. * @param ptNodePos the position of PtNode. * @param formatOptions the format options. * @return PtNodeInfo. diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 336277196..28da9ffdd 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.IOException; import java.io.OutputStream; @@ -217,6 +218,25 @@ public final class DynamicBinaryDictIOUtils { } /** + * Converts a list of WeightedString to a list of PendingAttribute. + */ + public static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater, + final ArrayList<WeightedString> bigramStrings) + throws IOException, UnsupportedFormatException { + if (bigramStrings == null) return CollectionUtils.newArrayList(); + final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); + for (final WeightedString bigram : bigramStrings) { + final int pos = dictUpdater.getTerminalPosition(bigram.mWord); + if (pos == FormatSpec.NOT_VALID_WORD) { + // TODO: figure out what is the correct thing to do here. + } else { + bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); + } + } + return bigrams; + } + + /** * Insert a word into a binary dictionary. * * @param dictUpdater the dict updater. @@ -238,18 +258,9 @@ public final class DynamicBinaryDictIOUtils { final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { - final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); + final ArrayList<PendingAttribute> bigrams = resolveBigramPositions(dictUpdater, + bigramStrings); final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - if (bigramStrings != null) { - for (final WeightedString bigram : bigramStrings) { - int position = dictUpdater.getTerminalPosition(bigram.mWord); - if (position == FormatSpec.NOT_VALID_WORD) { - // TODO: figure out what is the correct thing to do here. - } else { - bigrams.add(new PendingAttribute(bigram.mFrequency, position)); - } - } - } final boolean isTerminal = true; final boolean hasBigrams = !bigrams.isEmpty(); diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 5a5d7af6b..6d5827023 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -37,13 +37,15 @@ public final class FormatSpec { * sion * * o | - * p | not used 4 bits - * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG - * i | FRENCH_LIGATURE_PROCESSING_FLAG - * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE - * n | GERMAN_UMLAUT_PROCESSING_FLAG - * f | - * lags + * p | not used 3 bits + * t | each unigram and bigram entry has a time stamp? + * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG + * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG + * n | FRENCH_LIGATURE_PROCESSING_FLAG + * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE + * l | GERMAN_UMLAUT_PROCESSING_FLAG + * a | + * gs * * h | * e | size of the file header, 4bytes @@ -211,6 +213,7 @@ public final class FormatSpec { static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; static final int CONTAINS_BIGRAMS_FLAG = 0x8; + static final int CONTAINS_TIMESTAMP_FLAG = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. @@ -263,6 +266,7 @@ public final class FormatSpec { // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; + static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; @@ -271,14 +275,20 @@ public final class FormatSpec { static final String CONTENT_TABLE_FILE_SUFFIX = "_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; + static final int UNIGRAM_TIMESTAMP_SIZE = 4; // With the English main dictionary as of October 2013, the size of bigram address table is // is 584KB with the block size being 4. // This is 91% of that of full address table. static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; - static final int BIGRAM_CONTENT_COUNT = 1; + static final int BIGRAM_CONTENT_COUNT = 2; static final int BIGRAM_FREQ_CONTENT_INDEX = 0; + static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1; static final String BIGRAM_FREQ_CONTENT_ID = "_freq"; + static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp"; + static final int BIGRAM_TIMESTAMP_SIZE = 4; + static final int BIGRAM_COUNTER_SIZE = 1; + static final int BIGRAM_LEVEL_SIZE = 1; static final int SHORTCUT_CONTENT_COUNT = 1; static final int SHORTCUT_CONTENT_INDEX = 0; @@ -321,6 +331,7 @@ public final class FormatSpec { public final int mVersion; public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; + public final boolean mHasTimestamp; @UsedForTesting public FormatOptions(final int version) { this(version, false); @@ -328,6 +339,11 @@ public final class FormatSpec { @UsedForTesting public FormatOptions(final int version, final boolean supportsDynamicUpdate) { + this(version, supportsDynamicUpdate, false /* hasTimestamp */); + } + + public FormatOptions(final int version, final boolean supportsDynamicUpdate, + final boolean hasTimestamp) { mVersion = version; if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { throw new RuntimeException("Dynamic updates are only supported with versions " @@ -335,6 +351,7 @@ public final class FormatSpec { } mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); + mHasTimestamp = hasTimestamp; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java new file mode 100644 index 000000000..00f401ea7 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * An auxiliary class for reading SparseTable and data written by SparseTableContentWriter. + */ +public class SparseTableContentReader { + + /** + * An interface of a function which is passed to SparseTableContentReader.read. + */ + public interface SparseTableContentReaderInterface { + /** + * Reads data. + * + * @param buffer the DictBuffer. The position of the buffer is set to the head of data. + */ + public void read(final DictBuffer buffer); + } + + private final int mContentCount; + private final int mBlockSize; + protected final File mBaseDir; + private final File mLookupTableFile; + private final File[] mAddressTableFiles; + private final File[] mContentFiles; + private DictBuffer mLookupTableBuffer; + private final DictBuffer[] mAddressTableBuffers; + private final DictBuffer[] mContentBuffers; + private final DictionaryBufferFactory mFactory; + + /** + * Sole constructor of SparseTableContentReader. + * + * @param name the name of SparseTable. + * @param blockSize the block size of the content table. + * @param baseDir the directory which contains the files of the content table. + * @param contentFilenames the file names of content files. + * @param contentIds the ids of contents. These ids are used for a suffix of a name of + * address files and content files. + * @param factory the DictionaryBufferFactory which is used for opening the files. + */ + public SparseTableContentReader(final String name, final int blockSize, final File baseDir, + final String[] contentFilenames, final String[] contentIds, + final DictionaryBufferFactory factory) { + if (contentFilenames.length != contentIds.length) { + throw new RuntimeException("The length of contentFilenames and the length of" + + " contentIds are different " + contentFilenames.length + ", " + + contentIds.length); + } + mBlockSize = blockSize; + mBaseDir = baseDir; + mFactory = factory; + mContentCount = contentFilenames.length; + mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); + mAddressTableFiles = new File[mContentCount]; + mContentFiles = new File[mContentCount]; + for (int i = 0; i < mContentCount; ++i) { + mAddressTableFiles[i] = new File(mBaseDir, + name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); + mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); + } + mAddressTableBuffers = new DictBuffer[mContentCount]; + mContentBuffers = new DictBuffer[mContentCount]; + } + + public void openBuffers() throws FileNotFoundException, IOException { + mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile); + for (int i = 0; i < mContentCount; ++i) { + mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]); + mContentBuffers[i] = mFactory.getDictionaryBuffer(mContentFiles[i]); + } + } + + protected void read(final int contentIndex, final int index, + final SparseTableContentReaderInterface reader) { + if (index < 0 || (index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES + >= mLookupTableBuffer.limit()) { + return; + } + + mLookupTableBuffer.position((index / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); + final int posInAddressTable = mLookupTableBuffer.readInt(); + if (posInAddressTable == SparseTable.NOT_EXIST) { + return; + } + + mAddressTableBuffers[contentIndex].position( + (posInAddressTable + index % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES); + final int address = mAddressTableBuffers[contentIndex].readInt(); + if (address == SparseTable.NOT_EXIST) { + return; + } + + mContentBuffers[contentIndex].position(address); + reader.read(mContentBuffers[contentIndex]); + } +}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java new file mode 100644 index 000000000..49f0fd624 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentWriter.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.makedict; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * An auxiliary class for writing data associated with SparseTable to files. + */ +public class SparseTableContentWriter { + public interface SparseTableContentWriterInterface { + public void write(final OutputStream outStream) throws IOException; + } + + private final int mContentCount; + private final SparseTable mSparseTable; + private final File mLookupTableFile; + protected final File mBaseDir; + private final File[] mAddressTableFiles; + private final File[] mContentFiles; + protected final OutputStream[] mContentOutStreams; + + /** + * Sole constructor of SparseTableContentWriter. + * + * @param name the name of SparseTable. + * @param initialCapacity the initial capacity of SparseTable. + * @param blockSize the block size of the content table. + * @param baseDir the directory which contains the files of the content table. + * @param contentFilenames the file names of content files. + * @param contentIds the ids of contents. These ids are used for a suffix of a name of address + * files and content files. + */ + public SparseTableContentWriter(final String name, final int initialCapacity, + final int blockSize, final File baseDir, final String[] contentFilenames, + final String[] contentIds) { + if (contentFilenames.length != contentIds.length) { + throw new RuntimeException("The length of contentFilenames and the length of" + + " contentIds are different " + contentFilenames.length + ", " + + contentIds.length); + } + mContentCount = contentFilenames.length; + mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); + mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); + mAddressTableFiles = new File[mContentCount]; + mContentFiles = new File[mContentCount]; + mBaseDir = baseDir; + for (int i = 0; i < mContentCount; ++i) { + mAddressTableFiles[i] = new File(mBaseDir, + name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); + mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); + } + mContentOutStreams = new OutputStream[mContentCount]; + } + + public void openStreams() throws FileNotFoundException { + for (int i = 0; i < mContentCount; ++i) { + mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]); + } + } + + protected void write(final int contentIndex, final int index, + final SparseTableContentWriterInterface writer) throws IOException { + mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length()); + writer.write(mContentOutStreams[contentIndex]); + mContentOutStreams[contentIndex].flush(); + } + + public void closeStreams() throws IOException { + mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles); + for (int i = 0; i < mContentCount; ++i) { + mContentOutStreams[i].close(); + } + } +}
\ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 53729075f..add03c323 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -51,10 +51,30 @@ public class Ver4DictDecoder extends AbstractDictDecoder { protected DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; - private DictBuffer mBigramBuffer; - private DictBuffer mShortcutBuffer; - private SparseTable mBigramAddressTable; - private SparseTable mShortcutAddressTable; + private BigramContentReader mBigramReader; + private ShortcutContentReader mShortcutReader; + + /** + * Raw PtNode info straight out of a trie file in version 4 dictionary. + */ + protected static final class Ver4PtNodeInfo { + public final int mFlags; + public final int[] mCharacters; + public final int mTerminalId; + public final int mChildrenPos; + public final int mParentPos; + public final int mNodeSize; + + public Ver4PtNodeInfo(final int flags, final int[] characters, final int terminalId, + final int childrenPos, final int parentPos, final int nodeSize) { + mFlags = flags; + mCharacters = characters; + mTerminalId = terminalId; + mChildrenPos = childrenPos; + mParentPos = parentPos; + mNodeSize = nodeSize; + } + } @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { @@ -108,10 +128,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder { mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); - mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ)); - loadBigramAddressSparseTable(); - mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT)); - loadShortcutAddressSparseTable(); + mBigramReader = new BigramContentReader(mDictDirectory.getName(), + mDictDirectory, mBufferFactory, false); + mBigramReader.openBuffers(); + mShortcutReader = new ShortcutContentReader(mDictDirectory.getName(), mDictDirectory, + mBufferFactory); + mShortcutReader.openBuffers(); } @Override @@ -136,25 +158,112 @@ public class Ver4DictDecoder extends AbstractDictDecoder { return header; } - private void loadBigramAddressSparseTable() throws IOException { - final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - final File freqsFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX - + FormatSpec.BIGRAM_FREQ_CONTENT_ID); - mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile }, - FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); + /** + * An auxiliary class for reading bigrams. + */ + protected static class BigramContentReader extends SparseTableContentReader { + private final boolean mHasTimestamp; + + public BigramContentReader(final String name, final File baseDir, + final DictionaryBufferFactory factory, final boolean hasTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, hasTimestamp), getContentIds(hasTimestamp), factory); + mHasTimestamp = hasTimestamp; + } + + // TODO: Consolidate this method and BigramContentWriter.getContentFilenames. + private static String[] getContentFilenames(final String name, final boolean hasTimestamp) { + final String[] contentFilenames; + if (hasTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } + return contentFilenames; + } + + // TODO: Consolidate this method and BigramContentWriter.getContentIds. + private static String[] getContentIds(final boolean hasTimestamp) { + final String[] contentIds; + if (hasTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; + } + return contentIds; + } + + public ArrayList<PendingAttribute> readTargetsAndFrequencies(final int terminalId, + final DictBuffer terminalAddressTableBuffer) { + final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); + read(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, + new SparseTableContentReaderInterface() { + @Override + public void read(final DictBuffer buffer) { + while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, + // remaining bigram entries are ignored. + final int bigramFlags = buffer.readUnsignedByte(); + final int targetTerminalId = buffer.readUnsignedInt24(); + terminalAddressTableBuffer.position(targetTerminalId + * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + final int targetAddress = + terminalAddressTableBuffer.readUnsignedInt24(); + bigrams.add(new PendingAttribute(bigramFlags + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, + targetAddress)); + if (0 == (bigramFlags + & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { + break; + } + } + if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { + throw new RuntimeException("Too many bigrams in a PtNode (" + + bigrams.size() + " but max is " + + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); + } + } + }); + if (bigrams.isEmpty()) return null; + return bigrams; + } } - // TODO: Let's have something like SparseTableContentsReader in this class. - private void loadShortcutAddressSparseTable() throws IOException { - final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - final File contentFile = new File(mDictDirectory, mDictDirectory.getName() - + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX - + FormatSpec.SHORTCUT_CONTENT_ID); - mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile, - new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); + /** + * An auxiliary class for reading shortcuts. + */ + protected static class ShortcutContentReader extends SparseTableContentReader { + public ShortcutContentReader(final String name, final File baseDir, + final DictionaryBufferFactory factory) { + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, + new String[] { FormatSpec.SHORTCUT_CONTENT_ID }, factory); + } + + public ArrayList<WeightedString> readShortcuts(final int terminalId) { + final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); + read(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId, + new SparseTableContentReaderInterface() { + @Override + public void read(final DictBuffer buffer) { + while (true) { + final int flags = buffer.readUnsignedByte(); + final String word = CharEncoding.readString(buffer); + shortcuts.add(new WeightedString(word, + flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); + if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) { + break; + } + } + } + }); + if (shortcuts.isEmpty()) return null; + return shortcuts; + } } protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { @@ -168,102 +277,82 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } } - private ArrayList<WeightedString> readShortcuts(final int terminalId) { - if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null; - - final ArrayList<WeightedString> ret = CollectionUtils.newArrayList(); - final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX, - terminalId); - mShortcutBuffer.position(posOfShortcuts); - while (true) { - final int flags = mShortcutBuffer.readUnsignedByte(); - final String word = CharEncoding.readString(mShortcutBuffer); - ret.add(new WeightedString(word, - flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); - if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - return ret; - } + private final int[] mCharacterBufferForReadingVer4PtNodeInfo + = new int[FormatSpec.MAX_WORD_LENGTH]; + /** + * Reads PtNode from ptNodePos in the trie file and returns Ver4PtNodeInfo. + * + * @param ptNodePos the position of PtNode. + * @param options the format options. + * @return Ver4PtNodeInfo. + */ // TODO: Make this buffer thread safe. // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH. - private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; - @Override - public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { - int addressPointer = ptNodePos; + protected Ver4PtNodeInfo readVer4PtNodeInfo(final int ptNodePos, final FormatOptions options) { + int readingPos = ptNodePos; final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); - addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; + readingPos += FormatSpec.PTNODE_FLAGS_SIZE; - final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); + final int parentPos = PtNodeReader.readParentAddress(mDictBuffer, options); if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; + readingPos += FormatSpec.PARENT_ADDRESS_SIZE; } final int characters[]; if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); while (FormatSpec.INVALID_CHARACTER != character && index < FormatSpec.MAX_WORD_LENGTH) { - mCharacterBuffer[index++] = character; + mCharacterBufferForReadingVer4PtNodeInfo[index++] = character; character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); } - characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); + characters = Arrays.copyOfRange(mCharacterBufferForReadingVer4PtNodeInfo, 0, index); } else { final int character = CharEncoding.readChar(mDictBuffer); - addressPointer += CharEncoding.getCharSize(character); + readingPos += CharEncoding.getCharSize(character); characters = new int[] { character }; } final int terminalId; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { terminalId = PtNodeReader.readTerminalId(mDictBuffer); - addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; + readingPos += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { terminalId = PtNode.NOT_A_TERMINAL; } + int childrenPos = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); + if (childrenPos != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenPos += readingPos; + } + readingPos += BinaryDictIOUtils.getChildrenAddressSize(flags, options); + + return new Ver4PtNodeInfo(flags, characters, terminalId, childrenPos, parentPos, + readingPos - ptNodePos); + } + + @Override + public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { + final Ver4PtNodeInfo nodeInfo = readVer4PtNodeInfo(ptNodePos, options); + final int frequency; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); + if (0 != (FormatSpec.FLAG_IS_TERMINAL & nodeInfo.mFlags)) { + frequency = PtNodeReader.readFrequency(mFrequencyBuffer, nodeInfo.mTerminalId); } else { frequency = PtNode.NOT_A_TERMINAL; } - int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); - final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); - - final ArrayList<PendingAttribute> bigrams; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList<PendingAttribute>(); - final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId); - mBigramBuffer.position(posOfBigrams); - while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, - // remaining bigram entries are ignored. - final int bigramFlags = mBigramBuffer.readUnsignedByte(); - final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); - mTerminalAddressTableBuffer.position( - targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); - final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); - bigrams.add(new PendingAttribute( - bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, - targetAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; - } - if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { - throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size() - + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); - } - } else { - bigrams = null; - } - return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); + + final ArrayList<WeightedString> shortcutTargets = mShortcutReader.readShortcuts( + nodeInfo.mTerminalId); + final ArrayList<PendingAttribute> bigrams = mBigramReader.readTargetsAndFrequencies( + nodeInfo.mTerminalId, mTerminalAddressTableBuffer); + + return new PtNodeInfo(ptNodePos, ptNodePos + nodeInfo.mNodeSize, nodeInfo.mFlags, + nodeInfo.mCharacters, frequency, nodeInfo.mParentPos, nodeInfo.mChildrenPos, + shortcutTargets, bigrams); } private void deleteDictFiles() { @@ -314,10 +403,14 @@ public class Ver4DictDecoder extends AbstractDictDecoder { @Override public boolean readAndFollowForwardLink() { - final int nextAddress = mDictBuffer.readUnsignedInt24(); - if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) { - mDictBuffer.position(nextAddress); - return true; + final int forwardLinkPos = mDictBuffer.position(); + int nextRelativePos = BinaryDictDecoderUtils.readSInt24(mDictBuffer); + if (nextRelativePos != FormatSpec.NO_FORWARD_LINK_ADDRESS) { + final int nextPos = forwardLinkPos + nextRelativePos; + if (nextPos >= 0 && nextPos < mDictBuffer.limit()) { + mDictBuffer.position(nextPos); + return true; + } } return false; } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index f9dcacf77..842427434 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -45,6 +45,7 @@ public class Ver4DictEncoder implements DictEncoder { private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mUnigramTimestampOutStream; private OutputStream mTerminalAddressTableOutStream; private File mDictDir; private String mBaseFilename; @@ -56,73 +57,41 @@ public class Ver4DictEncoder implements DictEncoder { mDictPlacedDir = dictPlacedDir; } - private interface SparseTableContentWriterInterface { - public void write(final OutputStream outStream) throws IOException; - } + private static class BigramContentWriter extends SparseTableContentWriter { + private final boolean mWriteTimestamp; - private static class SparseTableContentWriter { - private final int mContentCount; - private final SparseTable mSparseTable; - private final File mLookupTableFile; - protected final File mBaseDir; - private final File[] mAddressTableFiles; - private final File[] mContentFiles; - protected final OutputStream[] mContentOutStreams; - - public SparseTableContentWriter(final String name, final int contentCount, - final int initialCapacity, final int blockSize, final File baseDir, - final String[] contentFilenames, final String[] contentIds) { - if (contentFilenames.length != contentIds.length) { - throw new RuntimeException("The length of contentFilenames and the length of" - + " contentIds are different " + contentFilenames.length + ", " - + contentIds.length); - } - mContentCount = contentCount; - mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount); - mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); - mAddressTableFiles = new File[mContentCount]; - mContentFiles = new File[mContentCount]; - mBaseDir = baseDir; - for (int i = 0; i < mContentCount; ++i) { - mAddressTableFiles[i] = new File(mBaseDir, - name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]); - mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]); - } - mContentOutStreams = new OutputStream[mContentCount]; + public BigramContentWriter(final String name, final int initialCapacity, + final File baseDir, final boolean writeTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp)); + mWriteTimestamp = writeTimestamp; } - public void openStreams() throws FileNotFoundException { - for (int i = 0; i < mContentCount; ++i) { - mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]); + private static String[] getContentFilenames(final String name, + final boolean writeTimestamp) { + final String[] contentFilenames; + if (writeTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; } + return contentFilenames; } - protected void write(final int contentIndex, final int index, - final SparseTableContentWriterInterface writer) throws IOException { - mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length()); - writer.write(mContentOutStreams[contentIndex]); - mContentOutStreams[contentIndex].flush(); - } - - public void closeStreams() throws IOException { - mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles); - for (int i = 0; i < mContentCount; ++i) { - mContentOutStreams[i].close(); + private static String[] getContentIds(final boolean writeTimestamp) { + final String[] contentIds; + if (writeTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; } - } - } - - private static class BigramContentWriter extends SparseTableContentWriter { - - public BigramContentWriter(final String name, final int initialCapacity, - final File baseDir) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT, - initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }, - new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }); + return contentIds; } - public void writeBigramsForOneWord(final int terminalId, + public void writeBigramsForOneWord(final int terminalId, final int bigramCount, final Iterator<WeightedString> bigramIterator, final FusionDictionary dict) throws IOException { write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, @@ -130,8 +99,16 @@ public class Ver4DictEncoder implements DictEncoder { @Override public void write(final OutputStream outStream) throws IOException { writeBigramsForOneWordInternal(outStream, bigramIterator, dict); - } - }); + }}); + if (mWriteTimestamp) { + write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId, + new SparseTableContentWriterInterface() { + @Override + public void write(final OutputStream outStream) throws IOException { + initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream, + bigramCount); + }}); + } } private void writeBigramsForOneWordInternal(final OutputStream outStream, @@ -151,13 +128,26 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } + + private void initBigramTimestampsCountersAndLevelsForOneWordInternal( + final OutputStream outStream, final int bigramCount) throws IOException { + for (int i = 0; i < bigramCount; ++i) { + // TODO: Figure out what initial values should be. + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_TIMESTAMP_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_COUNTER_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_LEVEL_SIZE); + } + } } private static class ShortcutContentWriter extends SparseTableContentWriter { public ShortcutContentWriter(final String name, final int initialCapacity, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT, - initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, new String[] { FormatSpec.SHORTCUT_CONTENT_ID }); } @@ -193,18 +183,20 @@ public class Ver4DictEncoder implements DictEncoder { mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); + final File timestampFile = new File(mDictDir, + mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } - if (!trieFile.exists()) trieFile.createNewFile(); - if (!freqFile.exists()) freqFile.createNewFile(); - if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); + if (formatOptions.mHasTimestamp) { + mUnigramTimestampOutStream = new FileOutputStream(timestampFile); + } } private void close() throws IOException { @@ -218,6 +210,9 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } + if (mUnigramTimestampOutStream != null) { + mUnigramTimestampOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; @@ -257,7 +252,11 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); - mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir); + if (formatOptions.mHasTimestamp) { + initUnigramTimestamps(terminalCount); + } + mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, + formatOptions.mHasTimestamp); writeBigrams(flatNodes, dict); mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir); writeShortcuts(flatNodes); @@ -348,7 +347,7 @@ public class Ver4DictEncoder implements DictEncoder { for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { - mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, + mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(), ptNode.mBigrams.iterator(), dict); } } @@ -408,4 +407,11 @@ public class Ver4DictEncoder implements DictEncoder { mFreqOutStream.write(freqBuf); mTerminalAddressTableOutStream.write(terminalAddressTableBuf); } + + private void initUnigramTimestamps(final int terminalCount) throws IOException { + // Initial value of time stamps for each word is 0. + final byte[] unigramTimestampBuf = + new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE]; + mUnigramTimestampOutStream.write(unigramTimestampBuf); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java index 102a41b4e..fdbe81ab6 100644 --- a/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/SubtypeLocaleUtils.java @@ -197,7 +197,9 @@ public final class SubtypeLocaleUtils { // es_US spanish F Español (EE.UU.) exception // fr azerty F Français // fr_CA qwerty F Français (Canada) + // fr_CH swiss F Français (Suisse) // de qwertz F Deutsch + // de_CH swiss T Deutsch (Schweiz) // zz qwerty F No language (QWERTY) in system locale // fr qwertz T Français (QWERTZ) // de qwerty T Deutsch (QWERTY) @@ -298,7 +300,9 @@ public final class SubtypeLocaleUtils { // es_US spanish F Es Español Español (EE.UU.) exception // fr azerty F Fr Français Français // fr_CA qwerty F Fr Français Français (Canada) + // fr_CH swiss F Fr Français Français (Suisse) // de qwertz F De Deutsch Deutsch + // de_CH swiss T De Deutsch Deutsch (Schweiz) // zz qwerty F QWERTY QWERTY // fr qwertz T Fr Français Français // de qwerty T De Deutsch Deutsch |