diff options
Diffstat (limited to 'java/src')
7 files changed, 139 insertions, 42 deletions
diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index c4f96016c..9a9653094 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -174,6 +174,7 @@ public final class Constants { public static final int CODE_SLASH = '/'; public static final int CODE_COMMERCIAL_AT = '@'; public static final int CODE_PLUS = '+'; + public static final int CODE_PERCENT = '%'; public static final int CODE_CLOSING_PARENTHESIS = ')'; public static final int CODE_CLOSING_SQUARE_BRACKET = ']'; public static final int CODE_CLOSING_CURLY_BRACKET = '}'; diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index dc96cd750..fac595ea7 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1501,6 +1501,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen || codePoint == Constants.CODE_CLOSING_CURLY_BRACKET || codePoint == Constants.CODE_CLOSING_ANGLE_BRACKET || codePoint == Constants.CODE_PLUS + || codePoint == Constants.CODE_PERCENT || Character.getType(codePoint) == Character.OTHER_SYMBOL; } @@ -2304,9 +2305,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen if (!mRecapitalizeStatus.isSetAt(mLastSelectionStart, mLastSelectionEnd)) { mLastSelectionStart = mRecapitalizeStatus.getNewCursorStart(); mLastSelectionEnd = mRecapitalizeStatus.getNewCursorEnd(); - mConnection.setSelection(mLastSelectionStart, mLastSelectionEnd); } } + mConnection.finishComposingText(); mRecapitalizeStatus.rotate(); final int numCharsDeleted = mLastSelectionEnd - mLastSelectionStart; mConnection.setSelection(mLastSelectionEnd, mLastSelectionEnd); diff --git a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java index 9f7f502ea..fda97dafc 100644 --- a/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java @@ -60,7 +60,8 @@ public abstract class AbstractDictDecoder implements DictDecoder { 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE), + 0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG))); return header; } diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 336277196..28da9ffdd 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.utils.CollectionUtils; import java.io.IOException; import java.io.OutputStream; @@ -217,6 +218,25 @@ public final class DynamicBinaryDictIOUtils { } /** + * Converts a list of WeightedString to a list of PendingAttribute. + */ + public static ArrayList<PendingAttribute> resolveBigramPositions(final DictUpdater dictUpdater, + final ArrayList<WeightedString> bigramStrings) + throws IOException, UnsupportedFormatException { + if (bigramStrings == null) return CollectionUtils.newArrayList(); + final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList(); + for (final WeightedString bigram : bigramStrings) { + final int pos = dictUpdater.getTerminalPosition(bigram.mWord); + if (pos == FormatSpec.NOT_VALID_WORD) { + // TODO: figure out what is the correct thing to do here. + } else { + bigrams.add(new PendingAttribute(bigram.mFrequency, pos)); + } + } + return bigrams; + } + + /** * Insert a word into a binary dictionary. * * @param dictUpdater the dict updater. @@ -238,18 +258,9 @@ public final class DynamicBinaryDictIOUtils { final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { - final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); + final ArrayList<PendingAttribute> bigrams = resolveBigramPositions(dictUpdater, + bigramStrings); final DictBuffer dictBuffer = dictUpdater.getDictBuffer(); - if (bigramStrings != null) { - for (final WeightedString bigram : bigramStrings) { - int position = dictUpdater.getTerminalPosition(bigram.mWord); - if (position == FormatSpec.NOT_VALID_WORD) { - // TODO: figure out what is the correct thing to do here. - } else { - bigrams.add(new PendingAttribute(bigram.mFrequency, position)); - } - } - } final boolean isTerminal = true; final boolean hasBigrams = !bigrams.isEmpty(); diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 5a5d7af6b..6d5827023 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -37,13 +37,15 @@ public final class FormatSpec { * sion * * o | - * p | not used 4 bits - * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG - * i | FRENCH_LIGATURE_PROCESSING_FLAG - * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE - * n | GERMAN_UMLAUT_PROCESSING_FLAG - * f | - * lags + * p | not used 3 bits + * t | each unigram and bigram entry has a time stamp? + * i | 1 bit, 1 = yes, 0 = no : CONTAINS_TIMESTAMP_FLAG + * o | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG + * n | FRENCH_LIGATURE_PROCESSING_FLAG + * f | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE + * l | GERMAN_UMLAUT_PROCESSING_FLAG + * a | + * gs * * h | * e | size of the file header, 4bytes @@ -211,6 +213,7 @@ public final class FormatSpec { static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; static final int CONTAINS_BIGRAMS_FLAG = 0x8; + static final int CONTAINS_TIMESTAMP_FLAG = 0x10; // TODO: Make this value adaptative to content data, store it in the header, and // use it in the reading code. @@ -263,6 +266,7 @@ public final class FormatSpec { // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; + static final String UNIGRAM_TIMESTAMP_FILE_EXTENSION = ".timestamp"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; @@ -271,14 +275,20 @@ public final class FormatSpec { static final String CONTENT_TABLE_FILE_SUFFIX = "_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; + static final int UNIGRAM_TIMESTAMP_SIZE = 4; // With the English main dictionary as of October 2013, the size of bigram address table is // is 584KB with the block size being 4. // This is 91% of that of full address table. static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; - static final int BIGRAM_CONTENT_COUNT = 1; + static final int BIGRAM_CONTENT_COUNT = 2; static final int BIGRAM_FREQ_CONTENT_INDEX = 0; + static final int BIGRAM_TIMESTAMP_CONTENT_INDEX = 1; static final String BIGRAM_FREQ_CONTENT_ID = "_freq"; + static final String BIGRAM_TIMESTAMP_CONTENT_ID = "_timestamp"; + static final int BIGRAM_TIMESTAMP_SIZE = 4; + static final int BIGRAM_COUNTER_SIZE = 1; + static final int BIGRAM_LEVEL_SIZE = 1; static final int SHORTCUT_CONTENT_COUNT = 1; static final int SHORTCUT_CONTENT_INDEX = 0; @@ -321,6 +331,7 @@ public final class FormatSpec { public final int mVersion; public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; + public final boolean mHasTimestamp; @UsedForTesting public FormatOptions(final int version) { this(version, false); @@ -328,6 +339,11 @@ public final class FormatSpec { @UsedForTesting public FormatOptions(final int version, final boolean supportsDynamicUpdate) { + this(version, supportsDynamicUpdate, false /* hasTimestamp */); + } + + public FormatOptions(final int version, final boolean supportsDynamicUpdate, + final boolean hasTimestamp) { mVersion = version; if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { throw new RuntimeException("Dynamic updates are only supported with versions " @@ -335,6 +351,7 @@ public final class FormatSpec { } mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); + mHasTimestamp = hasTimestamp; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 53729075f..734223ec2 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -153,8 +153,12 @@ public class Ver4DictDecoder extends AbstractDictDecoder { final File contentFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + FormatSpec.SHORTCUT_CONTENT_ID); + final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName() + + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + + FormatSpec.SHORTCUT_CONTENT_ID); mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile, - new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); + new File[] { contentFile, timestampsFile }, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index f9dcacf77..5d5ab0462 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -45,6 +45,7 @@ public class Ver4DictEncoder implements DictEncoder { private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mUnigramTimestampOutStream; private OutputStream mTerminalAddressTableOutStream; private File mDictDir; private String mBaseFilename; @@ -69,16 +70,16 @@ public class Ver4DictEncoder implements DictEncoder { private final File[] mContentFiles; protected final OutputStream[] mContentOutStreams; - public SparseTableContentWriter(final String name, final int contentCount, - final int initialCapacity, final int blockSize, final File baseDir, - final String[] contentFilenames, final String[] contentIds) { + public SparseTableContentWriter(final String name, final int initialCapacity, + final int blockSize, final File baseDir, final String[] contentFilenames, + final String[] contentIds) { if (contentFilenames.length != contentIds.length) { throw new RuntimeException("The length of contentFilenames and the length of" + " contentIds are different " + contentFilenames.length + ", " + contentIds.length); } - mContentCount = contentCount; - mSparseTable = new SparseTable(initialCapacity, blockSize, contentCount); + mContentCount = contentFilenames.length; + mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount); mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX); mAddressTableFiles = new File[mContentCount]; mContentFiles = new File[mContentCount]; @@ -113,16 +114,40 @@ public class Ver4DictEncoder implements DictEncoder { } private static class BigramContentWriter extends SparseTableContentWriter { + private final boolean mWriteTimestamp; public BigramContentWriter(final String name, final int initialCapacity, - final File baseDir) { - super(name + FormatSpec.BIGRAM_FILE_EXTENSION, FormatSpec.BIGRAM_CONTENT_COUNT, - initialCapacity, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, - new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }, - new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }); + final File baseDir, final boolean writeTimestamp) { + super(name + FormatSpec.BIGRAM_FILE_EXTENSION, initialCapacity, + FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + getContentFilenames(name, writeTimestamp), getContentIds(writeTimestamp)); + mWriteTimestamp = writeTimestamp; } - public void writeBigramsForOneWord(final int terminalId, + private static String[] getContentFilenames(final String name, + final boolean writeTimestamp) { + final String[] contentFilenames; + if (writeTimestamp) { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION, + name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } else { + contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION }; + } + return contentFilenames; + } + + private static String[] getContentIds(final boolean writeTimestamp) { + final String[] contentIds; + if (writeTimestamp) { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID, + FormatSpec.BIGRAM_TIMESTAMP_CONTENT_ID }; + } else { + contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID }; + } + return contentIds; + } + + public void writeBigramsForOneWord(final int terminalId, final int bigramCount, final Iterator<WeightedString> bigramIterator, final FusionDictionary dict) throws IOException { write(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId, @@ -130,8 +155,16 @@ public class Ver4DictEncoder implements DictEncoder { @Override public void write(final OutputStream outStream) throws IOException { writeBigramsForOneWordInternal(outStream, bigramIterator, dict); - } - }); + }}); + if (mWriteTimestamp) { + write(FormatSpec.BIGRAM_TIMESTAMP_CONTENT_INDEX, terminalId, + new SparseTableContentWriterInterface() { + @Override + public void write(final OutputStream outStream) throws IOException { + initBigramTimestampsCountersAndLevelsForOneWordInternal(outStream, + bigramCount); + }}); + } } private void writeBigramsForOneWordInternal(final OutputStream outStream, @@ -151,13 +184,26 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } + + private void initBigramTimestampsCountersAndLevelsForOneWordInternal( + final OutputStream outStream, final int bigramCount) throws IOException { + for (int i = 0; i < bigramCount; ++i) { + // TODO: Figure out what initial values should be. + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_TIMESTAMP_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_COUNTER_SIZE); + BinaryDictEncoderUtils.writeUIntToStream(outStream, 0 /* value */, + FormatSpec.BIGRAM_LEVEL_SIZE); + } + } } private static class ShortcutContentWriter extends SparseTableContentWriter { public ShortcutContentWriter(final String name, final int initialCapacity, final File baseDir) { - super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, FormatSpec.SHORTCUT_CONTENT_COUNT, - initialCapacity, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, + super(name + FormatSpec.SHORTCUT_FILE_EXTENSION, initialCapacity, + FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir, new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION }, new String[] { FormatSpec.SHORTCUT_CONTENT_ID }); } @@ -193,18 +239,20 @@ public class Ver4DictEncoder implements DictEncoder { mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); + final File timestampFile = new File(mDictDir, + mBaseFilename + FormatSpec.UNIGRAM_TIMESTAMP_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } - if (!trieFile.exists()) trieFile.createNewFile(); - if (!freqFile.exists()) freqFile.createNewFile(); - if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); + if (formatOptions.mHasTimestamp) { + mUnigramTimestampOutStream = new FileOutputStream(timestampFile); + } } private void close() throws IOException { @@ -218,6 +266,9 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } + if (mUnigramTimestampOutStream != null) { + mUnigramTimestampOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; @@ -257,7 +308,11 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); - mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir); + if (formatOptions.mHasTimestamp) { + initUnigramTimestamps(terminalCount); + } + mBigramWriter = new BigramContentWriter(mBaseFilename, terminalCount, mDictDir, + formatOptions.mHasTimestamp); writeBigrams(flatNodes, dict); mShortcutWriter = new ShortcutContentWriter(mBaseFilename, terminalCount, mDictDir); writeShortcuts(flatNodes); @@ -348,7 +403,7 @@ public class Ver4DictEncoder implements DictEncoder { for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { - mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, + mBigramWriter.writeBigramsForOneWord(ptNode.mTerminalId, ptNode.mBigrams.size(), ptNode.mBigrams.iterator(), dict); } } @@ -408,4 +463,11 @@ public class Ver4DictEncoder implements DictEncoder { mFreqOutStream.write(freqBuf); mTerminalAddressTableOutStream.write(terminalAddressTableBuf); } + + private void initUnigramTimestamps(final int terminalCount) throws IOException { + // Initial value of time stamps for each word is 0. + final byte[] unigramTimestampBuf = + new byte[terminalCount * FormatSpec.UNIGRAM_TIMESTAMP_SIZE]; + mUnigramTimestampOutStream.write(unigramTimestampBuf); + } } |