diff options
26 files changed, 583 insertions, 306 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 61ccfcfad..fdde98da1 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -35,6 +35,7 @@ import java.util.Locale; /** * Implements a static, compacted, binary dictionary of standard words. */ +// TODO: All methods which should be locked need to have a suffix "Locked". public final class BinaryDictionary extends Dictionary { private static final String TAG = BinaryDictionary.class.getSimpleName(); @@ -283,22 +284,23 @@ public final class BinaryDictionary extends Dictionary { removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); } - public void flush() { - if (!isValidDictionary()) return; - flushNative(mNativeDict, mDictFilePath); - closeNative(mNativeDict); + private void reopen() { + close(); final File dictFile = new File(mDictFilePath); mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, dictFile.length(), true /* isUpdatable */); } + public void flush() { + if (!isValidDictionary()) return; + flushNative(mNativeDict, mDictFilePath); + reopen(); + } + public void flushWithGC() { if (!isValidDictionary()) return; flushWithGCNative(mNativeDict, mDictFilePath); - closeNative(mNativeDict); - final File dictFile = new File(mDictFilePath); - mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */, - dictFile.length(), true /* isUpdatable */); + reopen(); } public boolean needsToRunGC() { @@ -338,21 +340,23 @@ public final class BinaryDictionary extends Dictionary { traverseSession.close(); } } + mDicTraverseSessions.clear(); } - closeInternal(); + closeInternalLocked(); } - private synchronized void closeInternal() { + private synchronized void closeInternalLocked() { if (mNativeDict != 0) { closeNative(mNativeDict); mNativeDict = 0; } } + // TODO: Manage BinaryDictionary instances without using WeakReference or something. @Override protected void finalize() throws Throwable { try { - closeInternal(); + closeInternalLocked(); } finally { super.finalize(); } diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 925381b50..8580a6e54 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -30,6 +30,7 @@ import com.android.inputmethod.latin.define.ProductionFlag; import com.android.inputmethod.latin.settings.SettingsValues; import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.DebugLogUtils; +import com.android.inputmethod.latin.utils.SpannableStringUtils; import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.TextRange; import com.android.inputmethod.research.ResearchLogger; @@ -610,8 +611,9 @@ public final class RichInputConnection { // We don't use TextUtils#concat because it copies all spans without respect to their // nature. If the text includes a PARAGRAPH span and it has been split, then // TextUtils#concat will crash when it tries to concat both sides of it. - return new TextRange(StringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after), - startIndexInBefore, before.length() + endIndexInAfter, before.length()); + return new TextRange( + SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(before, after), + startIndexInBefore, before.length() + endIndexInAfter, before.length()); } public boolean isCursorTouchingWord(final SettingsValues settingsValues) { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java index 4dba8e5cf..3b1d2427b 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java @@ -882,8 +882,9 @@ public class BinaryDictEncoderUtils { * @param destination the stream to write the file header to. * @param dict the dictionary to write. * @param formatOptions file format options. + * @return the size of the header. */ - /* package */ static void writeDictionaryHeader(final OutputStream destination, + /* package */ static int writeDictionaryHeader(final OutputStream destination, final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { final int version = formatOptions.mVersion; @@ -932,5 +933,6 @@ public class BinaryDictEncoderUtils { destination.write(bytes); headerBuffer.close(); + return size; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index 51b89a02a..aa5129ccb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -263,7 +263,10 @@ public final class FormatSpec { // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; + // tat = Terminal Address Table + static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; + static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 36c5a2720..4c8ff8ea4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -41,11 +41,13 @@ public class Ver4DictDecoder extends DictDecoder { private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_FREQUENCY = 2; + private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; private final File mDictDirectory; private final DictionaryBufferFactory mBufferFactory; private DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; + private DictBuffer mTerminalAddressTableBuffer; @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { @@ -77,6 +79,9 @@ public class Ver4DictDecoder extends DictDecoder { } else if (fileType == FILETYPE_FREQUENCY) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION); + } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { + return new File(mDictDirectory, + mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); } else { throw new RuntimeException("Unsupported kind of file : " + fileType); } @@ -87,6 +92,8 @@ public class Ver4DictDecoder extends DictDecoder { final String filename = mDictDirectory.getName(); mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE)); mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); + mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( + getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); } @Override diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 75b75ae2e..4fb89671f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -41,10 +41,11 @@ import java.util.Iterator; public class Ver4DictEncoder implements DictEncoder { private final File mDictPlacedDir; private byte[] mTrieBuf; - private byte[] mFreqBuf; private int mTriePos; + private int mHeaderSize; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; + private OutputStream mTerminalAddressTableOutStream; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { @@ -58,14 +59,18 @@ public class Ver4DictEncoder implements DictEncoder { final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); + final File terminalAddressTableFile = new File(mDictDir, + filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } if (!trieFile.exists()) trieFile.createNewFile(); if (!freqFile.exists()) freqFile.createNewFile(); + if (!terminalAddressTableFile.exists()) terminalAddressTableFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); + mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); } private void close() throws IOException { @@ -76,9 +81,13 @@ public class Ver4DictEncoder implements DictEncoder { if (mFreqOutStream != null) { mFreqOutStream.close(); } + if (mTerminalAddressTableOutStream != null) { + mTerminalAddressTableOutStream.close(); + } } finally { mTrieOutStream = null; mFreqOutStream = null; + mTerminalAddressTableOutStream = null; } } @@ -97,7 +106,8 @@ public class Ver4DictEncoder implements DictEncoder { openStreams(formatOptions, dict.mOptions); } - BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions); + mHeaderSize = BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, + formatOptions); MakedictLog.i("Flattening the tree..."); ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); @@ -112,10 +122,11 @@ public class Ver4DictEncoder implements DictEncoder { BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); + writeTerminalData(flatNodes, terminalCount); + final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; mTrieBuf = new byte[bufferSize]; - mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; MakedictLog.i("Writing file..."); for (PtNodeArray nodeArray : flatNodes) { @@ -126,7 +137,6 @@ public class Ver4DictEncoder implements DictEncoder { MakedictLog.i("has " + terminalCount + " terminals."); } mTrieOutStream.write(mTrieBuf); - mFreqOutStream.write(mFreqBuf); MakedictLog.i("Done"); close(); @@ -185,12 +195,6 @@ public class Ver4DictEncoder implements DictEncoder { FormatSpec.PTNODE_TERMINAL_ID_SIZE); } - private void writeFrequency(final int frequency, final int terminalId) { - final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE; - BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency, - FormatSpec.FREQUENCY_AND_FLAGS_SIZE); - } - private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); if (formatOptions.mSupportsDynamicUpdate) { @@ -260,10 +264,31 @@ public class Ver4DictEncoder implements DictEncoder { writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); if (ptNode.isTerminal()) { writeTerminalId(ptNode.mTerminalId); - writeFrequency(ptNode.mFrequency, ptNode.mTerminalId); } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } + + private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes, + final int terminalCount) throws IOException { + final byte[] freqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; + final byte[] terminalAddressTableBuf = + new byte[terminalCount * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE]; + for (final PtNodeArray nodeArray : flatNodes) { + for (final PtNode ptNode : nodeArray.mData) { + if (ptNode.isTerminal()) { + BinaryDictEncoderUtils.writeUIntToBuffer(freqBuf, + ptNode.mTerminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE, + ptNode.mFrequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE); + BinaryDictEncoderUtils.writeUIntToBuffer(terminalAddressTableBuf, + ptNode.mTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, + ptNode.mCachedAddressAfterUpdate + mHeaderSize, + FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); + } + } + } + mFreqOutStream.write(freqBuf); + mTerminalAddressTableOutStream.write(terminalAddressTableBuf); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java b/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java new file mode 100644 index 000000000..b51fd9377 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/utils/SpannableStringUtils.java @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.utils; + +import android.text.Spannable; +import android.text.SpannableString; +import android.text.Spanned; +import android.text.SpannedString; +import android.text.TextUtils; +import android.text.style.SuggestionSpan; + +public final class SpannableStringUtils { + /** + * Copies the spans from the region <code>start...end</code> in + * <code>source</code> to the region + * <code>destoff...destoff+end-start</code> in <code>dest</code>. + * Spans in <code>source</code> that begin before <code>start</code> + * or end after <code>end</code> but overlap this range are trimmed + * as if they began at <code>start</code> or ended at <code>end</code>. + * Only SuggestionSpans that don't have the SPAN_PARAGRAPH span are copied. + * + * This code is almost entirely taken from {@link TextUtils#copySpansFrom}, except for the + * kind of span that is copied. + * + * @throws IndexOutOfBoundsException if any of the copied spans + * are out of range in <code>dest</code>. + */ + public static void copyNonParagraphSuggestionSpansFrom(Spanned source, int start, int end, + Spannable dest, int destoff) { + Object[] spans = source.getSpans(start, end, SuggestionSpan.class); + + for (int i = 0; i < spans.length; i++) { + int fl = source.getSpanFlags(spans[i]); + if (0 != (fl & Spannable.SPAN_PARAGRAPH)) continue; + + int st = source.getSpanStart(spans[i]); + int en = source.getSpanEnd(spans[i]); + + if (st < start) + st = start; + if (en > end) + en = end; + + dest.setSpan(spans[i], st - start + destoff, en - start + destoff, + fl); + } + } + + /** + * Returns a CharSequence concatenating the specified CharSequences, retaining their + * SuggestionSpans that don't have the PARAGRAPH flag, but not other spans. + * + * This code is almost entirely taken from {@link TextUtils#concat(CharSequence...)}, except + * it calls copyNonParagraphSuggestionSpansFrom instead of {@link TextUtils#copySpansFrom}. + */ + public static CharSequence concatWithNonParagraphSuggestionSpansOnly(CharSequence... text) { + if (text.length == 0) { + return ""; + } + + if (text.length == 1) { + return text[0]; + } + + boolean spanned = false; + for (int i = 0; i < text.length; i++) { + if (text[i] instanceof Spanned) { + spanned = true; + break; + } + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < text.length; i++) { + sb.append(text[i]); + } + + if (!spanned) { + return sb.toString(); + } + + SpannableString ss = new SpannableString(sb); + int off = 0; + for (int i = 0; i < text.length; i++) { + int len = text[i].length(); + + if (text[i] instanceof Spanned) { + copyNonParagraphSuggestionSpansFrom((Spanned) text[i], 0, len, ss, off); + } + + off += len; + } + + return new SpannedString(ss); + } +} diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java index 327780ad0..121aecf0f 100644 --- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java @@ -20,12 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.settings.SettingsValues; -import android.text.Spannable; -import android.text.SpannableString; -import android.text.Spanned; -import android.text.SpannedString; import android.text.TextUtils; -import android.text.style.SuggestionSpan; import android.util.JsonReader; import android.util.JsonWriter; import android.util.Log; @@ -467,88 +462,4 @@ public final class StringUtils { } return ""; } - - /** - * Copies the spans from the region <code>start...end</code> in - * <code>source</code> to the region - * <code>destoff...destoff+end-start</code> in <code>dest</code>. - * Spans in <code>source</code> that begin before <code>start</code> - * or end after <code>end</code> but overlap this range are trimmed - * as if they began at <code>start</code> or ended at <code>end</code>. - * Only SuggestionSpans that don't have the SPAN_PARAGRAPH span are copied. - * - * This code is almost entirely taken from {@link TextUtils#copySpansFrom}, except for the - * kind of span that is copied. - * - * @throws IndexOutOfBoundsException if any of the copied spans - * are out of range in <code>dest</code>. - */ - public static void copyNonParagraphSuggestionSpansFrom(Spanned source, int start, int end, - Spannable dest, int destoff) { - Object[] spans = source.getSpans(start, end, SuggestionSpan.class); - - for (int i = 0; i < spans.length; i++) { - int fl = source.getSpanFlags(spans[i]); - if (0 != (fl & Spannable.SPAN_PARAGRAPH)) continue; - - int st = source.getSpanStart(spans[i]); - int en = source.getSpanEnd(spans[i]); - - if (st < start) - st = start; - if (en > end) - en = end; - - dest.setSpan(spans[i], st - start + destoff, en - start + destoff, - fl); - } - } - - /** - * Returns a CharSequence concatenating the specified CharSequences, retaining their - * SuggestionSpans that don't have the PARAGRAPH flag, but not other spans. - * - * This code is almost entirely taken from {@link TextUtils#concat(CharSequence...)}, except - * it calls copyNonParagraphSuggestionSpansFrom instead of {@link TextUtils#copySpansFrom}. - */ - public static CharSequence concatWithNonParagraphSuggestionSpansOnly(CharSequence... text) { - if (text.length == 0) { - return ""; - } - - if (text.length == 1) { - return text[0]; - } - - boolean spanned = false; - for (int i = 0; i < text.length; i++) { - if (text[i] instanceof Spanned) { - spanned = true; - break; - } - } - - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < text.length; i++) { - sb.append(text[i]); - } - - if (!spanned) { - return sb.toString(); - } - - SpannableString ss = new SpannableString(sb); - int off = 0; - for (int i = 0; i < text.length; i++) { - int len = text[i].length(); - - if (text[i] instanceof Spanned) { - copyNonParagraphSuggestionSpansFrom((Spanned) text[i], 0, len, ss, off); - } - - off += len; - } - - return new SpannedString(ss); - } } diff --git a/native/jni/Android.mk b/native/jni/Android.mk index c2070327e..0594ddff0 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ + dict_file_writing_utils.cpp \ format_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index bc2f5ee58..29307b56a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -325,7 +325,7 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { - AKLOGE("Bigram link is invalid. start position: %d", bigramPos); + AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index b033eee05..c6d8ddcf7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -263,7 +263,7 @@ class DynamicPatriciaTrieReadingHelper { AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { - AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); + AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a51ae5e1d..578645cd5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -16,9 +16,6 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" -#include <cstdio> -#include <cstring> - #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" @@ -28,13 +25,12 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "utils/hash_map_compat.h" namespace latinime { const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; -const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = - ".tmp"; // TODO: Make MAX_DICTIONARY_SIZE 8MB. const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; @@ -147,7 +143,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { return; } - flushAllToFile(fileName, &headerBuffer, mBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); } void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, @@ -161,7 +157,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) { return; } - flushAllToFile(fileName, &headerBuffer, &newDictBuffer); + DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( @@ -463,60 +459,6 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return true; } -// TODO: Create a struct which contains header, body and etc... and use here as an argument. -void DynamicPatriciaTrieWritingHelper::flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const { - const int tmpFileNameBufSize = strlen(fileName) - + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; - // Name of a temporary file used for writing that is a connected string of original name and - // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. - char tmpFileName[tmpFileNameBufSize]; - snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - FILE *const file = fopen(tmpFileName, "wb"); - if (!file) { - AKLOGI("Dictionary file %s cannnot be opened.", tmpFileName); - ASSERT(false); - return; - } - // Write the dictionary header. - if (!writeBufferToFilePointer(file, dictHeader)) { - remove(tmpFileName); - AKLOGI("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); - ASSERT(false); - return; - } - // Write the dictionary body. - if (!writeBufferToFilePointer(file, dictBody)) { - remove(tmpFileName); - AKLOGI("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); - ASSERT(false); - return; - } - fclose(file); - rename(tmpFileName, fileName); -} - -// This closes file pointer when an error is caused and returns whether the writing was succeeded -// or not. -bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const { - const int originalBufSize = buffer->getOriginalBufferSize(); - if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), - originalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); - if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), - additionalBufSize, 1, file) < 1) { - fclose(file); - return false; - } - return true; -} - bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 028fa6075..fe1b2437a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -17,7 +17,6 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H -#include <cstdio> #include <stdint.h> #include "defines.h" @@ -85,7 +84,6 @@ class DynamicPatriciaTrieWritingHelper { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; static const size_t MAX_DICTIONARY_SIZE; BufferWithExtendableBuffer *const mBuffer; @@ -124,13 +122,6 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - void flushAllToFile(const char *const fileName, - BufferWithExtendableBuffer *const dictHeader, - BufferWithExtendableBuffer *const dictBody) const; - - bool writeBufferToFilePointer(FILE *const file, - const BufferWithExtendableBuffer *const buffer) const; - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite); }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index 5a3983776..30ff10cd6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -36,6 +36,16 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000; const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1; const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; +/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( + BufferWithExtendableBuffer *const buffer, const int rootPos) { + int writingPos = rootPos; + if (!writePtNodeArraySizeAndAdvancePosition(buffer, 0 /* arraySize */, &writingPos)) { + return false; + } + return writeForwardLinkPositionAndAdvancePosition(buffer, NOT_A_DICT_POS /* forwardLinkPos */, + &writingPos); +} + /* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index a37e9fb3d..af76bc6b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -30,6 +30,8 @@ class DynamicPatriciaTrieWritingUtils { public: static const int NODE_FLAG_FIELD_SIZE; + static bool writeEmptyDictionary(BufferWithExtendableBuffer *const buffer, const int rootPos); + static bool writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 47ace23a1..9a32f64d4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -25,7 +25,7 @@ namespace latinime { const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; -const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; +const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; // Used for logging. Question mark is used to indicate that the key is not found. @@ -37,7 +37,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out return; } std::vector<int> keyCodePointVector; - insertCharactersIntoVector(key, &keyCodePointVector); + HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. @@ -53,47 +53,29 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } float HeaderPolicy::readMultipleWordCostMultiplier() const { - int attributeValue = 0; - if (getAttributeValueAsInt(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &attributeValue)) { - if (attributeValue <= 0) { - return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); - } - return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(attributeValue); - } else { - return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector); + const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, + &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); + if (demotionRate <= 0) { + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } + return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); } bool HeaderPolicy::readUsesForgettingCurveFlag() const { - int attributeValue = 0; - if (getAttributeValueAsInt(USES_FORGETTING_CURVE_KEY, &attributeValue)) { - return attributeValue != 0; - } else { - return false; - } + std::vector<int> keyVector; + HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + false /* defaultValue */); } -// Returns S_INT_MIN when the key is not found or the value is invalid. +// Returns current time when the key is not found or the value is invalid. int HeaderPolicy::readLastUpdatedTime() const { - int attributeValue = 0; - if (getAttributeValueAsInt(LAST_UPDATED_TIME_KEY, &attributeValue)) { - return attributeValue; - } else { - return S_INT_MIN; - } -} - -// Returns whether the key is found or not and stores the found value into outValue. -bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector<int> keyVector; - insertCharactersIntoVector(key, &keyVector); - HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); - if (it == mAttributeMap.end()) { - // The key was not found. - return false; - } - *outValue = parseIntAttributeValue(&(it->second)); - return true; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector); + return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector, + time(0) /* defaultValue */); } bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -117,13 +99,8 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT // Set current time as a last updated time. HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); std::vector<int> updatedTimekey; - insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); - const time_t currentTime = time(NULL); - std::vector<int> updatedTimeValue; - char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; - snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); - insertCharactersIntoVector(charBuf, &updatedTimeValue); - attributeMapTowrite[updatedTimekey] = updatedTimeValue; + HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0)); if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, &writingPos)) { return false; @@ -149,30 +126,4 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT return attributeMap; } -/* static */ int HeaderPolicy::parseIntAttributeValue( - const std::vector<int> *const attributeValue) { - int value = 0; - bool isNegative = false; - for (size_t i = 0; i < attributeValue->size(); ++i) { - if (i == 0 && attributeValue->at(i) == '-') { - isNegative = true; - } else { - if (!isdigit(attributeValue->at(i))) { - // If not a number, return S_INT_MIN - return S_INT_MIN; - } - value *= 10; - value += attributeValue->at(i) - '0'; - } - } - return isNegative ? -value : value; -} - -/* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, - std::vector<int> *const vector) { - for (int i = 0; characters[i]; ++i) { - vector->push_back(characters[i]); - } -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 6b396f3f2..e97c08ca4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -17,7 +17,6 @@ #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H -#include <cctype> #include <stdint.h> #include "defines.h" @@ -29,16 +28,26 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictBuf(dictBuf), - mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + // Reads information from existing dictionary buffer. + HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), - mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), + mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), mLastUpdatedTime(readLastUpdatedTime()) {} + // Constructs header information using an attribute map. + HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) + : mDictFormatVersion(dictFormatVersion), + mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + attributeMap)), mSize(0), mAttributeMap(*attributeMap), + mMultiWordCostMultiplier(readUsesForgettingCurveFlag()), + mUsesForgettingCurve(readUsesForgettingCurveFlag()), + mLastUpdatedTime(readLastUpdatedTime()) {} + ~HeaderPolicy() {} AK_FORCE_INLINE int getSize() const { @@ -81,10 +90,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const USES_FORGETTING_CURVE_KEY; static const char *const LAST_UPDATED_TIME_KEY; - static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; + static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - const uint8_t *const mDictBuf; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; @@ -99,15 +107,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { int readLastUpdatedTime() const; - bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); - - static int parseIntAttributeValue(const std::vector<int> *const attributeValue); - - static void insertCharactersIntoVector( - const char *const characters, std::vector<int> *const vector); }; } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 80fe88671..e6547675c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -16,6 +16,8 @@ #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include <cctype> +#include <cstdio> #include <vector> #include "defines.h" @@ -43,6 +45,12 @@ const HeaderReadWriteUtils::DictionaryFlags const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +const char *const HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_KEY = "SUPPORTS_DYNAMIC_UPDATE"; +const char *const HeaderReadWriteUtils::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = + "REQUIRES_GERMAN_UMLAUT_PROCESSING"; +const char *const HeaderReadWriteUtils::REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY = + "REQUIRES_FRENCH_LIGATURE_PROCESSING"; + /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() @@ -56,6 +64,28 @@ const HeaderReadWriteUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + AttributeMap::key_type key; + insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key); + const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key); + const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + key.clear(); + insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key); + const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key, + false /* defaultValue */); + DictionaryFlags dictflags = NO_FLAGS; + dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0; + dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0; + dictflags |= supportsDynamicUpdate ? SUPPORTS_DYNAMIC_UPDATE_FLAG : 0; + return dictflags; +} + /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); @@ -128,4 +158,57 @@ const HeaderReadWriteUtils::DictionaryFlags return true; } +/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value) { + setIntAttribute(headerAttributes, key, value ? 1 : 0); +} + +/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value) { + AttributeMap::mapped_type valueVector; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%d", value); + insertCharactersIntoVector(charBuf, &valueVector); + (*headerAttributes)[*key] = valueVector; +} + +/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const bool defaultValue) { + const int intDefaultValue = defaultValue ? 1 : 0; + const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue); + return intValue != 0; +} + +/* static */ int HeaderReadWriteUtils::readIntAttributeValue( + const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key, + const int defaultValue) { + AttributeMap::const_iterator it = headerAttributes->find(*key); + if (it != headerAttributes->end()) { + int value = 0; + bool isNegative = false; + for (size_t i = 0; i < it->second.size(); ++i) { + if (i == 0 && it->second.at(i) == '-') { + isNegative = true; + } else { + if (!isdigit(it->second.at(i))) { + // If not a number. + return defaultValue; + } + value *= 10; + value += it->second.at(i) - '0'; + } + } + return isNegative ? -value : value; + } + return defaultValue; +} + +/* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters, + std::vector<int> *const vector) { + for (int i = 0; characters[i]; ++i) { + vector->push_back(characters[i]); + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 6cce73375..caa5097f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -54,6 +54,9 @@ class HeaderReadWriteUtils { + HEADER_SIZE_FIELD_SIZE; } + static DictionaryFlags createAndGetDictionaryFlagsUsingAttributeMap( + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); @@ -69,6 +72,24 @@ class HeaderReadWriteUtils { static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, int *const writingPos); + /** + * Methods for header attributes. + */ + static void setBoolAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool value); + + static void setIntAttribute(AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int value); + + static bool readBoolAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const bool defaultValue); + + static int readIntAttributeValue(const AttributeMap *const headerAttributes, + const AttributeMap::key_type *const key, const int defaultValue); + + static void insertCharactersIntoVector(const char *const characters, + AttributeMap::key_type *const key); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); @@ -87,7 +108,10 @@ class HeaderReadWriteUtils { static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; - static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY; + static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; + static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY; }; } #endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 5269795a4..8a84bd261 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -31,9 +31,21 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", + nextPos, mDictBufferSize); + ASSERT(false); + return; + } const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { + if (nextPos < 0 || nextPos >= mDictBufferSize) { + AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", + nextPos, mDictBufferSize, i, childCount); + ASSERT(false); + return; + } nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); } } @@ -404,6 +416,11 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod if (PatriciaTrieReadingUtils::hasBigrams(flags)) { getBigramsStructurePolicy()->skipAllBigrams(&pos); } + if (mergedNodeCodePointCount <= 0) { + AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount); + ASSERT(false); + return pos; + } childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 19155f938..f1de914cb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -36,6 +36,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { PatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), + mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() { @@ -118,6 +119,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; const uint8_t *const mDictRoot; + const int mDictBufferSize; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp index 1316b425f..7df55815f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -71,8 +71,17 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, pos); } else { - if (maxLength > 0) { - outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + const int codePoint = getCodePointAndAdvancePosition(buffer, pos); + if (codePoint == NOT_A_CODE_POINT) { + // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is + // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR + // when the PtNode has a single code point. + length = 0; + AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", + *pos - 1, codePoint, buffer[*pos - 1]); + ASSERT(false); + } else if (maxLength > 0) { + outBuffer[0] = codePoint; length = 1; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp new file mode 100644 index 000000000..2e4ec2e1d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" + +#include <cstdio> +#include <cstring> + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" + +namespace latinime { + +const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp"; + +/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath, + const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + switch (dictVersion) { + case 3: + return createEmptyV3DictFile(filePath, attributeMap); + default: + // Only version 3 dictionary is supported for now. + return false; + } +} + +/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); + headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */); + BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { + return false; + } + return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); +} + +/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { + const int tmpFileNameBufSize = strlen(filePath) + + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; + // Name of a temporary file used for writing that is a connected string of original name and + // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. + char tmpFileName[tmpFileNameBufSize]; + snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + FILE *const file = fopen(tmpFileName, "wb"); + if (!file) { + AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); + ASSERT(false); + return false; + } + // Write the dictionary header. + if (!writeBufferToFile(file, dictHeader)) { + remove(tmpFileName); + AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); + ASSERT(false); + return false; + } + // Write the dictionary body. + if (!writeBufferToFile(file, dictBody)) { + remove(tmpFileName); + AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); + ASSERT(false); + return false; + } + fclose(file); + rename(tmpFileName, filePath); + return true; +} + +// This closes file pointer when an error is caused and returns whether the writing was succeeded +// or not. +/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer) { + const int originalBufSize = buffer->getOriginalBufferSize(); + if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */), + originalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); + if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), + additionalBufSize, 1, file) < 1) { + fclose(file); + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h new file mode 100644 index 000000000..bd4ac66fd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICT_FILE_WRITING_UTILS_H +#define LATINIME_DICT_FILE_WRITING_UTILS_H + +#include <cstdio> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" + +namespace latinime { + +class BufferWithExtendableBuffer; + +class DictFileWritingUtils { + public: + static bool createEmptyDictFile(const char *const filePath, const int dictVersion, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool flushAllHeaderAndBodyToFile(const char *const filePath, + BufferWithExtendableBuffer *const dictHeader, + BufferWithExtendableBuffer *const dictBody); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); + + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + + static bool createEmptyV3DictFile(const char *const filePath, + const HeaderReadWriteUtils::AttributeMap *const attributeMap); + + static bool writeBufferToFile(FILE *const file, + const BufferWithExtendableBuffer *const buffer); +}; +} // namespace latinime +#endif /* LATINIME_DICT_FILE_WRITING_UTILS_H */ diff --git a/tests/src/com/android/inputmethod/latin/utils/SpannableStringUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/SpannableStringUtilsTests.java new file mode 100644 index 000000000..fa6ad16c1 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/utils/SpannableStringUtilsTests.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.utils; + +import android.test.AndroidTestCase; +import android.test.suitebuilder.annotation.SmallTest; +import android.text.style.SuggestionSpan; +import android.text.style.URLSpan; +import android.text.SpannableStringBuilder; +import android.text.Spannable; +import android.text.Spanned; + +@SmallTest +public class SpannableStringUtilsTests extends AndroidTestCase { + public void testConcatWithSuggestionSpansOnly() { + SpannableStringBuilder s = new SpannableStringBuilder("test string\ntest string\n" + + "test string\ntest string\ntest string\ntest string\ntest string\ntest string\n" + + "test string\ntest string\n"); + final int N = 10; + for (int i = 0; i < N; ++i) { + // Put a PARAGRAPH-flagged span that should not be found in the result. + s.setSpan(new SuggestionSpan(getContext(), + new String[] {"" + i}, Spannable.SPAN_PARAGRAPH), + i * 12, i * 12 + 12, Spannable.SPAN_PARAGRAPH); + // Put a normal suggestion span that should be found in the result. + s.setSpan(new SuggestionSpan(getContext(), new String[] {"" + i}, 0), i, i * 2, 0); + // Put a URL span than should not be found in the result. + s.setSpan(new URLSpan("http://a"), i, i * 2, 0); + } + + final CharSequence a = s.subSequence(0, 15); + final CharSequence b = s.subSequence(15, s.length()); + final Spanned result = + (Spanned)SpannableStringUtils.concatWithNonParagraphSuggestionSpansOnly(a, b); + + Object[] spans = result.getSpans(0, result.length(), SuggestionSpan.class); + for (int i = 0; i < spans.length; i++) { + final int flags = result.getSpanFlags(spans[i]); + assertEquals("Should not find a span with PARAGRAPH flag", + flags & Spannable.SPAN_PARAGRAPH, 0); + assertTrue("Should be a SuggestionSpan", spans[i] instanceof SuggestionSpan); + } + } +} diff --git a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java index eb9fb984e..4e396a1cf 100644 --- a/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/StringUtilsTests.java @@ -20,11 +20,6 @@ import com.android.inputmethod.latin.settings.SettingsValues; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.SmallTest; -import android.text.style.SuggestionSpan; -import android.text.style.URLSpan; -import android.text.SpannableStringBuilder; -import android.text.Spannable; -import android.text.Spanned; import java.util.Arrays; import java.util.List; @@ -285,34 +280,4 @@ public class StringUtilsTests extends AndroidTestCase { assertEquals(objs[i], newObjArray.get(i)); } } - - public void testConcatWithSuggestionSpansOnly() { - SpannableStringBuilder s = new SpannableStringBuilder("test string\ntest string\n" - + "test string\ntest string\ntest string\ntest string\ntest string\ntest string\n" - + "test string\ntest string\n"); - final int N = 10; - for (int i = 0; i < N; ++i) { - // Put a PARAGRAPH-flagged span that should not be found in the result. - s.setSpan(new SuggestionSpan(getContext(), - new String[] {"" + i}, Spannable.SPAN_PARAGRAPH), - i * 12, i * 12 + 12, Spannable.SPAN_PARAGRAPH); - // Put a normal suggestion span that should be found in the result. - s.setSpan(new SuggestionSpan(getContext(), new String[] {"" + i}, 0), i, i * 2, 0); - // Put a URL span than should not be found in the result. - s.setSpan(new URLSpan("http://a"), i, i * 2, 0); - } - - final CharSequence a = s.subSequence(0, 15); - final CharSequence b = s.subSequence(15, s.length()); - final Spanned result = - (Spanned)StringUtils.concatWithNonParagraphSuggestionSpansOnly(a, b); - - Object[] spans = result.getSpans(0, result.length(), SuggestionSpan.class); - for (int i = 0; i < spans.length; i++) { - final int flags = result.getSpanFlags(spans[i]); - assertEquals("Should not find a span with PARAGRAPH flag", - flags & Spannable.SPAN_PARAGRAPH, 0); - assertTrue("Should be a SuggestionSpan", spans[i] instanceof SuggestionSpan); - } - } } |