diff options
17 files changed, 254 insertions, 141 deletions
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java index 566184244..181ad17ea 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java @@ -286,7 +286,8 @@ final public class BinaryDictionaryGetter { } if (!dictPackSettings.isWordListActive(wordListId)) continue; if (canUse) { - fileList.add(AssetFileAddress.makeFromFileName(f.getPath())); + final AssetFileAddress afa = AssetFileAddress.makeFromFileName(f.getPath()); + if (null != afa) fileList.add(afa); } else { Log.e(TAG, "Found a cached dictionary file but cannot read or use it"); } diff --git a/java/src/com/android/inputmethod/latin/Constants.java b/java/src/com/android/inputmethod/latin/Constants.java index 58cae7dd5..28d9e8652 100644 --- a/java/src/com/android/inputmethod/latin/Constants.java +++ b/java/src/com/android/inputmethod/latin/Constants.java @@ -169,9 +169,7 @@ public final class Constants { public static final int CODE_EXCLAMATION_MARK = '!'; public static final int CODE_SLASH = '/'; public static final int CODE_COMMERCIAL_AT = '@'; - // TODO: Check how this should work for right-to-left languages. It seems to stand - // that for rtl languages, a closing parenthesis is a left parenthesis. Is this - // managed by the font? Or is it a different char? + public static final int CODE_PLUS = '+'; public static final int CODE_CLOSING_PARENTHESIS = ')'; public static final int CODE_CLOSING_SQUARE_BRACKET = ']'; public static final int CODE_CLOSING_CURLY_BRACKET = '}'; diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index bfb904422..9f779eb43 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1394,7 +1394,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen || codePoint == Constants.CODE_CLOSING_PARENTHESIS || codePoint == Constants.CODE_CLOSING_SQUARE_BRACKET || codePoint == Constants.CODE_CLOSING_CURLY_BRACKET - || codePoint == Constants.CODE_CLOSING_ANGLE_BRACKET; + || codePoint == Constants.CODE_CLOSING_ANGLE_BRACKET + || codePoint == Constants.CODE_PLUS; } // Callback for the {@link SuggestionStripView}, to call when the "add to dictionary" hint is diff --git a/native/jni/Android.mk b/native/jni/Android.mk index d83bdadfa..ee93b4248 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -70,7 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \ bigram/bigram_list_read_write_utils.cpp \ bigram/dynamic_bigram_list_policy.cpp \ header/header_policy.cpp \ - header/header_reading_utils.cpp \ + header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ dictionary_structure_with_buffer_policy_factory.cpp \ dynamic_patricia_trie_node_reader.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 2198a13c9..f91dd0e56 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -250,7 +250,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - writingHelper.writeToDictFile(filePath, mBuffer->getBuffer(), mHeaderPolicy.getSize()); + writingHelper.writeToDictFile(filePath, &mHeaderPolicy); } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 2cbb0ff3b..ebe1f3212 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -33,7 +33,7 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a67c0d94a..31178fb5c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" @@ -137,7 +138,11 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, - const uint8_t *const headerBuf, const int headerSize) { + const HeaderPolicy *const headerPolicy) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { + return; + } const int tmpFileNameBufSize = strlen(fileName) + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1; char tmpFileName[tmpFileNameBufSize]; @@ -148,7 +153,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam return; } // Write header. - if (fwrite(headerBuf, headerSize, 1, file) < 1) { + if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */), + headerBuffer.getTailPosition(), 1, file) < 1) { fclose(file); remove(tmpFileName); return; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index faf7a4e1b..219ea9857 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -28,6 +28,7 @@ class DynamicBigramListPolicy; class DynamicPatriciaTrieNodeReader; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; +class HeaderPolicy; class DynamicPatriciaTrieWritingHelper { public: @@ -48,8 +49,9 @@ class DynamicPatriciaTrieWritingHelper { // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); - void writeToDictFile(const char *const fileName, const uint8_t *const headerBuf, - const int headerSize); + void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy); + + void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 196da5c97..47ace23a1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -17,6 +17,8 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include <cstddef> +#include <cstdio> +#include <ctime> namespace latinime { @@ -36,7 +38,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } std::vector<int> keyCodePointVector; insertCharactersIntoVector(key, &keyCodePointVector); - HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); + HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. outValue[0] = '?'; @@ -85,7 +87,7 @@ int HeaderPolicy::readLastUpdatedTime() const { bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector<int> keyVector; insertCharactersIntoVector(key, &keyVector); - HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); + HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); if (it == mAttributeMap.end()) { // The key was not found. return false; @@ -94,10 +96,56 @@ bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outV return true; } -/* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes( - const uint8_t *const dictBuf) { - HeaderReadingUtils::AttributeMap attributeMap; - HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); +bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const bool updatesLastUpdatedTime) const { + int writingPos = 0; + if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, + &writingPos)) { + return false; + } + if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags, + &writingPos)) { + return false; + } + // Temporarily writes a dummy header size. + int headerSizeFieldPos = writingPos; + if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */, + &writingPos)) { + return false; + } + if (updatesLastUpdatedTime) { + // Set current time as a last updated time. + HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); + std::vector<int> updatedTimekey; + insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + const time_t currentTime = time(NULL); + std::vector<int> updatedTimeValue; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); + insertCharactersIntoVector(charBuf, &updatedTimeValue); + attributeMapTowrite[updatedTimekey] = updatedTimeValue; + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, + &writingPos)) { + return false; + } + } else { + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap, + &writingPos)) { + return false; + } + } + // Writes an actual header size. + if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos, + &headerSizeFieldPos)) { + return false; + } + return true; +} + +/* static */ HeaderReadWriteUtils::AttributeMap + HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) { + HeaderReadWriteUtils::AttributeMap attributeMap; + HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); return attributeMap; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 930b475c7..6b396f3f2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -22,15 +22,18 @@ #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" -#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf) - : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), - mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), + explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictBuf(dictBuf), + mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), + mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), @@ -43,16 +46,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { } AK_FORCE_INLINE bool supportsDynamicUpdate() const { - return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); + return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags); } AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags); } AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { - return HeaderReadingUtils::requiresFrenchLigatureProcessing( - mDictionaryFlags); + return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags); } AK_FORCE_INLINE float getMultiWordCostMultiplier() const { @@ -70,6 +72,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; + bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const bool updatesLastUpdatedTime) const; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); @@ -80,9 +85,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; const uint8_t *const mDictBuf; - const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; + const FormatUtils::FORMAT_VERSION mDictFormatVersion; + const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; - HeaderReadingUtils::AttributeMap mAttributeMap; + HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; const bool mUsesForgettingCurve; const int mLastUpdatedTime; @@ -95,7 +101,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes( + static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); static int parseIntAttributeValue(const std::vector<int> *const attributeValue); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp new file mode 100644 index 000000000..80fe88671 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" + +#include <vector> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +namespace latinime { + +const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; +const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; + +const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; +const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; +const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; +const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; + +const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; +// Flags for special processing +// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or +// something very bad (like, the apocalypse) will happen. Please update both at the same time. +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; + +/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { + // See the format of the header in the comment in + // BinaryDictionaryFormatUtils::detectFormatVersion() + return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE + + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); +} + +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) { + return ByteArrayUtils::readUint16(dictBuf, + HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); +} + +/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, + AttributeMap *const headerAttributes) { + const int headerSize = getHeaderSize(dictBuf); + int pos = getHeaderOptionsPosition(); + if (pos == NOT_A_DICT_POS) { + // The header doesn't have header options. + return; + } + int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; + int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; + while (pos < headerSize) { + const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, + MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); + std::vector<int> key; + key.insert(key.end(), keyBuffer, keyBuffer + keyLength); + const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, + MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); + std::vector<int> value; + value.insert(value.end(), valueBuffer, valueBuffer + valueLength); + headerAttributes->insert(AttributeMap::value_type(key, value)); + } +} + +/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion( + BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, + int *const writingPos) { + if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE, + writingPos)) { + return false; + } + switch (version) { + case FormatUtils::VERSION_2: + // Version 2 dictionary writing is not supported. + return false; + case FormatUtils::VERSION_3: + return buffer->writeUintAndAdvancePosition(3 /* data */, + HEADER_DICTIONARY_VERSION_SIZE, writingPos); + default: + return false; + } +} + +/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags( + BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags, + int *const writingPos) { + return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos); +} + +/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize( + BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) { + return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos); +} + +/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes( + BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, + int *const writingPos) { + for (AttributeMap::const_iterator it = headerAttributes->begin(); + it != headerAttributes->end(); ++it) { + // Write a key. + if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(), + true /* writesTerminator */, writingPos)) { + return false; + } + // Write a value. + if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(), + true /* writesTerminator */, writingPos)) { + return false; + } + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h index 5716198fb..6cce73375 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h @@ -14,18 +14,21 @@ * limitations under the License. */ -#ifndef LATINIME_HEADER_READING_UTILS_H -#define LATINIME_HEADER_READING_UTILS_H +#ifndef LATINIME_HEADER_READ_WRITE_UTILS_H +#define LATINIME_HEADER_READ_WRITE_UTILS_H #include <map> #include <stdint.h> #include <vector> #include "defines.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { -class HeaderReadingUtils { +class BufferWithExtendableBuffer; + +class HeaderReadWriteUtils { public: typedef uint16_t DictionaryFlags; typedef std::map<std::vector<int>, std::vector<int> > AttributeMap; @@ -54,8 +57,20 @@ class HeaderReadingUtils { static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); + static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer, + const FormatUtils::FORMAT_VERSION version, int *const writingPos); + + static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer, + const DictionaryFlags flags, int *const writingPos); + + static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer, + const int size, int *const writingPos); + + static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, + const AttributeMap *const headerAttributes, int *const writingPos); + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); static const int MAX_ATTRIBUTE_KEY_LENGTH; static const int MAX_ATTRIBUTE_VALUE_LENGTH; @@ -75,4 +90,4 @@ class HeaderReadingUtils { static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; }; } -#endif /* LATINIME_HEADER_READING_UTILS_H */ +#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp deleted file mode 100644 index 186c043c1..000000000 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2013, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "suggest/policyimpl/dictionary/header/header_reading_utils.h" - -#include <vector> - -#include "defines.h" -#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" - -namespace latinime { - -const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; -const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; - -const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; -const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; -const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; -const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; - -const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0; -// Flags for special processing -// Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or -// something very bad (like, the apocalypse) will happen. Please update both at the same time. -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; - -/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { - // See the format of the header in the comment in - // BinaryDictionaryFormatUtils::detectFormatVersion() - return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE - + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); -} - -/* static */ HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { - return ByteArrayUtils::readUint16(dictBuf, - HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); -} - -/* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, - AttributeMap *const headerAttributes) { - const int headerSize = getHeaderSize(dictBuf); - int pos = getHeaderOptionsPosition(); - if (pos == NOT_A_DICT_POS) { - // The header doesn't have header options. - return; - } - int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; - int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; - while (pos < headerSize) { - const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, - MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); - std::vector<int> key; - key.insert(key.end(), keyBuffer, keyBuffer + keyLength); - const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, - MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); - std::vector<int> value; - value.insert(value.end(), valueBuffer, valueBuffer + valueLength); - headerAttributes->insert(AttributeMap::value_type(key, value)); - } -} - -} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index cee3e4ab2..697d0159c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -34,7 +34,7 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 3796c7b7b..1d77d5c27 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -20,20 +20,10 @@ namespace latinime { -/** - * Dictionary size - */ -// Any file smaller than this is not a dictionary. -const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4; +const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE; -/** - * Format versions - */ -// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported -// or obsolete dictionary formats. -const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; -// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 -const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; +// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12 +const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; /* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion( const uint8_t *const dict, const int dictSize) { @@ -45,16 +35,10 @@ const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { - case HEADER_VERSION_2_MAGIC_NUMBER: - // Version 2 header are at least 12 bytes long. - // If this header has the version 2 magic number but is less than 12 bytes long, - // then it's an unknown format and we need to avoid confidently reading the next bytes. - if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { - return UNKNOWN_VERSION; - } + case MAGIC_NUMBER: // Version 2 header is as follows: // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE - // Version number (2 bytes) + // Dictionary format version number (2 bytes) // Options (2 bytes) // Header size (4 bytes) : integer, big endian if (ByteArrayUtils::readUint16(dict, 4) == 2) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index f84321577..79ed0de29 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -34,14 +34,16 @@ class FormatUtils { UNKNOWN_VERSION }; + // 32 bit magic number is stored at the beginning of the dictionary header to reject + // unsupported or obsolete dictionary formats. + static const uint32_t MAGIC_NUMBER; + static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); private: DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils); static const int DICTIONARY_MINIMUM_SIZE; - static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; - static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_FORMAT_UTILS_H */ diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index d8105ba38..00d76c990 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -358,8 +358,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertEquals(-1, binaryDictionary.getFrequency("aaa")); - assertEquals(-1, binaryDictionary.getFrequency("abcd")); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); binaryDictionary.addUnigramWord("aaa", probability); binaryDictionary.addUnigramWord("abcd", probability); |