diff options
Diffstat (limited to 'native')
10 files changed, 336 insertions, 33 deletions
diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 7a732a588..4a89b2b63 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -128,6 +128,7 @@ LATIN_IME_CORE_TEST_FILES := \ suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp \ suggest/policyimpl/dictionary/structure/v4/content/probability_entry_test.cpp \ suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer_test.cpp \ + suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp \ suggest/policyimpl/dictionary/utils/trie_map_test.cpp \ utils/autocorrection_threshold_utils_test.cpp \ utils/int_array_view_test.cpp diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp index bbcea2ee0..ea2d24e67 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp @@ -16,6 +16,11 @@ #include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h" +#include <algorithm> +#include <cstring> + +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + namespace latinime { bool LanguageModelDictContent::save(FILE *const file) const { @@ -66,6 +71,19 @@ bool LanguageModelDictContent::removeNgramProbabilityEntry(const WordIdArrayView return mTrieMap.remove(wordId, bitmapEntryIndex); } +bool LanguageModelDictContent::truncateEntries(const int *const entryCounts, + const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy) { + for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) { + if (entryCounts[i] <= maxEntryCounts[i]) { + continue; + } + if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i)) { + return false; + } + } + return true; +} + bool 
LanguageModelDictContent::runGCInner( const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, const TrieMap::TrieMapRange trieMapRange, @@ -118,4 +136,129 @@ int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWord return bitmapEntryIndex; } +bool LanguageModelDictContent::updateAllProbabilityEntriesInner(const int bitmapEntryIndex, + const int level, const HeaderPolicy *const headerPolicy, int *const outEntryCounts) { + for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { + if (level > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + AKLOGE("Invalid level. level: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.", + level, MAX_PREV_WORD_COUNT_FOR_N_GRAM); + return false; + } + const ProbabilityEntry probabilityEntry = + ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo); + if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence()) { + const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( + probabilityEntry.getHistoricalInfo(), headerPolicy); + if (ForgettingCurveUtils::needsToKeep(&historicalInfo, headerPolicy)) { + // Update the entry. + const ProbabilityEntry updatedEntry(probabilityEntry.getFlags(), &historicalInfo); + if (!mTrieMap.put(entry.key(), updatedEntry.encode(mHasHistoricalInfo), + bitmapEntryIndex)) { + return false; + } + } else { + // Remove the entry. 
+ if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) { + return false; + } + continue; + } + } + if (!probabilityEntry.representsBeginningOfSentence()) { + outEntryCounts[level] += 1; + } + if (!entry.hasNextLevelMap()) { + continue; + } + if (!updateAllProbabilityEntriesInner(entry.getNextLevelBitmapEntryIndex(), level + 1, + headerPolicy, outEntryCounts)) { + return false; + } + } + return true; +} + +bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel( + const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel) { + std::vector<int> prevWordIds; + std::vector<EntryInfoToTurncate> entryInfoVector; + if (!getEntryInfo(headerPolicy, targetLevel, mTrieMap.getRootBitmapEntryIndex(), + &prevWordIds, &entryInfoVector)) { + return false; + } + if (static_cast<int>(entryInfoVector.size()) <= maxEntryCount) { + return true; + } + const int entryCountToRemove = static_cast<int>(entryInfoVector.size()) - maxEntryCount; + std::partial_sort(entryInfoVector.begin(), entryInfoVector.begin() + entryCountToRemove, + entryInfoVector.end(), + EntryInfoToTurncate::Comparator()); + for (int i = 0; i < entryCountToRemove; ++i) { + const EntryInfoToTurncate &entryInfo = entryInfoVector[i]; + if (!removeNgramProbabilityEntry( + WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mEntryLevel), entryInfo.mKey)) { + return false; + } + } + return true; +} + +bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPolicy, + const int targetLevel, const int bitmapEntryIndex, std::vector<int> *const prevWordIds, + std::vector<EntryInfoToTurncate> *const outEntryInfo) const { + const int currentLevel = prevWordIds->size(); + for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) { + if (currentLevel < targetLevel) { + if (!entry.hasNextLevelMap()) { + continue; + } + prevWordIds->push_back(entry.key()); + if (!getEntryInfo(headerPolicy, targetLevel, entry.getNextLevelBitmapEntryIndex(), + prevWordIds, 
outEntryInfo)) { + return false; + } + prevWordIds->pop_back(); + continue; + } + const ProbabilityEntry probabilityEntry = + ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo); + const int probability = (mHasHistoricalInfo) ? + ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(), + headerPolicy) : probabilityEntry.getProbability(); + outEntryInfo->emplace_back(probability, + probabilityEntry.getHistoricalInfo()->getTimeStamp(), + entry.key(), targetLevel, prevWordIds->data()); + } + return true; +} + +bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()( + const EntryInfoToTurncate &left, const EntryInfoToTurncate &right) const { + if (left.mProbability != right.mProbability) { + return left.mProbability < right.mProbability; + } + if (left.mTimestamp != right.mTimestamp) { + return left.mTimestamp > right.mTimestamp; + } + if (left.mKey != right.mKey) { + return left.mKey < right.mKey; + } + if (left.mEntryLevel != right.mEntryLevel) { + return left.mEntryLevel > right.mEntryLevel; + } + for (int i = 0; i < left.mEntryLevel; ++i) { + if (left.mPrevWordIds[i] != right.mPrevWordIds[i]) { + return left.mPrevWordIds[i] < right.mPrevWordIds[i]; + } + } + // left and right represent the same entry. 
+ return false; +} + +LanguageModelDictContent::EntryInfoToTurncate::EntryInfoToTurncate(const int probability, + const int timestamp, const int key, const int entryLevel, const int *const prevWordIds) + : mProbability(probability), mTimestamp(timestamp), mKey(key), mEntryLevel(entryLevel) { + memmove(mPrevWordIds, prevWordIds, mEntryLevel * sizeof(mPrevWordIds[0])); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h index bd07f2f62..43b2aab66 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h @@ -18,6 +18,7 @@ #define LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H #include <cstdio> +#include <vector> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" @@ -29,6 +30,8 @@ namespace latinime { +class HeaderPolicy; + /** * Class representing language model. * @@ -73,9 +76,45 @@ class LanguageModelDictContent { bool removeNgramProbabilityEntry(const WordIdArrayView prevWordIds, const int wordId); + bool updateAllProbabilityEntries(const HeaderPolicy *const headerPolicy, + int *const outEntryCounts) { + for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) { + outEntryCounts[i] = 0; + } + return updateAllProbabilityEntriesInner(mTrieMap.getRootBitmapEntryIndex(), 0 /* level */, + headerPolicy, outEntryCounts); + } + + // entryCounts should be created by updateAllProbabilityEntries. 
+ bool truncateEntries(const int *const entryCounts, const int *const maxEntryCounts, + const HeaderPolicy *const headerPolicy); + private: DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent); + class EntryInfoToTurncate { + public: + class Comparator { + public: + bool operator()(const EntryInfoToTurncate &left, + const EntryInfoToTurncate &right) const; + private: + DISALLOW_ASSIGNMENT_OPERATOR(Comparator); + }; + + EntryInfoToTurncate(const int probability, const int timestamp, const int key, + const int entryLevel, const int *const prevWordIds); + + int mProbability; + int mTimestamp; + int mKey; + int mEntryLevel; + int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(EntryInfoToTurncate); + }; + TrieMap mTrieMap; const bool mHasHistoricalInfo; @@ -84,6 +123,13 @@ class LanguageModelDictContent { int *const outNgramCount); int createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds); int getBitmapEntryIndex(const WordIdArrayView prevWordIds) const; + bool updateAllProbabilityEntriesInner(const int bitmapEntryIndex, const int level, + const HeaderPolicy *const headerPolicy, int *const outEntryCounts); + bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy, + const int maxEntryCount, const int targetLevel); + bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel, + const int bitmapEntryIndex, std::vector<int> *const prevWordIds, + std::vector<EntryInfoToTurncate> *const outEntryInfo) const; }; } // namespace latinime #endif /* LATINIME_LANGUAGE_MODEL_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index fb6840ba6..b7c31bf75 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -161,29 +161,15 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA const ProbabilityEntry originalProbabilityEntry = mBuffers->getLanguageModelDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); - if (originalProbabilityEntry.hasHistoricalInfo()) { - const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( - originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy); - const ProbabilityEntry probabilityEntry(originalProbabilityEntry.getFlags(), - &historicalInfo); - if (!mBuffers->getMutableLanguageModelDictContent()->setProbabilityEntry( - toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { - AKLOGE("Cannot write updated probability entry. terminalId: %d", - toBeUpdatedPtNodeParams->getTerminalId()); - return false; - } - const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy); - if (!isValid) { - if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { - AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); - return false; - } - } - *outNeedsToKeepPtNode = isValid; - } else { - // No need to update probability. + if (originalProbabilityEntry.isValid()) { *outNeedsToKeepPtNode = true; + return true; + } + if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { + AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); + return false; } + *outNeedsToKeepPtNode = false; return true; } @@ -380,6 +366,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( isTerminal, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); } +// TODO: Move probability handling code to LanguageModelDictContent. 
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const ProbabilityEntry *const originalProbabilityEntry, const ProbabilityEntry *const probabilityEntry) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 04e3018da..2ea248e86 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -127,21 +127,28 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtN if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } - const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); + const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } if (prevWordsPtNodePos) { - const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); - BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == ptNodePos - && bigramsIt.getProbability() != NOT_A_PROBABILITY) { - return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); - } + // TODO: Support n-gram. 
+ const PtNodeParams prevWordPtNodeParams = + mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordsPtNodePos[0]); + const int prevWordTerminalId = prevWordPtNodeParams.getTerminalId(); + const ProbabilityEntry probabilityEntry = + mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry( + IntArrayView::fromObject(&prevWordTerminalId), + ptNodeParams.getTerminalId()); + if (!probabilityEntry.isValid()) { + return NOT_A_PROBABILITY; + } + if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + return ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(), + mHeaderPolicy); + } else { + return probabilityEntry.getProbability(); } - return NOT_A_PROBABILITY; } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 4220312e0..d53575aa7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -85,6 +85,27 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); + int entryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; + if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntries(headerPolicy, + entryCountTable)) { + AKLOGE("Failed to update probabilities in language model dict content."); + return false; + } + if (headerPolicy->isDecayingDict()) { + int maxEntryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; + maxEntryCountTable[0] = headerPolicy->getMaxUnigramCount(); + maxEntryCountTable[1] = headerPolicy->getMaxBigramCount(); + for (size_t i = 2; i < NELEMS(maxEntryCountTable); ++i) { + // TODO: Have max n-gram count. 
+ maxEntryCountTable[i] = headerPolicy->getMaxBigramCount(); + } + if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(entryCountTable, + maxEntryCountTable, headerPolicy)) { + AKLOGE("Failed to truncate entries in language model dict content."); + return false; + } + } + DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPtGcEventListeners @@ -187,6 +208,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return true; } +// TODO: Remove. bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( const Ver4PatriciaTrieNodeReader *const ptNodeReader, Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) { @@ -227,6 +249,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( return true; } +// TODO: Remove. bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) { const TerminalPositionLookupTable *const terminalPosLookupTable = mBuffers->getTerminalPositionLookupTable(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 833063c17..ecbe7922c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -31,7 +31,7 @@ uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) con uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size, int *const pos) const { - const int value = readUint(size, *pos); + const uint32_t value = readUint(size, *pos); *pos += size; return value; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h index c0a9fcb1d..4b3c98988 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h @@ -114,7 +114,7 @@ class ByteArrayUtils { return buffer[(*pos)++]; } - static AK_FORCE_INLINE int readUint(const uint8_t *const buffer, + static AK_FORCE_INLINE uint32_t readUint(const uint8_t *const buffer, const int size, const int pos) { // size must be in 1 to 4. ASSERT(size >= 1 && size <= 4); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h index 6d91790b2..c2aeac211 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/trie_map.h @@ -84,6 +84,10 @@ class TrieMap { return mValue; } + AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const { + return mNextLevelBitmapEntryIndex; + } + private: const TrieMap *const mTrieMap; const int mKey; diff --git a/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp new file mode 100644 index 000000000..a1c310d8a --- /dev/null +++ b/native/jni/tests/suggest/policyimpl/dictionary/utils/byte_array_utils_test.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +#include <gtest/gtest.h> + +#include <cstdint> + +namespace latinime { +namespace { + +TEST(ByteArrayUtilsTest, TestReadInt) { + const uint8_t buffer[] = { 0x1u, 0x8Au, 0x0u, 0xAAu }; + + EXPECT_EQ(0x01u, ByteArrayUtils::readUint8(buffer, 0)); + EXPECT_EQ(0x8Au, ByteArrayUtils::readUint8(buffer, 1)); + EXPECT_EQ(0x0u, ByteArrayUtils::readUint8(buffer, 2)); + EXPECT_EQ(0xAAu, ByteArrayUtils::readUint8(buffer, 3)); + + EXPECT_EQ(0x018Au, ByteArrayUtils::readUint16(buffer, 0)); + EXPECT_EQ(0x8A00u, ByteArrayUtils::readUint16(buffer, 1)); + EXPECT_EQ(0xAAu, ByteArrayUtils::readUint16(buffer, 2)); + + EXPECT_EQ(0x18A00AAu, ByteArrayUtils::readUint32(buffer, 0)); + + int pos = 0; + EXPECT_EQ(0x18A00, ByteArrayUtils::readSint24AndAdvancePosition(buffer, &pos)); + pos = 1; + EXPECT_EQ(-0xA00AA, ByteArrayUtils::readSint24AndAdvancePosition(buffer, &pos)); +} + +TEST(ByteArrayUtilsTest, TestWriteAndReadInt) { + uint8_t buffer[4]; + + int pos = 0; + const uint8_t data_1B = 0xC8; + ByteArrayUtils::writeUintAndAdvancePosition(buffer, data_1B, 1, &pos); + EXPECT_EQ(data_1B, ByteArrayUtils::readUint(buffer, 1, 0)); + + pos = 0; + const uint32_t data_4B = 0xABCD1234; + ByteArrayUtils::writeUintAndAdvancePosition(buffer, data_4B, 4, &pos); + EXPECT_EQ(data_4B, ByteArrayUtils::readUint(buffer, 4, 0)); +} + +TEST(ByteArrayUtilsTest, TestReadCodePoint) { + const uint8_t buffer[] = { 0x10, 0xFF, 0x00u, 0x20u, 0x41u, 0x1Fu, 0x60 }; + + EXPECT_EQ(0x10FF00, ByteArrayUtils::readCodePoint(buffer, 0)); + EXPECT_EQ(0x20, ByteArrayUtils::readCodePoint(buffer, 3)); + EXPECT_EQ(0x41, ByteArrayUtils::readCodePoint(buffer, 4)); + EXPECT_EQ(NOT_A_CODE_POINT, ByteArrayUtils::readCodePoint(buffer, 5)); + + int pos = 0; + int codePointArray[3]; + EXPECT_EQ(3, ByteArrayUtils::readStringAndAdvancePosition(buffer, MAX_WORD_LENGTH, + codePointArray, &pos)); + EXPECT_EQ(0x10FF00, codePointArray[0]); + EXPECT_EQ(0x20, 
codePointArray[1]); + EXPECT_EQ(0x41, codePointArray[2]); + EXPECT_EQ(0x60, ByteArrayUtils::readCodePoint(buffer, pos)); +} + +TEST(ByteArrayUtilsTest, TestWriteAndReadCodePoint) { + uint8_t buffer[10]; + + const int codePointArray[] = { 0x10FF00, 0x20, 0x41 }; + int pos = 0; + ByteArrayUtils::writeCodePointsAndAdvancePosition(buffer, codePointArray, 3, + true /* writesTerminator */, &pos); + EXPECT_EQ(0x10FF00, ByteArrayUtils::readCodePoint(buffer, 0)); + EXPECT_EQ(0x20, ByteArrayUtils::readCodePoint(buffer, 3)); + EXPECT_EQ(0x41, ByteArrayUtils::readCodePoint(buffer, 4)); + EXPECT_EQ(NOT_A_CODE_POINT, ByteArrayUtils::readCodePoint(buffer, 5)); +} + +} // namespace +} // namespace latinime |