about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Keisuke Kuroyanagi <ksk@google.com> 2014-08-27 11:22:37 +0000
committer Android Git Automerger <android-git-automerger@android.com> 2014-08-27 11:22:37 +0000
commit 276ce6c0fd232c56bcb795cd0223ec26de48901b (patch)
tree 17c6a749b86620859fc798a65031c605238bbffa
parent 445f5536b6f8a77131cc12a467d040f0f85337b3 (diff)
parent 758d09364457b9d3d0c514a7fcfc8a6e317c9222 (diff)
download latinime-276ce6c0fd232c56bcb795cd0223ec26de48901b.tar.gz
latinime-276ce6c0fd232c56bcb795cd0223ec26de48901b.tar.xz
latinime-276ce6c0fd232c56bcb795cd0223ec26de48901b.zip
am 758d0936: Get entry count after truncation using LanguageModelDictContent.
* commit '758d09364457b9d3d0c514a7fcfc8a6e317c9222': Get entry count after truncation using LanguageModelDictContent.
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp 15
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h 7
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 13
3 files changed, 26 insertions, 9 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
index eb2b1ec3e..d5749e9eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp
@@ -23,6 +23,9 @@
namespace latinime {
+const int LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE = 0;
+const int LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE = 1;
+
bool LanguageModelDictContent::save(FILE *const file) const {
return mTrieMap.save(file);
}
@@ -78,12 +81,15 @@ LanguageModelDictContent::EntryRange LanguageModelDictContent::getProbabilityEnt
}
bool LanguageModelDictContent::truncateEntries(const int *const entryCounts,
- const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy) {
+ const int *const maxEntryCounts, const HeaderPolicy *const headerPolicy,
+ int *const outEntryCounts) {
for (int i = 0; i <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++i) {
if (entryCounts[i] <= maxEntryCounts[i]) {
+ outEntryCounts[i] = entryCounts[i];
continue;
}
- if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i)) {
+ if (!turncateEntriesInSpecifiedLevel(headerPolicy, maxEntryCounts[i], i,
+ &outEntryCounts[i])) {
return false;
}
}
@@ -185,7 +191,8 @@ bool LanguageModelDictContent::updateAllProbabilityEntriesInner(const int bitmap
}
bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
- const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel) {
+ const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel,
+ int *const outEntryCount) {
std::vector<int> prevWordIds;
std::vector<EntryInfoToTurncate> entryInfoVector;
if (!getEntryInfo(headerPolicy, targetLevel, mTrieMap.getRootBitmapEntryIndex(),
@@ -193,8 +200,10 @@ bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
return false;
}
if (static_cast<int>(entryInfoVector.size()) <= maxEntryCount) {
+ *outEntryCount = static_cast<int>(entryInfoVector.size());
return true;
}
+ *outEntryCount = maxEntryCount;
const int entryCountToRemove = static_cast<int>(entryInfoVector.size()) - maxEntryCount;
std::partial_sort(entryInfoVector.begin(), entryInfoVector.begin() + entryCountToRemove,
entryInfoVector.end(),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
index 961637679..aa612e35a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h
@@ -39,6 +39,9 @@ class HeaderPolicy;
*/
class LanguageModelDictContent {
public:
+ static const int UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE;
+ static const int BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE;
+
// Pair of word id and probability entry used for iteration.
class WordIdAndProbabilityEntry {
public:
@@ -158,7 +161,7 @@ class LanguageModelDictContent {
// entryCounts should be created by updateAllProbabilityEntries.
bool truncateEntries(const int *const entryCounts, const int *const maxEntryCounts,
- const HeaderPolicy *const headerPolicy);
+ const HeaderPolicy *const headerPolicy, int *const outEntryCounts);
private:
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
@@ -197,7 +200,7 @@ class LanguageModelDictContent {
bool updateAllProbabilityEntriesInner(const int bitmapEntryIndex, const int level,
const HeaderPolicy *const headerPolicy, int *const outEntryCounts);
bool turncateEntriesInSpecifiedLevel(const HeaderPolicy *const headerPolicy,
- const int maxEntryCount, const int targetLevel);
+ const int maxEntryCount, const int targetLevel, int *const outEntryCount);
bool getEntryInfo(const HeaderPolicy *const headerPolicy, const int targetLevel,
const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
std::vector<EntryInfoToTurncate> *const outEntryInfo) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index d53575aa7..1986f4f0e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -93,14 +93,16 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
}
if (headerPolicy->isDecayingDict()) {
int maxEntryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
- maxEntryCountTable[0] = headerPolicy->getMaxUnigramCount();
- maxEntryCountTable[1] = headerPolicy->getMaxBigramCount();
+ maxEntryCountTable[LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE] =
+ headerPolicy->getMaxUnigramCount();
+ maxEntryCountTable[LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE] =
+ headerPolicy->getMaxBigramCount();
for (size_t i = 2; i < NELEMS(maxEntryCountTable); ++i) {
// TODO: Have max n-gram count.
maxEntryCountTable[i] = headerPolicy->getMaxBigramCount();
}
if (!mBuffers->getMutableLanguageModelDictContent()->truncateEntries(entryCountTable,
- maxEntryCountTable, headerPolicy)) {
+ maxEntryCountTable, headerPolicy, entryCountTable)) {
AKLOGE("Failed to truncate entries in language model dict content.");
return false;
}
@@ -204,7 +206,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
return false;
}
- *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ *outUnigramCount =
+ entryCountTable[LanguageModelDictContent::UNIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE];
+ *outBigramCount =
+ entryCountTable[LanguageModelDictContent::BIGRAM_COUNT_INDEX_IN_ENTRY_COUNT_TABLE];
return true;
}