aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2014-08-25 21:07:10 +0900
committerKeisuke Kuroyanagi <ksk@google.com>2014-08-26 12:05:02 +0900
commit8b4409f4b92db9496bc5983ee6b76d8acdcc4216 (patch)
treefe504638ccd3d66759ff2c0759ad7bd7c0820788 /native/jni/src
parent1c14effab5a4f3d4ca9399e03f897bc56392d2d4 (diff)
downloadlatinime-8b4409f4b92db9496bc5983ee6b76d8acdcc4216.tar.gz
latinime-8b4409f4b92db9496bc5983ee6b76d8acdcc4216.tar.xz
latinime-8b4409f4b92db9496bc5983ee6b76d8acdcc4216.zip
Use LanguageModelDictContent in getWordProperty().
Bug: 14425059 Change-Id: Ic230f764ff5570f24ce6ce930023798718f326df
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp71
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h2
2 files changed, 25 insertions, 48 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 461397b2c..b7411d6f9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -188,18 +188,6 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId());
}
-int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
- if (ptNodePos == NOT_A_DICT_POS) {
- return NOT_A_DICT_POS;
- }
- const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
- if (ptNodeParams.isDeleted()) {
- return NOT_A_DICT_POS;
- }
- return mBuffers->getBigramDictContent()->getBigramListHeadPos(
- ptNodeParams.getTerminalId());
-}
-
bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) {
@@ -480,41 +468,32 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
+ // TODO: Support n-gram.
std::vector<BigramProperty> bigrams;
- const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
- if (bigramListPos != NOT_A_DICT_POS) {
- int bigramWord1CodePoints[MAX_WORD_LENGTH];
- const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
- const TerminalPositionLookupTable *const terminalPositionLookupTable =
- mBuffers->getTerminalPositionLookupTable();
- bool hasNext = true;
- int readingPos = bigramListPos;
- while (hasNext) {
- const BigramEntry bigramEntry =
- bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
- hasNext = bigramEntry.hasNext();
- const int word1TerminalId = bigramEntry.getTargetTerminalId();
- const int word1TerminalPtNodePos =
- terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
- if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
- continue;
- }
- // Word (unigram) probability
- int word1Probability = NOT_A_PROBABILITY;
- const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
- word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
- &word1Probability);
- const std::vector<int> word1(bigramWord1CodePoints,
- bigramWord1CodePoints + codePointCount);
- const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
- const int probability = bigramEntry.hasHistoricalInfo() ?
- ForgettingCurveUtils::decodeProbability(
- bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
- bigramEntry.getProbability();
- bigrams.emplace_back(&word1, probability,
- historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
- historicalInfo->getCount());
- }
+ const int wordId = ptNodeParams.getTerminalId();
+ const WordIdArrayView prevWordIds = WordIdArrayView::fromObject(&wordId);
+ const TerminalPositionLookupTable *const terminalPositionLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ int bigramWord1CodePoints[MAX_WORD_LENGTH];
+ for (const auto entry : mBuffers->getLanguageModelDictContent()->getProbabilityEntries(
+ prevWordIds)) {
+ const int word1TerminalPtNodePos =
+ terminalPositionLookupTable->getTerminalPtNodePosition(entry.getWordId());
+ // Word (unigram) probability
+ int word1Probability = NOT_A_PROBABILITY;
+ const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
+ word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
+ &word1Probability);
+ const std::vector<int> word1(bigramWord1CodePoints,
+ bigramWord1CodePoints + codePointCount);
+ const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
+ const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
+ probabilityEntry.getProbability();
+ bigrams.emplace_back(&word1, probability,
+ historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
+ historicalInfo->getCount());
}
// Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index faad4290d..343c9ad27 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -143,8 +143,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
-
- int getBigramsPositionOfPtNode(const int ptNodePos) const;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H