diff options
author | 2014-10-06 22:03:11 +0900 | |
---|---|---|
committer | 2014-10-06 22:03:11 +0900 | |
commit | aae1a062eb98e7634fb4da996ec604bbefd4ddf5 (patch) | |
tree | 26ef38496cd7e4658e442f8b7859404f9dbf75d4 /native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | |
parent | 36c4eaadfb7a9cb833c8cca8afb49b2aec347b1d (diff) | |
download | latinime-aae1a062eb98e7634fb4da996ec604bbefd4ddf5.tar.gz latinime-aae1a062eb98e7634fb4da996ec604bbefd4ddf5.tar.xz latinime-aae1a062eb98e7634fb4da996ec604bbefd4ddf5.zip |
Improve bigram probability computation for decaying dicts.
Without personalization:
Total words: 1079345, Success Num: 819749, Success Percentage: 75.949%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1754, Bad Failure Percentage: 0.163%
Failures, with auto-correction (F-C): 28463, F-C Percentage: 2.637%
Max Keystrokes: 6074285, Min Keystrokes: 4649326, Keystroke Saving Percentage:23.459%
With current probability computing logic:
Total words: 1079382, Success Num: 838329, Success Percentage: 77.667%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1332, Bad Failure Percentage: 0.123%
Failures, with auto-correction (F-C): 28558, F-C Percentage: 2.646%
Max Keystrokes: 6074503, Min Keystrokes: 4474102, Keystroke Saving Percentage:26.346%
Remove isof files.
With new probability computing logic:
Total words: 1079356, Success Num: 844954, Success Percentage: 78.283%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1306, Bad Failure Percentage: 0.121%
Failures, with auto-correction (F-C): 27214, F-C Percentage: 2.521%
Max Keystrokes: 6074477, Min Keystrokes: 4243021, Keystroke Saving Percentage:30.150%
Remove isof files.
Bug: 16547409
Change-Id: I3d2a49c7aaa2c0f6835c52ef72d22466ee225789
Diffstat (limited to 'native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h')
-rw-r--r-- | native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | 31 |
1 files changed, 1 insertions, 30 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index daf40d4f9..bc8eaded3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -53,15 +53,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), - mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, - DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), - mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, - DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( @@ -86,15 +80,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), - mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, - DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), - mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( - &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, - DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( @@ -113,12 +101,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount), mExtendedRegionSize(headerPolicy->mExtendedRegionSize), mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), - mForgettingCurveOccurrencesToLevelUp( - headerPolicy->mForgettingCurveOccurrencesToLevelUp), mForgettingCurveProbabilityValuesTableId( headerPolicy->mForgettingCurveProbabilityValuesTableId), - mForgettingCurveDurationToLevelDown( - headerPolicy->mForgettingCurveDurationToLevelDown), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} @@ -130,8 +114,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), - mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), - mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0), + mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} @@ -217,18 +200,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return &mAttributeMap; } - AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const { - return mForgettingCurveOccurrencesToLevelUp; - } - AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const { return mForgettingCurveProbabilityValuesTableId; } - AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const { - return mForgettingCurveDurationToLevelDown; - } - AK_FORCE_INLINE int getMaxUnigramCount() const { return mMaxUnigramCount; } @@ -280,9 +255,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const MAX_BIGRAM_COUNT_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; - static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; - static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS; static const int DEFAULT_MAX_UNIGRAM_COUNT; static const int DEFAULT_MAX_BIGRAM_COUNT; @@ -300,9 +273,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mBigramCount; const int mExtendedRegionSize; const bool mHasHistoricalInfoOfWords; - const int mForgettingCurveOccurrencesToLevelUp; const int mForgettingCurveProbabilityValuesTableId; - const int mForgettingCurveDurationToLevelDown; const int mMaxUnigramCount; const int mMaxBigramCount; const int *const mCodePointTable; |