aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp
blob: 942a74238c287758ba5b2d8b2f365f379a86d15d (about) (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"

#include "suggest/policyimpl/dictionary/utils/probability_utils.h"

namespace latinime {

// Entry-count limits. These four values are not referenced anywhere in this file;
// presumably they bound the number of unigram/bigram entries before and after garbage
// collection -- confirm against the callers that read them.
const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;

// Upper bound applied to every value returned by getProbability().
const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
// Cap used by getUpdatedUnigramProbability() and decodeUnigramProbability().
const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
// Smallest encoded unigram value accepted by isValidUnigram(). It is also the value given to
// a newly promoted unigram and the offset subtracted by decodeUnigramProbability().
const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
// Amount added by getUpdatedUnigramProbability() and removed by getUnigramProbabilityToSave().
const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
// Cap used by getUpdatedBigramProbabilityDelta() and decodeBigramProbabilityDelta().
const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
// Smallest encoded bigram delta accepted by isValidBigram(). It is also the value given to a
// newly promoted bigram and the offset subtracted by decodeBigramProbabilityDelta().
const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
// Amount added by getUpdatedBigramProbabilityDelta() and removed by
// getBigramProbabilityDeltaToSave().
const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;

/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
        const int encodedBigramProbabilityDelta) {
    if (encodedUnigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
        const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
                encodedUnigramProbability));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
    } else {
        const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
                decodeUnigramProbability(encodedUnigramProbability),
                decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
    }
}

// Computes the encoded unigram probability to store after a unigram has been observed.
//
// originalEncodedProbability: the encoded value currently stored in this dictionary, or
//         NOT_A_PROBABILITY when the unigram is not in this dictionary.
// newProbability: NOT_A_PROBABILITY when the unigram is not in other dictionaries.
//
// Returns 0 for a unigram unknown everywhere, MIN_VALID_UNIGRAM_PROBABILITY when a unigram known
// elsewhere is absent here or still below the valid threshold, and otherwise the original value
// raised by UNIGRAM_PROBABILITY_STEP, capped at MAX_UNIGRAM_PROBABILITY.
/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
        const int newProbability) {
    const bool isInOtherDictionaries = (newProbability != NOT_A_PROBABILITY);
    if (originalEncodedProbability == NOT_A_PROBABILITY) {
        // The unigram is not in this dictionary.
        return isInOtherDictionaries ? MIN_VALID_UNIGRAM_PROBABILITY : 0;
    }
    if (isInOtherDictionaries && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
        // Promote straight to the smallest valid value.
        return MIN_VALID_UNIGRAM_PROBABILITY;
    }
    const int steppedProbability = originalEncodedProbability + UNIGRAM_PROBABILITY_STEP;
    return (steppedProbability < MAX_UNIGRAM_PROBABILITY)
            ? steppedProbability : MAX_UNIGRAM_PROBABILITY;
}

// Returns the encoded unigram probability lowered by UNIGRAM_PROBABILITY_STEP, never going
// below 0.
/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
    const int loweredProbability = encodedProbability - UNIGRAM_PROBABILITY_STEP;
    return (loweredProbability > 0) ? loweredProbability : 0;
}

// Returns the encoded bigram probability delta lowered by BIGRAM_PROBABILITY_DELTA_STEP, never
// going below 0.
/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
    const int loweredDelta = encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP;
    return (loweredDelta > 0) ? loweredDelta : 0;
}

// Computes the encoded bigram probability delta to store after a bigram has been observed.
//
// originalEncodedProbabilityDelta: the encoded delta currently stored in this dictionary, or
//         NOT_A_PROBABILITY when the bigram relation is not in this dictionary.
// newProbability: NOT_A_PROBABILITY when the bigram target is not in other dictionaries.
//
// Returns 0 for a target unknown everywhere, MIN_VALID_BIGRAM_PROBABILITY_DELTA when a target
// known elsewhere has no relation here or one still below the valid threshold, and otherwise the
// original delta raised by BIGRAM_PROBABILITY_DELTA_STEP, capped at
// MAX_BIGRAM_PROBABILITY_DELTA.
/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
        const int originalEncodedProbabilityDelta, const int newProbability) {
    const bool isTargetInOtherDictionaries = (newProbability != NOT_A_PROBABILITY);
    if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
        // The bigram relation is not in this dictionary.
        return isTargetInOtherDictionaries ? MIN_VALID_BIGRAM_PROBABILITY_DELTA : 0;
    }
    if (isTargetInOtherDictionaries
            && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
        // Promote straight to the smallest valid delta.
        return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    }
    const int steppedDelta = originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP;
    return (steppedDelta < MAX_BIGRAM_PROBABILITY_DELTA)
            ? steppedDelta : MAX_BIGRAM_PROBABILITY_DELTA;
}

// Returns 1 when the encoded unigram probability is at least MIN_VALID_UNIGRAM_PROBABILITY,
// and 0 otherwise.
/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
    const bool reachesThreshold = !(encodedUnigramProbability < MIN_VALID_UNIGRAM_PROBABILITY);
    return reachesThreshold;
}

// Returns 1 when the encoded bigram probability delta is at least
// MIN_VALID_BIGRAM_PROBABILITY_DELTA, and 0 otherwise.
/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
    const bool reachesThreshold =
            !(encodedBigramProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA);
    return reachesThreshold;
}

// Decodes an encoded unigram probability by subtracting MIN_VALID_UNIGRAM_PROBABILITY.
// Returns NOT_A_PROBABILITY when the result would be negative; otherwise the result is capped
// at MAX_UNIGRAM_PROBABILITY.
/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
    const int offsetFromMinValid = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
    if (offsetFromMinValid < 0) {
        // Encoded value is below the valid threshold.
        return NOT_A_PROBABILITY;
    }
    return (offsetFromMinValid < MAX_UNIGRAM_PROBABILITY)
            ? offsetFromMinValid : MAX_UNIGRAM_PROBABILITY;
}

// Decodes an encoded bigram probability delta by subtracting
// MIN_VALID_BIGRAM_PROBABILITY_DELTA. Returns NOT_A_PROBABILITY when the result would be
// negative; otherwise the result is capped at MAX_BIGRAM_PROBABILITY_DELTA.
/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
    const int offsetFromMinValid = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    if (offsetFromMinValid < 0) {
        // Encoded delta is below the valid threshold.
        return NOT_A_PROBABILITY;
    }
    return (offsetFromMinValid < MAX_BIGRAM_PROBABILITY_DELTA)
            ? offsetFromMinValid : MAX_BIGRAM_PROBABILITY_DELTA;
}

// Hook applied by getProbability() to every raw probability before clamping.
// Currently an identity function: the raw probability is returned unchanged.
/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
    return rawProbability;
}

} // namespace latinime