1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
// Entry-count limits. None of these four is referenced elsewhere in this file; presumably the
// caps that trigger GC and the sizes GC trims down to -- TODO confirm against callers.
const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
// Ceiling applied to the decayed probability computed by getProbability().
const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
// Encoded unigram probability parameters:
//   MAX       - ceiling used by getUpdatedUnigramProbability() and decodeUnigramProbability().
//   MIN_VALID - threshold tested by isValidUnigram(); also the offset removed when decoding.
//   STEP      - amount added on each update and subtracted by getUnigramProbabilityToSave().
const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
// Encoded bigram probability delta parameters; same roles as the unigram ones above, used by
// the corresponding bigram functions.
const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
// Computes the probability for a unigram, optionally combined with a bigram delta.
//
// Returns NOT_A_PROBABILITY when encodedUnigramProbability is NOT_A_PROBABILITY. Otherwise the
// unigram is decoded and either passed through ProbabilityUtils::backoff() (no bigram delta
// given) or combined with the decoded delta via ProbabilityUtils::computeProbabilityForBigram().
// The result is run through getDecayedProbability() and capped at MAX_COMPUTED_PROBABILITY.
//
// NOTE(review): the decode helpers can themselves yield NOT_A_PROBABILITY for encoded values
// below the validity threshold; that value is forwarded to ProbabilityUtils as-is. Confirm
// callers only pass valid encoded values.
/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
        const int encodedBigramProbabilityDelta) {
    if (encodedUnigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    }
    const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
    if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
        // Unigram only.
        const int backedOffProbability = ProbabilityUtils::backoff(unigramProbability);
        return min(getDecayedProbability(backedOffProbability), MAX_COMPUTED_PROBABILITY);
    }
    // Unigram combined with a bigram delta.
    const int bigramProbabilityDelta =
            decodeBigramProbabilityDelta(encodedBigramProbabilityDelta);
    const int combinedProbability = ProbabilityUtils::computeProbabilityForBigram(
            unigramProbability, bigramProbabilityDelta);
    return min(getDecayedProbability(combinedProbability), MAX_COMPUTED_PROBABILITY);
}
// Returns the new encoded unigram probability after an update.
//
// originalEncodedProbability: the value currently stored, or NOT_A_PROBABILITY when the unigram
//     is not in this dictionary.
// newProbability: NOT_A_PROBABILITY when the unigram is not in other dictionaries.
/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
        const int newProbability) {
    const bool hasNewProbability = (newProbability != NOT_A_PROBABILITY);
    if (originalEncodedProbability == NOT_A_PROBABILITY) {
        // The unigram is not in this dictionary: start at the valid threshold only when it is
        // present in other dictionaries, otherwise start from 0.
        return hasNewProbability ? MIN_VALID_UNIGRAM_PROBABILITY : 0;
    }
    if (hasNewProbability && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
        // Lift a not-yet-valid entry straight to the valid threshold.
        return MIN_VALID_UNIGRAM_PROBABILITY;
    }
    // Regular case: advance by one step, never beyond the maximum.
    const int steppedProbability = originalEncodedProbability + UNIGRAM_PROBABILITY_STEP;
    return (steppedProbability < MAX_UNIGRAM_PROBABILITY)
            ? steppedProbability : MAX_UNIGRAM_PROBABILITY;
}
// Returns the encoded unigram probability lowered by UNIGRAM_PROBABILITY_STEP, floored at 0.
/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
    const int loweredProbability = encodedProbability - UNIGRAM_PROBABILITY_STEP;
    return (loweredProbability > 0) ? loweredProbability : 0;
}
// Returns the encoded bigram probability delta lowered by BIGRAM_PROBABILITY_DELTA_STEP,
// floored at 0.
/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
    const int loweredDelta = encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP;
    return (loweredDelta > 0) ? loweredDelta : 0;
}
// Returns the new encoded bigram probability delta after an update.
//
// originalEncodedProbabilityDelta: the value currently stored, or NOT_A_PROBABILITY when the
//     bigram relation is not in this dictionary.
// newProbability: NOT_A_PROBABILITY when the bigram target is not in other dictionaries.
/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
        const int originalEncodedProbabilityDelta, const int newProbability) {
    const bool hasNewProbability = (newProbability != NOT_A_PROBABILITY);
    if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
        // The bigram relation is not in this dictionary: start at the valid threshold only when
        // the target is present in other dictionaries, otherwise start from 0.
        return hasNewProbability ? MIN_VALID_BIGRAM_PROBABILITY_DELTA : 0;
    }
    if (hasNewProbability
            && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
        // Lift a not-yet-valid relation straight to the valid threshold.
        return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    }
    // Regular case: advance by one step, never beyond the maximum.
    const int steppedDelta = originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP;
    return (steppedDelta < MAX_BIGRAM_PROBABILITY_DELTA)
            ? steppedDelta : MAX_BIGRAM_PROBABILITY_DELTA;
}
// Returns 1 when the encoded unigram probability has reached MIN_VALID_UNIGRAM_PROBABILITY,
// 0 otherwise.
/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
    return (encodedUnigramProbability < MIN_VALID_UNIGRAM_PROBABILITY) ? 0 : 1;
}
// Returns 1 when the encoded bigram probability delta has reached
// MIN_VALID_BIGRAM_PROBABILITY_DELTA, 0 otherwise.
/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
    return (encodedBigramProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) ? 0 : 1;
}
// Decodes an encoded unigram probability by removing the MIN_VALID_UNIGRAM_PROBABILITY offset.
// Returns NOT_A_PROBABILITY when the result would be negative; otherwise the result capped at
// MAX_UNIGRAM_PROBABILITY.
/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
    const int decodedProbability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
    if (decodedProbability >= 0) {
        return (decodedProbability < MAX_UNIGRAM_PROBABILITY)
                ? decodedProbability : MAX_UNIGRAM_PROBABILITY;
    }
    return NOT_A_PROBABILITY;
}
// Decodes an encoded bigram probability delta by removing the
// MIN_VALID_BIGRAM_PROBABILITY_DELTA offset. Returns NOT_A_PROBABILITY when the result would be
// negative; otherwise the result capped at MAX_BIGRAM_PROBABILITY_DELTA.
/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
    const int decodedDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    if (decodedDelta >= 0) {
        return (decodedDelta < MAX_BIGRAM_PROBABILITY_DELTA)
                ? decodedDelta : MAX_BIGRAM_PROBABILITY_DELTA;
    }
    return NOT_A_PROBABILITY;
}
// Maps a raw probability to its decayed value. Currently an identity mapping: rawProbability is
// returned unchanged. getProbability() routes every computed probability through this function.
/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
return rawProbability;
}
} // namespace latinime
|