aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src/proximity_info_state_utils.cpp
diff options
context:
space:
mode:
author Satoshi Kataoka <satok@google.com> 2013-01-22 13:18:40 +0900
committer Satoshi Kataoka <satok@google.com> 2013-01-22 15:43:05 +0900
commit d4828d5053ac30476b884c177235be0cac982c92 (patch)
tree d20dab02035378444d905e300e5e819ddf51308f /native/jni/src/proximity_info_state_utils.cpp
parent 6c22439bf80da08576e86c1282afc5cfa431e235 (diff)
download latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.gz
latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.xz
latinime-d4828d5053ac30476b884c177235be0cac982c92.zip
Refactor proximity info state
Change-Id: I30cc0d8f2e48d70e214739a073eabf3a8ea73618
Diffstat (limited to 'native/jni/src/proximity_info_state_utils.cpp')
-rw-r--r-- native/jni/src/proximity_info_state_utils.cpp 368
1 files changed, 368 insertions, 0 deletions
diff --git a/native/jni/src/proximity_info_state_utils.cpp b/native/jni/src/proximity_info_state_utils.cpp
index 65a258309..e5567f7a7 100644
--- a/native/jni/src/proximity_info_state_utils.cpp
+++ b/native/jni/src/proximity_info_state_utils.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <sstream> // for debug prints
#include <vector>
#include "defines.h"
@@ -481,4 +482,371 @@ namespace latinime {
// TODO: Detect double letter more smartly
return 0.01f + static_cast<float>(beelineDistance) / static_cast<float>(time) / averageSpeed;
}
+
+// Returns getAngleDiff() of the direction of the segment (index - 1 -> index) and the direction
+// of the segment (index -> index + 1), i.e. how much the stroke turns at the sampled point
+// |index|.
+// Returns 0.0f if either coordinate vector is null, or if |index| is not strictly between the
+// first and the last sampled point (such a point lacks one of the two segments).
+/* static */ float ProximityInfoStateUtils::getPointAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index) {
+    if (!(sampledInputXs && sampledInputYs)) {
+        return 0.0f;
+    }
+    // Only interior points have both an incoming and an outgoing segment.
+    const int lastIndex = static_cast<int>(sampledInputXs->size()) - 1;
+    const bool isInteriorPoint = (0 < index) && (index < lastIndex);
+    if (!isInteriorPoint) {
+        return 0.0f;
+    }
+    const float incomingDirection =
+            getDirection(sampledInputXs, sampledInputYs, index - 1, index);
+    const float outgoingDirection =
+            getDirection(sampledInputXs, sampledInputYs, index, index + 1);
+    return getAngleDiff(incomingDirection, outgoingDirection);
+}
+
+// Returns getAngleDiff() of the direction of the segment (index0 -> index1) and the direction
+// of the segment (index1 -> index2).
+// Returns 0.0f if either coordinate vector is null or if any of the three indices lies outside
+// [0, sampledInputXs->size()).
+/* static */ float ProximityInfoStateUtils::getPointsAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const int index0, const int index1, const int index2) {
+    if (!(sampledInputXs && sampledInputYs)) {
+        return 0.0f;
+    }
+    const int pointCount = static_cast<int>(sampledInputXs->size());
+    const bool isIndex0InRange = (0 <= index0) && (index0 < pointCount);
+    const bool isIndex1InRange = (0 <= index1) && (index1 < pointCount);
+    const bool isIndex2InRange = (0 <= index2) && (index2 < pointCount);
+    if (!(isIndex0InRange && isIndex1InRange && isIndex2InRange)) {
+        return 0.0f;
+    }
+    const float firstDirection = getDirection(sampledInputXs, sampledInputYs, index0, index1);
+    const float secondDirection = getDirection(sampledInputXs, sampledInputYs, index1, index2);
+    return getAngleDiff(firstDirection, secondDirection);
+}
+
+// TODO: Remove the "scale" parameter
+// This function basically converts from a length to an edit distance. Accordingly, it's obviously
+// wrong to compare with mMaxPointToKeyLength.
+//
+// Looks up the cached value for (inputIndex, keyId) in |distanceCache_G|, which is indexed as
+// inputIndex * keyCount + keyId, multiplies it by |scale| and caps the result at
+// |maxPointToKeyLength|.
+// When |keyId| is NOT_AN_INDEX the cache is not read and MAX_POINT_TO_KEY_LENGTH is returned.
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const distanceCache_G, const int keyCount,
+        const int inputIndex, const int keyId, const float scale) {
+    if (keyId == NOT_AN_INDEX) {
+        // The char is not a key on the keyboard, so report the max length.
+        return static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+    }
+    const int cacheIndex = inputIndex * keyCount + keyId;
+    const float scaledLength = (*distanceCache_G)[cacheIndex] * scale;
+    return min(scaledLength, maxPointToKeyLength);
+}
+
+// Convenience overload of getPointToKeyByIdLength() that applies a scale of 1.0f, i.e. the
+// cached value is only capped at |maxPointToKeyLength|.
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const distanceCache_G, const int keyCount,
+        const int inputIndex, const int keyId) {
+    const float unscaled = 1.0f;
+    return getPointToKeyByIdLength(
+            maxPointToKeyLength, distanceCache_G, keyCount, inputIndex, keyId, unscaled);
+}
+
+// Updates probabilities of aligning to some keys and skipping.
+// Word suggestion should be based on these probabilities.
+//
+// |charProbabilities| is resized to |sampledInputSize|. Then, for every sampled point i in
+// [start, sampledInputSize), this function:
+//   1. Computes the probability of skipping the point and stores it under the NOT_AN_INDEX key
+//      of (*charProbabilities)[i].
+//   2. Splits the remaining probability (1 - skip probability) among the keys flagged in
+//      (*nearKeysVector)[i], proportionally to a normal-distribution density evaluated at the
+//      square root of the cached point-to-key length.
+//   3. Calls suppressCharProbabilities() against the following and preceding points until it
+//      returns false.
+//   4. Replaces every stored probability with its negative logarithm. Keys that have no entry,
+//      or whose probability is below MIN_PROBABILITY, are cleared from (*nearKeysVector)[i]
+//      (and erased from the map in the latter case).
+// Entries of |charProbabilities| before |start| are not recomputed.
+/* static */ void ProximityInfoStateUtils::updateAlignPointProbabilities(
+        const float maxPointToKeyLength, const int mostCommonKeyWidth, const int keyCount,
+        const int start, const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const std::vector<float> *const sampledSpeedRates,
+        const std::vector<int> *const sampledLengthCache,
+        const std::vector<float> *const distanceCache_G,
+        std::vector<NearKeycodesSet> *nearKeysVector,
+        std::vector<hash_map_compat<int, float> > *charProbabilities) {
+    static const float MIN_PROBABILITY = 0.000001f;
+    static const float MAX_SKIP_PROBABILITY = 0.95f;
+    static const float SKIP_FIRST_POINT_PROBABILITY = 0.01f;
+    static const float SKIP_LAST_POINT_PROBABILITY = 0.1f;
+    static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f;
+    static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f;
+    static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f;
+    static const float NEAREST_DISTANCE_WEIGHT = 0.5f;
+    static const float NEAREST_DISTANCE_BIAS = 0.5f;
+    static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f;
+    static const float NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f;
+
+    static const float ANGLE_WEIGHT = 0.90f;
+    static const float DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f;
+    static const float SKIP_DEEP_CORNER_PROBABILITY = 0.1f;
+    static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f;
+    static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f;
+    static const float SKIP_CORNER_PROBABILITY = 0.4f;
+    static const float SPEED_MARGIN = 0.1f;
+    static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f;
+
+    charProbabilities->resize(sampledInputSize);
+    // Calculates probabilities of using a point as a correlated point with the character
+    // for each point.
+    for (int i = start; i < sampledInputSize; ++i) {
+        (*charProbabilities)[i].clear();
+        // First, calculates skip probability. Starts from MAX_SKIP_PROBABILITY.
+        // Note that all values that are multiplied to this probability should be in [0.0, 1.0];
+        float skipProbability = MAX_SKIP_PROBABILITY;
+
+        const float currentAngle = getPointAngle(sampledInputXs, sampledInputYs, i);
+        const float speedRate = (*sampledSpeedRates)[i];
+
+        // Smallest point-to-key length among the keys flagged as near this point.
+        float nearestKeyDistance = static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                const float distance = getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j);
+                if (distance < nearestKeyDistance) {
+                    nearestKeyDistance = distance;
+                }
+            }
+        }
+
+        if (i == 0) {
+            skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT
+                    + NEAREST_DISTANCE_BIAS);
+            // Promote the first point
+            skipProbability *= SKIP_FIRST_POINT_PROBABILITY;
+        } else if (i == sampledInputSize - 1) {
+            skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT_FOR_LAST
+                    + NEAREST_DISTANCE_BIAS_FOR_LAST);
+            // Promote the last point
+            skipProbability *= SKIP_LAST_POINT_PROBABILITY;
+        } else {
+            // If the current speed is relatively slower than adjacent keys, we promote this point.
+            if ((*sampledSpeedRates)[i - 1] - SPEED_MARGIN > speedRate
+                    && speedRate < (*sampledSpeedRates)[i + 1] - SPEED_MARGIN) {
+                if (currentAngle < CORNER_ANGLE_THRESHOLD) {
+                    // The angle is small here, so this point gets the smaller multiplier, i.e.
+                    // it is promoted more. (e.g. pit vs put)
+                    skipProbability *= min(1.0f, speedRate
+                            * SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
+                } else {
+                    skipProbability *= min(1.0f, speedRate * SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+                            + MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
+                }
+            }
+
+            skipProbability *= min(1.0f, speedRate * nearestKeyDistance *
+                    NEAREST_DISTANCE_WEIGHT + NEAREST_DISTANCE_BIAS);
+
+            // Adjusts skip probability by a rate depending on angle.
+            // ANGLE_WEIGHT of skipProbability is adjusted by current angle.
+            skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ANGLE_WEIGHT
+                    + (1.0f - ANGLE_WEIGHT);
+            if (currentAngle > DEEP_CORNER_ANGLE_THRESHOLD) {
+                skipProbability *= SKIP_DEEP_CORNER_PROBABILITY;
+            }
+            // We assume the angle of this point is the angle for point[i], point[i - 2]
+            // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1]
+            // and point[i - 2] is this angle can be more affected by the noise.
+            // The previous angle is only looked up when point[i - 3] exists, so no negative
+            // index is ever passed to getPointsAngle().
+            if (i >= 3 && currentAngle > CORNER_ANGLE_THRESHOLD) {
+                const float prevAngle =
+                        getPointsAngle(sampledInputXs, sampledInputYs, i, i - 2, i - 3);
+                if (prevAngle < STRAIGHT_ANGLE_THRESHOLD) {
+                    skipProbability *= SKIP_CORNER_PROBABILITY;
+                }
+            }
+        }
+
+        // probabilities must be in [0.0, MAX_SKIP_PROBABILITY];
+        ASSERT(skipProbability >= 0.0f);
+        ASSERT(skipProbability <= MAX_SKIP_PROBABILITY);
+        (*charProbabilities)[i][NOT_AN_INDEX] = skipProbability;
+
+        // Second, calculates key probabilities by dividing the rest probability
+        // (1.0f - skipProbability).
+        const float inputCharProbability = 1.0f - skipProbability;
+
+        // TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine
+        // learning or something would be efficient.
+        static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION = 0.3f;
+        static const float MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION = 0.25f;
+        static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION = 0.5f;
+        static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION = 0.15f;
+        static const float MIN_STANDARD_DEVIATION = 0.37f;
+
+        const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F
+                * SPEEDxANGLE_WEIGHT_FOR_STANDARD_DEVIATION,
+                        MAX_SPEEDxANGLE_RATE_FOR_STANDARD_DEVIATION);
+        const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance
+                * SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DEVIATION,
+                        MAX_SPEEDxNEAREST_RATE_FOR_STANDARD_DEVIATION);
+        const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate + MIN_STANDARD_DEVIATION;
+
+        ProximityInfoUtils::NormalDistribution
+                distribution(CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma);
+        static const float PREV_DISTANCE_WEIGHT = 0.5f;
+        static const float NEXT_DISTANCE_WEIGHT = 0.6f;
+        // Summing up probability densities of all near keys.
+        float sumOfProbabilityDensities = 0.0f;
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                float distance = sqrtf(getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j));
+                if (i == 0 && i != sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from first point and the
+                    // next point to the key is used as a point to key distance.
+                    const float nextDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i + 1, j));
+                    if (nextDistance < distance) {
+                        // The distance of the first point tends to bigger than continuing
+                        // points because the first touch by the user can be sloppy.
+                        // So we promote the first point if the distance of that point is larger
+                        // than the distance of the next point.
+                        distance = (distance + nextDistance * NEXT_DISTANCE_WEIGHT)
+                                / (1.0f + NEXT_DISTANCE_WEIGHT);
+                    }
+                } else if (i != 0 && i == sampledInputSize - 1) {
+                    // For the last point, weighted average of distances from last point and
+                    // the previous point to the key is used as a point to key distance.
+                    const float previousDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i - 1, j));
+                    if (previousDistance < distance) {
+                        // The distance of the last point tends to bigger than continuing points
+                        // because the last touch by the user can be sloppy. So we promote the
+                        // last point if the distance of that point is larger than the distance of
+                        // the previous point.
+                        distance = (distance + previousDistance * PREV_DISTANCE_WEIGHT)
+                                / (1.0f + PREV_DISTANCE_WEIGHT);
+                    }
+                }
+                // TODO: Promote the first point when the extended line from the next input is near
+                // from a key. Also, promote the last point as well.
+                sumOfProbabilityDensities += distribution.getProbabilityDensity(distance);
+            }
+        }
+
+        // Split the probability of an input point to keys that are close to the input point.
+        // The distance adjustment below must stay identical to the one used for the sum above.
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                float distance = sqrtf(getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j));
+                if (i == 0 && i != sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from the first point and
+                    // the next point to the key is used as a point to key distance.
+                    const float nextDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i + 1, j));
+                    if (nextDistance < distance) {
+                        distance = (distance + nextDistance * NEXT_DISTANCE_WEIGHT)
+                                / (1.0f + NEXT_DISTANCE_WEIGHT);
+                    }
+                } else if (i != 0 && i == sampledInputSize - 1) {
+                    // For the last point, weighted average of distances from last point and
+                    // the previous point to the key is used as a point to key distance.
+                    const float previousDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i - 1, j));
+                    if (previousDistance < distance) {
+                        distance = (distance + previousDistance * PREV_DISTANCE_WEIGHT)
+                                / (1.0f + PREV_DISTANCE_WEIGHT);
+                    }
+                }
+                const float probabilityDensity = distribution.getProbabilityDensity(distance);
+                // If every density came out as zero, dividing would produce NaN, which the
+                // MIN_PROBABILITY filter below does not remove. Use 0 so the key is dropped.
+                const float probability = (sumOfProbabilityDensities > 0.0f)
+                        ? inputCharProbability * probabilityDensity / sumOfProbabilityDensities
+                        : 0.0f;
+                (*charProbabilities)[i][j] = probability;
+            }
+        }
+    }
+
+
+    if (DEBUG_POINTS_PROBABILITY) {
+        for (int i = 0; i < sampledInputSize; ++i) {
+            std::stringstream sstream;
+            sstream << i << ", ";
+            sstream << "(" << (*sampledInputXs)[i] << ", " << (*sampledInputYs)[i] << "), ";
+            sstream << "Speed: "<< (*sampledSpeedRates)[i] << ", ";
+            sstream << "Angle: "<< getPointAngle(sampledInputXs, sampledInputYs, i) << ", \n";
+
+            for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].begin();
+                    it != (*charProbabilities)[i].end(); ++it) {
+                if (it->first == NOT_AN_INDEX) {
+                    sstream << it->first
+                            << "(skip):"
+                            << it->second
+                            << "\n";
+                } else {
+                    sstream << it->first
+                            << "("
+                            //<< static_cast<char>(mProximityInfo->getCodePointOf(it->first))
+                            << "):"
+                            << it->second
+                            << "\n";
+                }
+            }
+            AKLOGI("%s", sstream.str().c_str());
+        }
+    }
+
+    // Decrease key probabilities of points which don't have the highest probability of that key
+    // among nearby points. The loop starts at max(start, 1), so point 0 is never the point being
+    // suppressed.
+    // NOTE(review): the original comment also stated that the last point is not suppressed, but
+    // the loop bound includes sampledInputSize - 1 -- confirm which one is intended.
+    for (int i = max(start, 1); i < sampledInputSize; ++i) {
+        for (int j = i + 1; j < sampledInputSize; ++j) {
+            if (!suppressCharProbabilities(
+                    mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+                    charProbabilities)) {
+                break;
+            }
+        }
+        for (int j = i - 1; j >= max(start, 0); --j) {
+            if (!suppressCharProbabilities(
+                    mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+                    charProbabilities)) {
+                break;
+            }
+        }
+    }
+
+    // Converting from raw probabilities to log probabilities to calculate spatial distance.
+    for (int i = start; i < sampledInputSize; ++i) {
+        for (int j = 0; j < keyCount; ++j) {
+            hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].find(j);
+            if (it == (*charProbabilities)[i].end()) {
+                (*nearKeysVector)[i].reset(j);
+            } else if (it->second < MIN_PROBABILITY) {
+                // Erases from near keys vector because it has very low probability.
+                (*nearKeysVector)[i].reset(j);
+                (*charProbabilities)[i].erase(j);
+            } else {
+                it->second = -logf(it->second);
+            }
+        }
+        (*charProbabilities)[i][NOT_AN_INDEX] = -logf((*charProbabilities)[i][NOT_AN_INDEX]);
+    }
+}
+
+// Decreases char probabilities of index0 by checking probabilities of a near point (index1) and
+// increases char probabilities of index1 by checking probabilities of index0.
+//
+// For every key whose probability at index0 is lower than at index1, the index0 probability is
+// multiplied by a suppression rate and the removed amount is added to the skip probability
+// (NOT_AN_INDEX entry) of index0. A fraction of that amount, capped by a fraction of the skip
+// probability of index1, is moved from the skip probability of index1 to that key at index1.
+//
+// Returns false without changing anything when the two |lengthCache| values differ by more than
+// SUPPRESSION_LENGTH_WEIGHT key widths; returns true otherwise.
+/* static */ bool ProximityInfoStateUtils::suppressCharProbabilities(const int mostCommonKeyWidth,
+        const int sampledInputSize, const std::vector<int> *const lengthCache,
+        const int index0, const int index1,
+        std::vector<hash_map_compat<int, float> > *charProbabilities) {
+    ASSERT(0 <= index0 && index0 < sampledInputSize);
+    ASSERT(0 <= index1 && index1 < sampledInputSize);
+
+    static const float SUPPRESSION_LENGTH_WEIGHT = 1.5f;
+    static const float MIN_SUPPRESSION_RATE = 0.1f;
+    static const float SUPPRESSION_WEIGHT = 0.5f;
+    static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f;
+    static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f;
+
+    const float keyWidth = static_cast<float>(mostCommonKeyWidth);
+    const int lengthDelta = (*lengthCache)[index0] - (*lengthCache)[index1];
+    const float separation = fabsf(static_cast<float>(lengthDelta));
+    if (separation > keyWidth * SUPPRESSION_LENGTH_WEIGHT) {
+        // The two points are too far apart to influence each other.
+        return false;
+    }
+    // The rate grows with the separation, so closer points are suppressed more strongly.
+    const float suppressionRate = MIN_SUPPRESSION_RATE
+            + separation / keyWidth / SUPPRESSION_LENGTH_WEIGHT * SUPPRESSION_WEIGHT;
+
+    typedef hash_map_compat<int, float> ProbabilityMap;
+    ProbabilityMap &suppressedPoint = (*charProbabilities)[index0];
+    ProbabilityMap &nearbyPoint = (*charProbabilities)[index1];
+    for (ProbabilityMap::iterator it = suppressedPoint.begin();
+            it != suppressedPoint.end(); ++it) {
+        const ProbabilityMap::iterator nearbyIt = nearbyPoint.find(it->first);
+        if (nearbyIt == nearbyPoint.end() || !(it->second < nearbyIt->second)) {
+            continue;
+        }
+        const float reducedProbability = it->second * suppressionRate;
+        const float suppression = it->second - reducedProbability;
+        it->second = reducedProbability;
+        // The NOT_AN_INDEX entry is the probability of skipping this point.
+        suppressedPoint[NOT_AN_INDEX] += suppression;
+
+        // Add the probability of the same key nearby index1
+        const float probabilityGain = min(suppression * SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
+                nearbyPoint[NOT_AN_INDEX] * SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
+        nearbyIt->second += probabilityGain;
+        nearbyPoint[NOT_AN_INDEX] -= probabilityGain;
+    }
+    return true;
+}
} // namespace latinime