Refactor proximity info state

Change-Id: I30cc0d8f2e48d70e214739a073eabf3a8ea73618
author: Satoshi Kataoka <satok@google.com> 2013-01-22 13:18:40 +0900
committer: Satoshi Kataoka <satok@google.com> 2013-01-22 15:43:05 +0900
commit: d4828d5053ac30476b884c177235be0cac982c92 (patch)
tree: d20dab02035378444d905e300e5e819ddf51308f /native/jni/src/proximity_info_state_utils.cpp
parent: 6c22439bf80da08576e86c1282afc5cfa431e235 (diff)
download: latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.gz
latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.xz
latinime-d4828d5053ac30476b884c177235be0cac982c92.zip
1 files changed, 368 insertions, 0 deletions
diff --git a/native/jni/src/proximity_info_state_utils.cpp b/native/jni/src/proximity_info_state_utils.cpp
index 65a258309..e5567f7a7 100644
--- a/native/jni/src/proximity_info_state_utils.cpp
+++ b/native/jni/src/proximity_info_state_utils.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <sstream> // for debug prints
 #include <vector>
 
 #include "defines.h"
@@ -481,4 +482,371 @@ namespace latinime {
     // TODO: Detect double letter more smartly
     return 0.01f + static_cast<float>(beelineDistance) / static_cast<float>(time) / averageSpeed;
 }
+
+/* static */ float ProximityInfoStateUtils::getPointAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs, const int index) {
+    if (!sampledInputXs || !sampledInputYs) {
+        return 0.0f;
+    }
+    const int sampledInputSize = sampledInputXs->size();
+    if (index <= 0 || index >= sampledInputSize - 1) {
+        return 0.0f;
+    }
+    const float previousDirection = getDirection(sampledInputXs, sampledInputYs, index - 1, index);
+    const float nextDirection = getDirection(sampledInputXs, sampledInputYs, index, index + 1);
+    const float directionDiff = getAngleDiff(previousDirection, nextDirection);
+    return directionDiff;
+}
+
+/* static */ float ProximityInfoStateUtils::getPointsAngle(
+        const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const int index0, const int index1, const int index2) {
+    if (!sampledInputXs || !sampledInputYs) {
+        return 0.0f;
+    }
+    const int sampledInputSize = sampledInputXs->size();
+    if (index0 < 0 || index0 > sampledInputSize - 1) {
+        return 0.0f;
+    }
+    if (index1 < 0 || index1 > sampledInputSize - 1) {
+        return 0.0f;
+    }
+    if (index2 < 0 || index2 > sampledInputSize - 1) {
+        return 0.0f;
+    }
+    const float previousDirection = getDirection(sampledInputXs, sampledInputYs, index0, index1);
+    const float nextDirection = getDirection(sampledInputXs, sampledInputYs, index1, index2);
+    return getAngleDiff(previousDirection, nextDirection);
+}
+
+// TODO: Remove the "scale" parameter
+// This function basically converts from a length to an edit distance. Accordingly, it's obviously
+// wrong to compare with mMaxPointToKeyLength.
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const distanceCache_G, const int keyCount,
+        const int inputIndex, const int keyId, const float scale) {
+    if (keyId != NOT_AN_INDEX) {
+        const int index = inputIndex * keyCount + keyId;
+        return min((*distanceCache_G)[index] * scale, maxPointToKeyLength);
+    }
+    // If the char is not a key on the keyboard then return the max length.
+    return static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+}
+
+/* static */ float ProximityInfoStateUtils::getPointToKeyByIdLength(const float maxPointToKeyLength,
+        const std::vector<float> *const distanceCache_G, const int keyCount,
+        const int inputIndex, const int keyId) {
+    return getPointToKeyByIdLength(maxPointToKeyLength, distanceCache_G, keyCount, inputIndex,
+            keyId, 1.0f);
+}
+
+// Updates probabilities of aligning to some keys and skipping.
+// Word suggestion should be based on this probabilities.
+/* static */ void ProximityInfoStateUtils::updateAlignPointProbabilities(
+        const float maxPointToKeyLength, const int mostCommonKeyWidth, const int keyCount,
+        const int start, const int sampledInputSize, const std::vector<int> *const sampledInputXs,
+        const std::vector<int> *const sampledInputYs,
+        const std::vector<float> *const sampledSpeedRates,
+        const std::vector<int> *const sampledLengthCache,
+        const std::vector<float> *const distanceCache_G,
+        std::vector<NearKeycodesSet> *nearKeysVector,
+        std::vector<hash_map_compat<int, float> > *charProbabilities) {
+    static const float MIN_PROBABILITY = 0.000001f;
+    static const float MAX_SKIP_PROBABILITY = 0.95f;
+    static const float SKIP_FIRST_POINT_PROBABILITY = 0.01f;
+    static const float SKIP_LAST_POINT_PROBABILITY = 0.1f;
+    static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f;
+    static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f;
+    static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f;
+    static const float NEAREST_DISTANCE_WEIGHT = 0.5f;
+    static const float NEAREST_DISTANCE_BIAS = 0.5f;
+    static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f;
+    static const float NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f;
+
+    static const float ANGLE_WEIGHT = 0.90f;
+    static const float DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f;
+    static const float SKIP_DEEP_CORNER_PROBABILITY = 0.1f;
+    static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f;
+    static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f;
+    static const float SKIP_CORNER_PROBABILITY = 0.4f;
+    static const float SPEED_MARGIN = 0.1f;
+    static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f;
+
+    charProbabilities->resize(sampledInputSize);
+    // Calculates probabilities of using a point as a correlated point with the character
+    // for each point.
+    for (int i = start; i < sampledInputSize; ++i) {
+        (*charProbabilities)[i].clear();
+        // First, calculates skip probability. Starts form MIN_SKIP_PROBABILITY.
+        // Note that all values that are multiplied to this probability should be in [0.0, 1.0];
+        float skipProbability = MAX_SKIP_PROBABILITY;
+
+        const float currentAngle = getPointAngle(sampledInputXs, sampledInputYs, i);
+        const float speedRate = (*sampledSpeedRates)[i];
+
+        float nearestKeyDistance = static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                const float distance = getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j);
+                if (distance < nearestKeyDistance) {
+                    nearestKeyDistance = distance;
+                }
+            }
+        }
+
+        if (i == 0) {
+            skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT
+                    + NEAREST_DISTANCE_BIAS);
+            // Promote the first point
+            skipProbability *= SKIP_FIRST_POINT_PROBABILITY;
+        } else if (i == sampledInputSize - 1) {
+            skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT_FOR_LAST
+                    + NEAREST_DISTANCE_BIAS_FOR_LAST);
+            // Promote the last point
+            skipProbability *= SKIP_LAST_POINT_PROBABILITY;
+        } else {
+            // If the current speed is relatively slower than adjacent keys, we promote this point.
+            if ((*sampledSpeedRates)[i - 1] - SPEED_MARGIN > speedRate
+                    && speedRate < (*sampledSpeedRates)[i + 1] - SPEED_MARGIN) {
+                if (currentAngle < CORNER_ANGLE_THRESHOLD) {
+                    skipProbability *= min(1.0f, speedRate
+                            * SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
+                } else {
+                    // If the angle is small enough, we promote this point more. (e.g. pit vs put)
+                    skipProbability *= min(1.0f, speedRate * SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+                            + MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
+                }
+            }
+
+            skipProbability *= min(1.0f, speedRate * nearestKeyDistance *
+                    NEAREST_DISTANCE_WEIGHT + NEAREST_DISTANCE_BIAS);
+
+            // Adjusts skip probability by a rate depending on angle.
+            // ANGLE_RATE of skipProbability is adjusted by current angle.
+            skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ANGLE_WEIGHT
+                    + (1.0f - ANGLE_WEIGHT);
+            if (currentAngle > DEEP_CORNER_ANGLE_THRESHOLD) {
+                skipProbability *= SKIP_DEEP_CORNER_PROBABILITY;
+            }
+            // We assume the angle of this point is the angle for point[i], point[i - 2]
+            // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1]
+            // and point[i - 2] is this angle can be more affected by the noise.
+            const float prevAngle = getPointsAngle(sampledInputXs, sampledInputYs, i, i - 2, i - 3);
+            if (i >= 3 && prevAngle < STRAIGHT_ANGLE_THRESHOLD
+                    && currentAngle > CORNER_ANGLE_THRESHOLD) {
+                skipProbability *= SKIP_CORNER_PROBABILITY;
+            }
+        }
+
+        // probabilities must be in [0.0, MAX_SKIP_PROBABILITY];
+        ASSERT(skipProbability >= 0.0f);
+        ASSERT(skipProbability <= MAX_SKIP_PROBABILITY);
+        (*charProbabilities)[i][NOT_AN_INDEX] = skipProbability;
+
+        // Second, calculates key probabilities by dividing the rest probability
+        // (1.0f - skipProbability).
+        const float inputCharProbability = 1.0f - skipProbability;
+
+        // TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine
+        // learning or something would be efficient.
+        static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f;
+        static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f;
+        static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f;
+        static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f;
+        static const float MIN_STANDERD_DIVIATION = 0.37f;
+
+        const float speedxAngleRate = min(speedRate * currentAngle / M_PI_F
+                * SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION,
+                        MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION);
+        const float speedxNearestKeyDistanceRate = min(speedRate * nearestKeyDistance
+                * SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION,
+                        MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION);
+        const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate + MIN_STANDERD_DIVIATION;
+
+        ProximityInfoUtils::NormalDistribution
+                distribution(CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma);
+        static const float PREV_DISTANCE_WEIGHT = 0.5f;
+        static const float NEXT_DISTANCE_WEIGHT = 0.6f;
+        // Summing up probability densities of all near keys.
+        float sumOfProbabilityDensities = 0.0f;
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                float distance = sqrtf(getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j));
+                if (i == 0 && i != sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from first point and the
+                    // next point to the key is used as a point to key distance.
+                    const float nextDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i + 1, j));
+                    if (nextDistance < distance) {
+                        // The distance of the first point tends to bigger than continuing
+                        // points because the first touch by the user can be sloppy.
+                        // So we promote the first point if the distance of that point is larger
+                        // than the distance of the next point.
+                        distance = (distance + nextDistance * NEXT_DISTANCE_WEIGHT)
+                                / (1.0f + NEXT_DISTANCE_WEIGHT);
+                    }
+                } else if (i != 0 && i == sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from last point and
+                    // the previous point to the key is used as a point to key distance.
+                    const float previousDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i - 1, j));
+                    if (previousDistance < distance) {
+                        // The distance of the last point tends to bigger than continuing points
+                        // because the last touch by the user can be sloppy. So we promote the
+                        // last point if the distance of that point is larger than the distance of
+                        // the previous point.
+                        distance = (distance + previousDistance * PREV_DISTANCE_WEIGHT)
+                                / (1.0f + PREV_DISTANCE_WEIGHT);
+                    }
+                }
+                // TODO: Promote the first point when the extended line from the next input is near
+                // from a key. Also, promote the last point as well.
+                sumOfProbabilityDensities += distribution.getProbabilityDensity(distance);
+            }
+        }
+
+        // Split the probability of an input point to keys that are close to the input point.
+        for (int j = 0; j < keyCount; ++j) {
+            if ((*nearKeysVector)[i].test(j)) {
+                float distance = sqrtf(getPointToKeyByIdLength(
+                        maxPointToKeyLength, distanceCache_G, keyCount, i, j));
+                if (i == 0 && i != sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from the first point and
+                    // the next point to the key is used as a point to key distance.
+                    const float prevDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i + 1, j));
+                    if (prevDistance < distance) {
+                        distance = (distance + prevDistance * NEXT_DISTANCE_WEIGHT)
+                                / (1.0f + NEXT_DISTANCE_WEIGHT);
+                    }
+                } else if (i != 0 && i == sampledInputSize - 1) {
+                    // For the first point, weighted average of distances from last point and
+                    // the previous point to the key is used as a point to key distance.
+                    const float prevDistance = sqrtf(getPointToKeyByIdLength(
+                            maxPointToKeyLength, distanceCache_G, keyCount, i - 1, j));
+                    if (prevDistance < distance) {
+                        distance = (distance + prevDistance * PREV_DISTANCE_WEIGHT)
+                                / (1.0f + PREV_DISTANCE_WEIGHT);
+                    }
+                }
+                const float probabilityDensity = distribution.getProbabilityDensity(distance);
+                const float probability = inputCharProbability * probabilityDensity
+                        / sumOfProbabilityDensities;
+                (*charProbabilities)[i][j] = probability;
+            }
+        }
+    }
+
+
+    if (DEBUG_POINTS_PROBABILITY) {
+        for (int i = 0; i < sampledInputSize; ++i) {
+            std::stringstream sstream;
+            sstream << i << ", ";
+            sstream << "(" << (*sampledInputXs)[i] << ", " << (*sampledInputYs)[i] << "), ";
+            sstream << "Speed: "<< (*sampledSpeedRates)[i] << ", ";
+            sstream << "Angle: "<< getPointAngle(sampledInputXs, sampledInputYs, i) << ", \n";
+
+            for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].begin();
+                    it != (*charProbabilities)[i].end(); ++it) {
+                if (it->first == NOT_AN_INDEX) {
+                    sstream << it->first
+                            << "(skip):"
+                            << it->second
+                            << "\n";
+                } else {
+                    sstream << it->first
+                            << "("
+                            //<< static_cast<char>(mProximityInfo->getCodePointOf(it->first))
+                            << "):"
+                            << it->second
+                            << "\n";
+                }
+            }
+            AKLOGI("%s", sstream.str().c_str());
+        }
+    }
+
+    // Decrease key probabilities of points which don't have the highest probability of that key
+    // among nearby points. Probabilities of the first point and the last point are not suppressed.
+    for (int i = max(start, 1); i < sampledInputSize; ++i) {
+        for (int j = i + 1; j < sampledInputSize; ++j) {
+            if (!suppressCharProbabilities(
+                    mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+                    charProbabilities)) {
+                break;
+            }
+        }
+        for (int j = i - 1; j >= max(start, 0); --j) {
+            if (!suppressCharProbabilities(
+                    mostCommonKeyWidth, sampledInputSize, sampledLengthCache, i, j,
+                    charProbabilities)) {
+                break;
+            }
+        }
+    }
+
+    // Converting from raw probabilities to log probabilities to calculate spatial distance.
+    for (int i = start; i < sampledInputSize; ++i) {
+        for (int j = 0; j < keyCount; ++j) {
+            hash_map_compat<int, float>::iterator it = (*charProbabilities)[i].find(j);
+            if (it == (*charProbabilities)[i].end()){
+                (*nearKeysVector)[i].reset(j);
+            } else if(it->second < MIN_PROBABILITY) {
+                // Erases from near keys vector because it has very low probability.
+                (*nearKeysVector)[i].reset(j);
+                (*charProbabilities)[i].erase(j);
+            } else {
+                it->second = -logf(it->second);
+            }
+        }
+        (*charProbabilities)[i][NOT_AN_INDEX] = -logf((*charProbabilities)[i][NOT_AN_INDEX]);
+    }
+}
+
+// Decreases char probabilities of index0 by checking probabilities of a near point (index1) and
+// increases char probabilities of index1 by checking probabilities of index0.
+/* static */ bool ProximityInfoStateUtils::suppressCharProbabilities(const int mostCommonKeyWidth,
+        const int sampledInputSize, const std::vector<int> *const lengthCache,
+        const int index0, const int index1,
+        std::vector<hash_map_compat<int, float> > *charProbabilities) {
+    ASSERT(0 <= index0 && index0 < sampledInputSize);
+    ASSERT(0 <= index1 && index1 < sampledInputSize);
+
+    static const float SUPPRESSION_LENGTH_WEIGHT = 1.5f;
+    static const float MIN_SUPPRESSION_RATE = 0.1f;
+    static const float SUPPRESSION_WEIGHT = 0.5f;
+    static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f;
+    static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f;
+
+    const float keyWidthFloat = static_cast<float>(mostCommonKeyWidth);
+    const float diff = fabsf(static_cast<float>((*lengthCache)[index0] - (*lengthCache)[index1]));
+    if (diff > keyWidthFloat * SUPPRESSION_LENGTH_WEIGHT) {
+        return false;
+    }
+    const float suppressionRate = MIN_SUPPRESSION_RATE
+            + diff / keyWidthFloat / SUPPRESSION_LENGTH_WEIGHT * SUPPRESSION_WEIGHT;
+    for (hash_map_compat<int, float>::iterator it = (*charProbabilities)[index0].begin();
+            it != (*charProbabilities)[index0].end(); ++it) {
+        hash_map_compat<int, float>::iterator it2 =  (*charProbabilities)[index1].find(it->first);
+        if (it2 != (*charProbabilities)[index1].end() && it->second < it2->second) {
+            const float newProbability = it->second * suppressionRate;
+            const float suppression = it->second - newProbability;
+            it->second = newProbability;
+            // mCharProbabilities[index0][NOT_AN_INDEX] is the probability of skipping this point.
+            (*charProbabilities)[index0][NOT_AN_INDEX] += suppression;
+
+            // Add the probability of the same key nearby index1
+            const float probabilityGain = min(suppression * SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
+                    (*charProbabilities)[index1][NOT_AN_INDEX]
+                            * SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
+            it2->second += probabilityGain;
+            (*charProbabilities)[index1][NOT_AN_INDEX] -= probabilityGain;
+        }
+    }
+    return true;
+}
 } // namespace latinime
author	Satoshi Kataoka <satok@google.com>	2013-01-22 13:18:40 +0900
committer	Satoshi Kataoka <satok@google.com>	2013-01-22 15:43:05 +0900
commit	d4828d5053ac30476b884c177235be0cac982c92 (patch)
tree	d20dab02035378444d905e300e5e819ddf51308f /native/jni/src/proximity_info_state_utils.cpp
parent	6c22439bf80da08576e86c1282afc5cfa431e235 (diff)
download	latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.gz latinime-d4828d5053ac30476b884c177235be0cac982c92.tar.xz latinime-d4828d5053ac30476b884c177235be0cac982c92.zip