aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/additional_proximity_chars.h2
-rw-r--r--native/jni/src/basechars.cpp194
-rw-r--r--native/jni/src/bigram_dictionary.cpp31
-rw-r--r--native/jni/src/bigram_dictionary.h16
-rw-r--r--native/jni/src/binary_format.h114
-rw-r--r--native/jni/src/char_utils.cpp184
-rw-r--r--native/jni/src/char_utils.h30
-rw-r--r--native/jni/src/correction.cpp155
-rw-r--r--native/jni/src/correction.h159
-rw-r--r--native/jni/src/defines.h59
-rw-r--r--native/jni/src/dic_traverse_wrapper.h1
-rw-r--r--native/jni/src/dictionary.cpp18
-rw-r--r--native/jni/src/dictionary.h16
-rw-r--r--native/jni/src/geometry_utils.h50
-rw-r--r--native/jni/src/gesture/gesture_decoder_wrapper.cpp4
-rw-r--r--native/jni/src/gesture/gesture_decoder_wrapper.h15
-rw-r--r--native/jni/src/gesture/incremental_decoder_interface.h7
-rw-r--r--native/jni/src/gesture/incremental_decoder_wrapper.cpp4
-rw-r--r--native/jni/src/gesture/incremental_decoder_wrapper.h15
-rw-r--r--native/jni/src/proximity_info.cpp26
-rw-r--r--native/jni/src/proximity_info.h2
-rw-r--r--native/jni/src/proximity_info_state.cpp747
-rw-r--r--native/jni/src/proximity_info_state.h149
-rw-r--r--native/jni/src/terminal_attributes.h10
-rw-r--r--native/jni/src/unigram_dictionary.cpp125
-rw-r--r--native/jni/src/unigram_dictionary.h29
-rw-r--r--native/jni/src/words_priority_queue.cpp76
-rw-r--r--native/jni/src/words_priority_queue.h104
-rw-r--r--native/jni/src/words_priority_queue_pool.h5
29 files changed, 1409 insertions, 938 deletions
diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h
index d420c4664..0333c2dbd 100644
--- a/native/jni/src/additional_proximity_chars.h
+++ b/native/jni/src/additional_proximity_chars.h
@@ -39,7 +39,7 @@ class AdditionalProximityChars {
static const int EN_US_ADDITIONAL_U_SIZE = 4;
static const int32_t EN_US_ADDITIONAL_U[];
- static bool isEnLocale(const char *localeStr) {
+ AK_FORCE_INLINE static bool isEnLocale(const char *localeStr) {
const size_t LOCALE_EN_US_SIZE = strlen(LOCALE_EN_US);
return localeStr && strlen(localeStr) >= LOCALE_EN_US_SIZE
&& strncmp(localeStr, LOCALE_EN_US, LOCALE_EN_US_SIZE) == 0;
diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp
deleted file mode 100644
index 379cb6226..000000000
--- a/native/jni/src/basechars.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdint.h>
-
-#include "char_utils.h"
-
-namespace latinime {
-
-/*
- * Table mapping most combined Latin, Greek, and Cyrillic characters
- * to their base characters. If c is in range, BASE_CHARS[c] == c
- * if c is not a combined character, or the base character if it
- * is combined.
- */
-const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
- 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
- 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
- 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
- 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
- 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
- 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
- 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
- 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
- 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
- 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
- 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
- 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
- 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
- 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
- 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
- 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
- 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
- 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
- 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
- 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
- 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
- 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
- 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
- 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
- // Manually changed df to 73
- 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
- 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
- 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
- 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
- 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
- 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
- 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
- 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
- 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
- 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
- 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
- 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
- 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
- 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
- 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
- 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
- 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
- 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
- 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
- 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
- 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
- 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
- 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
- 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
- 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
- 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
- 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
- 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
- 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
- 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
- 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
- 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
- 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
- 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
- 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
- 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
- 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
- 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
- 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
- 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
- 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
- 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
- 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
- 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
- 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
- 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
- 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
- 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
- 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
- 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
- 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
- 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
- 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
- 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
- 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
- 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
- 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
- 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
- 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
- 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
- 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
- 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
- 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
- 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
- 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
- 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
- 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
- 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
- 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
- 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
- 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
- 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
- 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
- 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
- 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
- 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
- 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
- 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
- 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
- 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
- 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
- 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
- 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
- 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
- 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
- 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
- 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
- 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
- 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
- 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
- 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
- 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
- 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
- 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
- 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
- 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
- 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
- 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
- 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
- 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
- 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
- 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
- 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
- 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
- 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
- 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
- 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
- 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
- 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
- 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
- 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
- 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
- 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
- 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
- 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
- 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
- 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
- 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
- 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
- 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
- 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
- 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
- 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
- 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
- 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
- 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
- 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
- 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
- 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
- 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
- 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
- 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
-};
-// generated with:
-// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
-} // namespace latinime
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index dade4f16b..f89dd1615 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength,
BigramDictionary::~BigramDictionary() {
}
-bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency,
- int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const {
+bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq,
+ int *bigramCodePoints, int *outputTypes) const {
word[length] = 0;
if (DEBUG_DICT) {
#ifdef FLAG_DBG
char s[length + 1];
- for (int i = 0; i <= length; i++) s[i] = word[i];
+ for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
#endif
}
@@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
int insertAt = 0;
while (insertAt < MAX_PREDICTIONS) {
if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency
- && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) {
+ && length < Dictionary::wideStrLen(
+ bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
break;
}
insertAt++;
@@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
(MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0]));
bigramFreq[insertAt] = frequency;
outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
- memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH,
- bigramChars + insertAt * MAX_WORD_LENGTH,
- (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH);
- unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH;
+ memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
+ bigramCodePoints + insertAt * MAX_WORD_LENGTH,
+ (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH);
+ int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH;
while (length--) {
*dest++ = *word++;
}
@@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* prevWordLength: its length.
* inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions.
* codesSize: the size of the codes array.
- * bigramChars: an array for output, at the same format as outwords for getSuggestions.
+ * bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
* bigramFreq: an array to output frequencies.
* outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
@@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
-int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes,
- int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const {
+int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes,
+ int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
@@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
int bigramCount = 0;
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- uint16_t bigramBuffer[MAX_WORD_LENGTH];
+ int bigramBuffer[MAX_WORD_LENGTH];
int unigramFreq = 0;
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
@@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// here, but it can't get too bad.
const int frequency =
BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
- if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars,
+ if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
outputTypes)) {
++bigramCount;
}
@@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p
} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
}
-bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const {
+bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
int maxAlt = MAX_ALTERNATIVES;
- const unsigned short firstBaseChar = toBaseLowerCase(*word);
+ const int firstBaseChar = toBaseLowerCase(*word);
while (maxAlt > 0) {
if (toBaseLowerCase(*inputCodes) == firstBaseChar) {
return true;
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 5f11ae822..150192de2 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -27,23 +27,23 @@ namespace latinime {
class BigramDictionary {
public:
BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions);
- int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const;
- void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
+ int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords,
+ int *frequencies, int *outputTypes) const;
+ void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter) const;
- bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
+ bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
- bool addWordBigram(unsigned short *word, int length, int frequency,
- int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const;
+ bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints,
+ int *outputTypes) const;
int getBigramAddress(int *pos, bool advance);
int getBigramFreq(int *pos);
void searchForTerminalNode(int addressLookingFor, int frequency);
bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; }
- bool checkFirstCharacter(unsigned short *word, int *inputCodes) const;
- int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength,
+ bool checkFirstCharacter(int *word, int *inputCodes) const;
+ int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
const unsigned char *DICT;
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index eec52e323..9a8c315f7 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -28,10 +28,6 @@ class BinaryFormat {
public:
// Mask and flags for children address type selection.
static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
- static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
- static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
- static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
- static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
// Flag for single/multiple char group
static const int FLAG_HAS_MULTIPLE_CHARS = 0x20;
@@ -61,36 +57,21 @@ class BinaryFormat {
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
- static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
- const static int UNKNOWN_FORMAT = -1;
- // Originally, format version 1 had a 16-bit magic number, then the version number `01'
- // then options that must be 0. Hence the first 32-bits of the format are always as follow
- // and it's okay to consider them a magic number as a whole.
- const static uint32_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
- const static unsigned int FORMAT_VERSION_1_HEADER_SIZE = 5;
- // The versions of Latin IME that only handle format version 1 only test for the magic
- // number, so we had to change it so that version 2 files would be rejected by older
- // implementations. On this occasion, we made the magic number 32 bits long.
- const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
-
- const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
- const static int SHORTCUT_LIST_SIZE_SIZE = 2;
+ static const int UNKNOWN_FORMAT = -1;
+ static const int SHORTCUT_LIST_SIZE_SIZE = 2;
static int detectFormat(const uint8_t *const dict);
static unsigned int getHeaderSize(const uint8_t *const dict);
static unsigned int getFlags(const uint8_t *const dict);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
- static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
+ static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
static int skipOtherCharacters(const uint8_t *const dict, const int pos);
static int skipChildrenPosition(const uint8_t flags, const int pos);
static int skipFrequency(const uint8_t flags, const int pos);
static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
- static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos);
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
@@ -98,10 +79,10 @@ class BinaryFormat {
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeFrequencyFromFlags(const int flags);
- static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord,
+ static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
- uint16_t *outWord, int *outUnigramFrequency);
+ int *outWord, int *outUnigramFrequency);
static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
static int getProbability(const int position, const std::map<int, int> *bigramMap,
const uint8_t *bigramFilter, const int unigramFreq);
@@ -113,17 +94,37 @@ class BinaryFormat {
REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1,
REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4
};
- const static unsigned int NO_FLAGS = 0;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
- const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
- const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
- const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+ static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
+ static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
+ static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
+ static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
+ static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
+
+ // Originally, format version 1 had a 16-bit magic number, then the version number `01'
+ // then options that must be 0. Hence the first 32 bits of the format are always as follows
+ // and it's okay to consider them a magic number as a whole.
+ static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100;
+ static const unsigned int FORMAT_VERSION_1_HEADER_SIZE = 5;
+ // The versions of Latin IME that only handle format version 1 only test for the magic
+ // number, so we had to change it so that version 2 files would be rejected by older
+ // implementations. On this occasion, we made the magic number 32 bits long.
+ static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
+
+ static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
+ static const int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
+ static const int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F;
+ static const int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2;
+ static const unsigned int NO_FLAGS = 0;
static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos);
+ static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
};
-inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
+AK_FORCE_INLINE int BinaryFormat::detectFormat(const uint8_t *const dict) {
// The magic number is stored big-endian.
const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3];
switch (magicNumber) {
@@ -148,7 +149,7 @@ inline int BinaryFormat::detectFormat(const uint8_t *const dict) {
inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) {
switch (detectFormat(dict)) {
case 1:
- return NO_FLAGS;
+ return NO_FLAGS; // TODO: NO_FLAGS is unused anywhere else?
default:
return (dict[6] << 8) + dict[7];
}
@@ -166,7 +167,8 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) {
}
}
-inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) {
+AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
+ int *pos) {
const int msb = dict[(*pos)++];
if (msb < 0x80) return msb;
return ((msb & 0x7F) << 8) | dict[(*pos)++];
@@ -176,17 +178,18 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
return dict[(*pos)++];
}
-inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
+AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict,
+ int *pos) {
const int origin = *pos;
- const int32_t codePoint = dict[origin];
+ const int codePoint = dict[origin];
if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
*pos = origin + 1;
return NOT_A_CODE_POINT;
} else {
*pos = origin + 3;
- const int32_t char_1 = codePoint << 16;
- const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
+ const int char_1 = codePoint << 16;
+ const int char_2 = char_1 + (dict[origin + 1] << 8);
return char_2 + dict[origin + 2];
}
} else {
@@ -200,9 +203,9 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const
return dict[pos];
}
-inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
+AK_FORCE_INLINE int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) {
int currentPos = pos;
- int32_t character = dict[currentPos++];
+ int character = dict[currentPos++];
while (CHARACTER_ARRAY_TERMINATOR != character) {
if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE;
@@ -226,7 +229,7 @@ static inline int attributeAddressSize(const uint8_t flags) {
*/
}
-static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) {
+static AK_FORCE_INLINE int skipExistingBigrams(const uint8_t *const dict, const int pos) {
int currentPos = pos;
uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) {
@@ -255,7 +258,7 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
}
-inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags,
const int pos) {
if (FLAG_HAS_SHORTCUT_TARGETS & flags) {
return pos + shortcutByteSize(dict, pos);
@@ -264,7 +267,7 @@ inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t
}
}
-inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags,
const int pos) {
if (FLAG_HAS_BIGRAMS & flags) {
return skipExistingBigrams(dict, pos);
@@ -273,7 +276,7 @@ inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t fl
}
}
-inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
+AK_FORCE_INLINE int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags,
const int pos) {
// This function skips all attributes: shortcuts and bigrams.
int newPos = pos;
@@ -282,7 +285,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint
return newPos;
}
-inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
+AK_FORCE_INLINE int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
const uint8_t flags, const int pos) {
int currentPos = pos;
currentPos = skipChildrenPosition(flags, currentPos);
@@ -290,8 +293,8 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict,
return currentPos;
}
-inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags,
- const int pos) {
+AK_FORCE_INLINE int BinaryFormat::readChildrenPosition(const uint8_t *const dict,
+ const uint8_t flags, const int pos) {
int offset = 0;
switch (MASK_GROUP_ADDRESS_TYPE & flags) {
case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
@@ -318,7 +321,7 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}
-inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
+AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
const uint8_t flags, int *pos) {
int offset = 0;
const int origin = *pos;
@@ -352,8 +355,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
-inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
- const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) {
+AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
+ const int *const inWord, const int length, const bool forceLowerCaseSearch) {
int pos = 0;
int wordPos = 0;
@@ -362,14 +365,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
// there was no match (or we would have found it).
if (wordPos >= length) return NOT_VALID_WORD;
int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
- const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
+ const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
// If there are no more character groups in this node, it means we could not
// find a matching character for this depth, therefore there is no match.
if (0 >= charGroupCount) return NOT_VALID_WORD;
const int charGroupPos = pos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
- int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
if (character == wChar) {
// This is the correct node. Only one character group may start with the same
// char within a node, so either we found our match in this node, or there is
@@ -438,8 +441,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
* outUnigramFrequency: a pointer to an int to write the frequency into.
* Return value : the length of the word, or 0 if the word was not found.
*/
-inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
- const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) {
+AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
+ const int maxDepth, int *outWord, int *outUnigramFrequency) {
int pos = 0;
int wordPos = 0;
@@ -457,13 +460,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
--charGroupCount) {
const int startPos = pos;
const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
- const int32_t character = getCodePointAndForwardPointer(root, &pos);
+ const int character = getCodePointAndForwardPointer(root, &pos);
if (address == startPos) {
// We found the address. Copy the rest of the word in the buffer and return
// the length.
outWord[wordPos] = character;
if (FLAG_HAS_MULTIPLE_CHARS & flags) {
- int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
+ int nextChar = getCodePointAndForwardPointer(root, &pos);
// We count chars in order to avoid infinite loops if the file is broken or
// if there is some other bug
int charCount = maxDepth;
@@ -522,13 +525,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
if (0 != lastCandidateGroupPos) {
const uint8_t lastFlags =
getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
- const int32_t lastChar =
+ const int lastChar =
getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
// We copy all the characters in this group to the buffer
outWord[wordPos] = lastChar;
if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
- int32_t nextChar =
- getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
+ int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
int charCount = maxDepth;
while (-1 != nextChar && --charCount > 0) {
outWord[++wordPos] = nextChar;
diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp
index d0547a982..ff05f68d6 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -15,8 +15,10 @@
*/
#include <cstdlib>
+#include <stdint.h>
#include "char_utils.h"
+#include "defines.h"
namespace latinime {
@@ -33,7 +35,7 @@ struct LatinCapitalSmallPair {
//
// unsigned short c, cc, ccc, ccc2;
// for (c = 0; c < 0xFFFF ; c++) {
-// if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
+// if (c < NELEMS(BASE_CHARS)) {
// cc = BASE_CHARS[c];
// } else {
// cc = c;
@@ -894,9 +896,183 @@ static int compare_pair_capital(const void *a, const void *b) {
unsigned short latin_tolower(const unsigned short c) {
struct LatinCapitalSmallPair *p =
static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP,
- sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]),
- sizeof(SORTED_CHAR_MAP[0]),
- compare_pair_capital));
+ NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital));
return p ? p->small : c;
}
+
+/*
+ * Table mapping most combined Latin, Greek, and Cyrillic characters
+ * to their base characters. If c is in range, BASE_CHARS[c] == c
+ * if c is not a combined character, or the base character if it
+ * is combined.
+ *
+ * Generated with:
+ * cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; \
+ * $base[hex($foo[0])] = hex($foo[5]);} \
+ * for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { \
+ * printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
+ */
+const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+ 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+ 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
+ 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
+ 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf,
+ 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043,
+ 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
+ 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7,
+ 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
+ // Manually changed df to 73
+ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
+ 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
+ 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
+ 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
+ 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063,
+ 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064,
+ 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065,
+ 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067,
+ 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127,
+ 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069,
+ 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b,
+ 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c,
+ 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e,
+ 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f,
+ 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072,
+ 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073,
+ 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167,
+ 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075,
+ 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079,
+ 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073,
+ 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
+ 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
+ 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
+ 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
+ 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
+ 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055,
+ 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
+ 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
+ 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c,
+ 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049,
+ 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc,
+ 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4,
+ 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067,
+ 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292,
+ 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7,
+ 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8,
+ 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065,
+ 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f,
+ 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075,
+ 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068,
+ 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061,
+ 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f,
+ 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237,
+ 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
+ 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
+ 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
+ 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
+ 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
+ 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
+ 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
+ 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
+ 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
+ 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
+ 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
+ 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
+ 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
+ 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
+ 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
+ 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077,
+ 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
+ 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
+ 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
+ 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
+ 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df,
+ 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7,
+ 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
+ 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
+ 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
+ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
+ 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
+ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
+ 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
+ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
+ 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
+ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
+ 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
+ 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347,
+ 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
+ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
+ 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
+ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
+ 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
+ 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377,
+ 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f,
+ 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7,
+ 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9,
+ 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
+ 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
+ 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9,
+ 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
+ 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf,
+ 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7,
+ 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
+ 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
+ 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
+ 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7,
+ 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
+ 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406,
+ 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f,
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
+ 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f,
+ 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
+ 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
+ 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475,
+ 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
+ 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
+ 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
+ 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
+ 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
+ 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
+ 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
+ 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
+ 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
+ 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
+ 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
+ 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435,
+ 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437,
+ 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e,
+ 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443,
+ 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7,
+ 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
+};
} // namespace latinime
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
index 9008e364c..c632b79b8 100644
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -18,22 +18,23 @@
#define LATINIME_CHAR_UTILS_H
#include <cctype>
-#include <stdint.h>
+
+#include "defines.h"
namespace latinime {
-inline static bool isAsciiUpper(unsigned short c) {
+inline static bool isAsciiUpper(int c) {
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
return (c >= 'A' && c <= 'Z');
}
-inline static unsigned short toAsciiLower(unsigned short c) {
+inline static int toAsciiLower(int c) {
return c - 'A' + 'a';
}
-inline static bool isAscii(unsigned short c) {
- return isascii(static_cast<int>(c)) != 0;
+inline static bool isAscii(int c) {
+ return isascii(c) != 0;
}
unsigned short latin_tolower(const unsigned short c);
@@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c);
* if c is not a combined character, or the base character if it
* is combined.
*/
-
static const int BASE_CHARS_SIZE = 0x0500;
-extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE];
+extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
-inline static unsigned short toBaseChar(unsigned short c) {
+inline static int toBaseCodePoint(int c) {
if (c < BASE_CHARS_SIZE) {
- return BASE_CHARS[c];
+ return static_cast<int>(BASE_CHARS[c]);
}
return c;
}
-inline static unsigned short toLowerCase(const unsigned short c) {
+AK_FORCE_INLINE static int toLowerCase(const int c) {
if (isAsciiUpper(c)) {
return toAsciiLower(c);
} else if (isAscii(c)) {
return c;
}
- return latin_tolower(c);
+ return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
}
-inline static unsigned short toBaseLowerCase(const unsigned short c) {
- return toLowerCase(toBaseChar(c));
+AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
+ return toLowerCase(toBaseCodePoint(c));
}
-inline static bool isSkippableChar(const uint16_t character) {
+inline static bool isSkippableCodePoint(const int codePoint) {
// TODO: Do not hardcode here
- return character == '\'' || character == '-';
+ return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
}
} // namespace latinime
diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp
index 524abe9a1..50f33fe23 100644
--- a/native/jni/src/correction.cpp
+++ b/native/jni/src/correction.cpp
@@ -60,29 +60,6 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable,
}
}
-inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input,
- const int inputSize, const unsigned short *output, const int outputLength) {
- // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
- // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
- // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
- // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize].
- int *const current = editDistanceTable + outputLength * (inputSize + 1);
- const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1);
- const int *const prevprev =
- outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0;
- current[0] = outputLength;
- const uint32_t co = toBaseLowerCase(output[outputLength - 1]);
- const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
- for (int i = 1; i <= inputSize; ++i) {
- const uint32_t ci = toBaseLowerCase(input[i - 1]);
- const uint16_t cost = (ci == co) ? 0 : 1;
- current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost));
- if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) {
- current[i] = min(current[i], prevprev[i - 2] + 1);
- }
- }
-}
-
inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth,
const int outputLength, const int inputSize) {
if (DEBUG_EDIT_DISTANCE) {
@@ -91,16 +68,6 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD
return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize];
}
-//////////////////////
-// inline functions //
-//////////////////////
-static const char SINGLE_QUOTE = '\'';
-
-inline bool Correction::isSingleQuote(const unsigned short c) {
- const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex);
- return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE);
-}
-
////////////////
// Correction //
////////////////
@@ -162,31 +129,20 @@ bool Correction::sameAsTyped() {
}
int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
- const int wordCount, const bool isSpaceProximity, const unsigned short *word) {
+ const int wordCount, const bool isSpaceProximity, const int *word) {
return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray,
wordCount, this, isSpaceProximity, word);
}
-int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) {
+int Correction::getFinalProbability(const int probability, int **word, int *wordLength) {
return getFinalProbabilityInternal(probability, word, wordLength, mInputSize);
}
-int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputSize) {
+int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
+ const int inputSize) {
return getFinalProbabilityInternal(probability, word, wordLength, inputSize);
}
-int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputSize) {
- const int outputIndex = mTerminalOutputIndex;
- const int inputIndex = mTerminalInputIndex;
- *wordLength = outputIndex + 1;
- *word = mWord;
- int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
- inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
- return finalProbability;
-}
-
bool Correction::initProcessState(const int outputIndex) {
if (mCorrectionStates[outputIndex].mChildCount <= 0) {
return false;
@@ -230,42 +186,6 @@ int Correction::getInputIndex() const {
return mInputIndex;
}
-void Correction::incrementInputIndex() {
- ++mInputIndex;
-}
-
-void Correction::incrementOutputIndex() {
- ++mOutputIndex;
- mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex;
- mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
- mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
- mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
- mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
-
- mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
- mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
- mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
- mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
- mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
-
- mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos;
- mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
- mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
-
- mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
-
- mCorrectionStates[mOutputIndex].mMatching = mMatching;
- mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
- mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
- mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
- mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
- mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
-}
-
-void Correction::startToTraverseAllNodes() {
- mNeedsToTraverseAllNodes = true;
-}
-
bool Correction::needsToPrune() const {
// TODO: use edit distance here
return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance
@@ -273,44 +193,15 @@ bool Correction::needsToPrune() const {
|| (!mDoAutoCompletion && (mOutputIndex > mInputSize));
}
-void Correction::addCharToCurrentWord(const int32_t c) {
- mWord[mOutputIndex] = c;
- const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
- calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize,
- mWord, mOutputIndex + 1);
-}
-
-Correction::CorrectionType Correction::processSkipChar(
- const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
- addCharToCurrentWord(c);
- mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
- mTerminalOutputIndex = mOutputIndex;
- if (mNeedsToTraverseAllNodes && isTerminal) {
- incrementOutputIndex();
- return TRAVERSE_ALL_ON_TERMINAL;
- } else {
- incrementOutputIndex();
- return TRAVERSE_ALL_NOT_ON_TERMINAL;
- }
-}
-
-Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
- // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
- mTerminalInputIndex = mInputIndex;
- mTerminalOutputIndex = mOutputIndex;
- return UNRELATED;
-}
-
-inline bool isEquivalentChar(ProximityType type) {
+inline static bool isEquivalentChar(ProximityType type) {
return type == EQUIVALENT_CHAR;
}
-inline bool isProximityCharOrEquivalentChar(ProximityType type) {
+inline static bool isProximityCharOrEquivalentChar(ProximityType type) {
return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR;
}
-Correction::CorrectionType Correction::processCharAndCalcState(
- const int32_t c, const bool isTerminal) {
+Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
if (correctionCount > mMaxErrors) {
return processUnrelatedCorrectionType();
@@ -628,10 +519,10 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
}
-inline static int getQuoteCount(const unsigned short *word, const int length) {
+inline static int getQuoteCount(const int *word, const int length) {
int quoteCount = 0;
for (int i = 0; i < length; ++i) {
- if (word[i] == SINGLE_QUOTE) {
+ if (word[i] == KEYCODE_SINGLE_QUOTE) {
++quoteCount;
}
}
@@ -639,7 +530,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) {
}
inline static bool isUpperCase(unsigned short c) {
- return isAsciiUpper(toBaseChar(c));
+ return isAsciiUpper(toBaseCodePoint(c));
}
//////////////////////
@@ -672,7 +563,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
// TODO: use mExcessiveCount
const int matchCount = inputSize - correction->mProximityCount - excessiveCount;
- const unsigned short *word = correction->mWord;
+ const int *word = correction->mWord;
const bool skipped = skippedCount > 0;
const int quoteDiffCount = max(0, getQuoteCount(word, outputLength)
@@ -911,7 +802,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex
/* static */
int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
const int *freqArray, const int *wordLengthArray, const int wordCount,
- const Correction *correction, const bool isSpaceProximity, const unsigned short *word) {
+ const Correction *correction, const bool isSpaceProximity, const int *word) {
const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;
bool firstCapitalizedWordDemotion = false;
@@ -1040,9 +931,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(
}
/* Damerau-Levenshtein distance */
-inline static int editDistanceInternal(
- int *editDistanceTable, const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength) {
+inline static int editDistanceInternal(int *editDistanceTable, const int *before,
+ const int beforeLength, const int *after, const int afterLength) {
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
int *dp = editDistanceTable;
const int li = beforeLength + 1;
@@ -1056,9 +946,9 @@ inline static int editDistanceInternal(
for (int i = 0; i < li - 1; ++i) {
for (int j = 0; j < lo - 1; ++j) {
- const uint32_t ci = toBaseLowerCase(before[i]);
- const uint32_t co = toBaseLowerCase(after[j]);
- const uint16_t cost = (ci == co) ? 0 : 1;
+ const int ci = toBaseLowerCase(before[i]);
+ const int co = toBaseLowerCase(after[j]);
+ const int cost = (ci == co) ? 0 : 1;
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
@@ -1080,8 +970,8 @@ inline static int editDistanceInternal(
return dp[li * lo - 1];
}
-int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength) {
+int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength,
+ const int *after, const int afterLength) {
int table[(beforeLength + 1) * (afterLength + 1)];
return editDistanceInternal(table, before, beforeLength, after, afterLength);
}
@@ -1109,16 +999,15 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before,
// So, we can normalize the original score by dividing it by powf(2, min(b.l(),a.l())) * 255 * 2.
/* static */
-float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength,
- const int score) {
+float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength,
+ const int *after, const int afterLength, const int score) {
if (0 == beforeLength || 0 == afterLength) {
return 0;
}
const int distance = editDistance(before, beforeLength, after, afterLength);
int spaceCount = 0;
for (int i = 0; i < afterLength; ++i) {
- if (after[i] == CODE_SPACE) {
+ if (after[i] == KEYCODE_SPACE) {
++spaceCount;
}
}
diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h
index f016d5453..912cd838e 100644
--- a/native/jni/src/correction.h
+++ b/native/jni/src/correction.h
@@ -56,7 +56,8 @@ class Correction {
// No need to initialize it explicitly here.
}
- virtual ~Correction() {}
+ // Non-virtual inline destructor -- never inherit from this class
+ ~Correction() {}
void resetCorrection();
void initCorrection(
const ProximityInfo *pi, const int inputSize, const int maxWordLength);
@@ -78,14 +79,13 @@ class Correction {
return ++mTotalTraverseCount;
}
- int getFreqForSplitMultipleWords(
- const int *freqArray, const int *wordLengthArray, const int wordCount,
- const bool isSpaceProximity, const unsigned short *word);
- int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
- int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
- int *wordLength, const int inputSize);
+ int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
+ const int wordCount, const bool isSpaceProximity, const int *word);
+ int getFinalProbability(const int probability, int **word, int *wordLength);
+ int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength,
+ const int inputSize);
- CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
+ CorrectionType processCharAndCalcState(const int c, const bool isTerminal);
/////////////////////////
// Tree helper methods
@@ -110,29 +110,28 @@ class Correction {
const int inputSize);
static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
const int wordCount, const Correction *correction, const bool isSpaceProximity,
- const unsigned short *word);
- static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
- const unsigned short *after, const int afterLength, const int score);
- static int editDistance(const unsigned short *before,
- const int beforeLength, const unsigned short *after, const int afterLength);
+ const int *word);
+ static float calcNormalizedScore(const int *before, const int beforeLength,
+ const int *after, const int afterLength, const int score);
+ static int editDistance(const int *before, const int beforeLength, const int *after,
+ const int afterLength);
private:
- static const int CODE_SPACE = ' ';
static const int MAX_INITIAL_SCORE = 255;
};
// proximity info state
- void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
+ void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates) {
mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
}
- const unsigned short *getPrimaryInputWord() const {
+ const int *getPrimaryInputWord() const {
return mProximityInfoState.getPrimaryInputWord();
}
- unsigned short getPrimaryCharAt(const int index) const {
- return mProximityInfoState.getPrimaryCharAt(index);
+ int getPrimaryCodePointAt(const int index) const {
+ return mProximityInfoState.getPrimaryCodePointAt(index);
}
private:
@@ -147,7 +146,7 @@ class Correction {
}
static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
- inline static void multiplyIntCapped(const int multiplier, int *base) {
+ AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) {
const int temp = *base;
if (temp != S_INT_MAX) {
// Branch if multiplier == 2 for the optimization
@@ -170,7 +169,7 @@ class Correction {
}
}
- inline static int powerIntCapped(const int base, const int n) {
+ AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) {
if (n <= 0) return 1;
if (base == 2) {
return n < 31 ? 1 << n : S_INT_MAX;
@@ -181,7 +180,7 @@ class Correction {
}
}
- inline static void multiplyRate(const int rate, int *freq) {
+ AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) {
if (*freq != S_INT_MAX) {
if (*freq > 1000000) {
*freq /= 100;
@@ -215,13 +214,13 @@ class Correction {
inline void incrementInputIndex();
inline void incrementOutputIndex();
inline void startToTraverseAllNodes();
- inline bool isSingleQuote(const unsigned short c);
- inline CorrectionType processSkipChar(
- const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+ inline bool isSingleQuote(const int c);
+ inline CorrectionType processSkipChar(const int c, const bool isTerminal,
+ const bool inputIndexIncremented);
inline CorrectionType processUnrelatedCorrectionType();
- inline void addCharToCurrentWord(const int32_t c);
- inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
- int *wordLength, const int inputSize);
+ inline void addCharToCurrentWord(const int c);
+ inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength,
+ const int inputSize);
static const int TYPED_LETTER_MULTIPLIER = 2;
static const int FULL_WORD_MULTIPLIER = 2;
@@ -241,7 +240,7 @@ class Correction {
uint8_t mTotalTraverseCount;
// The following arrays are state buffer.
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWord[MAX_WORD_LENGTH_INTERNAL];
int mDistances[MAX_WORD_LENGTH_INTERNAL];
// Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
@@ -275,5 +274,109 @@ class Correction {
bool mSkipping;
ProximityInfoState mProximityInfoState;
};
+
+inline void Correction::incrementInputIndex() { // Advances mInputIndex by one; touches no other state.
+ ++mInputIndex;
+}
+
+AK_FORCE_INLINE void Correction::incrementOutputIndex() { // Advances mOutputIndex and fills in the mCorrectionStates slot at the new index.
+ ++mOutputIndex;
+ mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; // This and the next two fields are copied from the previous slot.
+ mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount;
+ mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos;
+ mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; // From here on, each field is a snapshot of the matching member of this object.
+ mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;
+
+ mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; // Counters.
+ mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
+ mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
+ mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount;
+ mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount;
+
+ mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; // Positions.
+ mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos;
+ mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos;
+
+ mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded;
+
+ mCorrectionStates[mOutputIndex].mMatching = mMatching; // Per-step flags.
+ mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
+ mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
+ mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
+ mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
+ mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
+}
+
+inline void Correction::startToTraverseAllNodes() { // Sets the mNeedsToTraverseAllNodes flag; nothing in this function clears it.
+ mNeedsToTraverseAllNodes = true;
+}
+
+inline bool Correction::isSingleQuote(const int c) { // True when c is a single quote that the user did not type at the current input position.
+ const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex); // Primary code point at mInputIndex.
+ return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE);
+}
+
+AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c,
+ const bool isTerminal, const bool inputIndexIncremented) { // Appends c to the output word, records the terminal indices and advances the output index.
+ addCharToCurrentWord(c);
+ mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); // Subtract one when the input index has already been incremented.
+ mTerminalOutputIndex = mOutputIndex; // Recorded before the output index advances below.
+ if (mNeedsToTraverseAllNodes && isTerminal) {
+ incrementOutputIndex();
+ return TRAVERSE_ALL_ON_TERMINAL;
+ } else {
+ incrementOutputIndex(); // Both branches advance the output index; only the returned type differs.
+ return TRAVERSE_ALL_NOT_ON_TERMINAL;
+ }
+}
+
+inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() { // Records the current indices as terminal indices and reports UNRELATED.
+ // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+ mTerminalInputIndex = mInputIndex;
+ mTerminalOutputIndex = mOutputIndex;
+ return UNRELATED;
+}
+
+AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input,
+ const int inputSize, const int *output, const int outputLength) { // Fills one row (row index outputLength) of the edit distance table.
+ // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched.
+ // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j].
+ // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated,
+ // and calculate dp[outputLength][0] ... dp[outputLength][inputSize].
+ int *const current = editDistanceTable + outputLength * (inputSize + 1); // Row being written.
+ const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); // Row above it.
+ const int *const prevprev =
+ outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; // Null when there is no row two above.
+ current[0] = outputLength; // The first column of the row holds the row index.
+ const int co = toBaseLowerCase(output[outputLength - 1]); // Newest output character, compared in base lower-case form.
+ const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0;
+ for (int i = 1; i <= inputSize; ++i) {
+ const int ci = toBaseLowerCase(input[i - 1]);
+ const uint16_t cost = (ci == co) ? 0 : 1; // 0 when the base lower-case forms match, otherwise 1.
+ current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); // Cheapest of the two one-character gap moves and the match/substitution move.
+ if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { // The last two characters appear swapped between input and output.
+ current[i] = min(current[i], prevprev[i - 2] + 1); // Count the swap as a single step from two rows above.
+ }
+ }
+}
+
+AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) { // Stores c at mOutputIndex in mWord and extends the edit distance table by one row.
+ mWord[mOutputIndex] = c;
+ const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord();
+ calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord,
+ mOutputIndex + 1); // The output length passed on includes the character just stored.
+}
+
+inline int Correction::getFinalProbabilityInternal(const int probability, int **word,
+ int *wordLength, const int inputSize) { // Reports the current word through the out-parameters and returns its final probability.
+ const int outputIndex = mTerminalOutputIndex;
+ const int inputIndex = mTerminalInputIndex;
+ *wordLength = outputIndex + 1; // Length is the terminal output index plus one.
+ *word = mWord; // Points at this object's internal buffer; no copy is made.
+ int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability(
+ inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize);
+ return finalProbability;
+}
+
} // namespace latinime
#endif // LATINIME_CORRECTION_H
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index ea0f0ef70..40bc958d1 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -30,17 +30,15 @@
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \
dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0)
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0)
-#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0)
-// TODO: INTS_TO_CHARS
-#define SHORTS_TO_CHARS(input, length, output) do { \
- shortArrayToCharArray(input, length, output); } while (0)
+#define INTS_TO_CHARS(input, length, output) do { \
+ intArrayToCharArray(input, length, output); } while (0) // Forwards to intArrayToCharArray(); the do/while (0) wrapper makes the macro usable as a single statement.
-static inline void dumpWordInfo(const unsigned short *word, const int length,
- const int rank, const int frequency) {
+static inline void dumpWordInfo(const int *word, const int length, const int rank,
+ const int frequency) {
static char charBuf[50];
int i = 0;
for (; i < length; ++i) {
- const unsigned short c = word[i];
+ const int c = word[i];
if (c == 0) {
break;
}
@@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length,
}
}
-static inline void dumpResult(
- const unsigned short *outWords, const int *frequencies, const int maxWordCounts,
+static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts,
const int maxWordLength) {
AKLOGI("--- DUMP RESULT ---------");
for (int i = 0; i < maxWordCounts; ++i) {
@@ -63,11 +60,11 @@ static inline void dumpResult(
AKLOGI("-------------------------");
}
-static inline void dumpWord(const unsigned short *word, const int length) {
+static inline void dumpWord(const int *word, const int length) {
static char charBuf[50];
int i = 0;
for (; i < length; ++i) {
- const unsigned short c = word[i];
+ const int c = word[i];
if (c == 0) {
break;
}
@@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) {
}
}
-static inline void dumpWordInt(const int *word, const int length) {
- static char charBuf[50];
-
- for (int i = 0; i < length; ++i) {
- charBuf[i] = word[i];
- }
- charBuf[length] = 0;
- AKLOGI("i[ %s ]", charBuf);
-}
-
-// TODO: Change this to intArrayToCharArray
-static inline void shortArrayToCharArray(
- const unsigned short *input, const int length, char *output) {
+static inline void intArrayToCharArray(const int *input, const int length, char *output) {
int i = 0;
- for (;i < length; ++i) {
- const unsigned short c = input[i];
+ for (; i < length; ++i) {
+ const int c = input[i];
if (c == 0) {
break;
}
@@ -137,11 +122,9 @@ static inline void showStackTrace() {
#define AKLOGI(fmt, ...)
#define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength)
#define DUMP_WORD(word, length)
-#define DUMP_WORD_INT(word, length)
#define ASSERT(success)
#define SHOW_STACK_TRACE
-// TODO: INTS_TO_CHARS
-#define SHORTS_TO_CHARS(input, length, output)
+#define INTS_TO_CHARS(input, length, output)
#endif
#ifdef FLAG_DO_PROFILE
@@ -219,6 +202,8 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#define DEBUG_SAMPLING_POINTS true
+#define DEBUG_POINTS_PROBABILITY true
#ifdef FLAG_FULL_DBG
#define DEBUG_GEO_FULL true
@@ -239,6 +224,8 @@ static inline void prof_out(void) {
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
+#define DEBUG_SAMPLING_POINTS false
+#define DEBUG_POINTS_PROBABILITY false
#define DEBUG_GEO_FULL false
@@ -282,6 +269,8 @@ static inline void prof_out(void) {
#define NOT_A_PROBABILITY (-1)
#define KEYCODE_SPACE ' '
+#define KEYCODE_SINGLE_QUOTE '\''
+#define KEYCODE_HYPHEN_MINUS '-'
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true
@@ -344,8 +333,8 @@ static inline void prof_out(void) {
#define MULTIPLE_WORDS_DEMOTION_RATE 80
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6
-#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35
-#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185
+#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f
+#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f
/* heuristic... This should be changed if we change the unit of the frequency. */
#define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100)
@@ -392,6 +381,14 @@ static inline void prof_out(void) {
template<typename T> inline T min(T a, T b) { return a < b ? a : b; } // Returns a only when a < b; otherwise returns b. Arguments and result are passed by value.
template<typename T> inline T max(T a, T b) { return a > b ? a : b; } // Returns a only when a > b; otherwise returns b. Arguments and result are passed by value.
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) // Element count of an array; x must be a real array, because for a pointer sizeof(x) is the pointer size.
+
+#ifdef __GNUC__
+#define AK_FORCE_INLINE __attribute__((always_inline)) __inline__ // GNU-compatible compilers: request inlining through the always_inline attribute.
+#else // __GNUC__
+#define AK_FORCE_INLINE inline // Other compilers: fall back to a plain inline specifier.
+#endif // __GNUC__
+
// The ratio of neutral area radius to sweet spot radius.
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f
diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h
index 292382487..3fe3d5b74 100644
--- a/native/jni/src/dic_traverse_wrapper.h
+++ b/native/jni/src/dic_traverse_wrapper.h
@@ -62,6 +62,5 @@ class DicTraverseWrapper {
void *, const Dictionary *const, const int *, const int);
static void (*sDicTraverseSessionReleaseMethod)(void *);
};
-int register_DicTraverseSession(JNIEnv *env);
} // namespace latinime
#endif // LATINIME_DIC_TRAVERSE_WRAPPER_H
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 2fbe83e86..5fbe0461b 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -30,13 +30,12 @@ namespace latinime {
// TODO: Change the type of all keyCodes to uint32_t
Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust,
- int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords,
- int maxPredictions)
+ int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions)
: mDict(static_cast<unsigned char *>(dict)),
mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)),
mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust),
- mUnigramDictionary(new UnigramDictionary(mOffsetDict, typedLetterMultiplier,
- fullWordMultiplier, maxWordLength, maxWords, BinaryFormat::getFlags(mDict))),
+ mUnigramDictionary(new UnigramDictionary(mOffsetDict, fullWordMultiplier, maxWordLength,
+ maxWords, BinaryFormat::getFlags(mDict))),
mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)),
mGestureDecoder(new GestureDecoderWrapper(maxWordLength, maxWords)) {
if (DEBUG_DICT) {
@@ -55,11 +54,10 @@ Dictionary::~Dictionary() {
}
int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession,
- int *xcoordinates, int *ycoordinates, int *times, int *pointerIds,
- int *codes, int codesSize, int *prevWordChars,
- int prevWordLength, int commitPoint, bool isGesture,
- bool useFullEditDistance, unsigned short *outWords,
- int *frequencies, int *spaceIndices, int *outputTypes) const {
+ int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes,
+ int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
+ bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
+ int *outputTypes) const {
int result = 0;
if (isGesture) {
DicTraverseWrapper::initDicTraverseSession(
@@ -84,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi
}
int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const {
+ int *outWords, int *frequencies, int *outputTypes) const {
if (length <= 0) return 0;
return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
outputTypes);
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index a1358890d..2ca00ab63 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -41,17 +41,17 @@ class Dictionary {
const static int KIND_SHORTCUT = 7; // A shortcut
const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input)
- Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler,
- int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions);
+ Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int fullWordMultiplier,
+ int maxWordLength, int maxWords, int maxPredictions);
int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates,
int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize,
int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture,
- bool useFullEditDistance, unsigned short *outWords,
- int *frequencies, int *spaceIndices, int *outputTypes) const;
+ bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices,
+ int *outputTypes) const;
- int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
- unsigned short *outWords, int *frequencies, int *outputTypes) const;
+ int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords,
+ int *frequencies, int *outputTypes) const;
int getFrequency(const int32_t *word, int length) const;
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const;
@@ -68,7 +68,7 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
- static int wideStrLen(unsigned short *str);
+ static int wideStrLen(int *str);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
@@ -88,7 +88,7 @@ class Dictionary {
// public static utility methods
// static inline methods should be defined in the header file
-inline int Dictionary::wideStrLen(unsigned short *str) {
+inline int Dictionary::wideStrLen(int *str) {
if (!str) return 0;
int length = 0;
while (*str) {
diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h
index 31359e19d..38b91cc50 100644
--- a/native/jni/src/geometry_utils.h
+++ b/native/jni/src/geometry_utils.h
@@ -19,38 +19,46 @@
#include <cmath>
+#include "defines.h"
+
#define DEBUG_DECODER false
#define M_PI_F 3.14159265f
// Rounds f down to four decimal places when 0.001 < f < 1000; any other value passes through
// unchanged. The whole expansion is wrapped in parentheses so that operators written next to
// the macro call apply to the result instead of being absorbed into the conditional's
// else-branch. Note that f is evaluated more than once, so it must have no side effects.
#define ROUND_FLOAT_10000(f) (((f) < 1000.0f && (f) > 0.001f) \
? (floorf((f) * 10000.0f) / 10000.0f) : (f))
-#define SQUARE_FLOAT(x) ((x) * (x))
namespace latinime {
-static inline float getSquaredDistanceFloat(float x1, float y1, float x2, float y2) {
- const float deltaX = x1 - x2;
- const float deltaY = y1 - y2;
- return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY);
+static inline float SQUARE_FLOAT(const float x) { return x * x; } // Function form of the removed macro; the argument is evaluated exactly once.
+
+static inline float getSquaredDistanceFloat(const float x1, const float y1, const float x2,
+ const float y2) { // Squared Euclidean distance between (x1, y1) and (x2, y2); no square root is taken.
+ return SQUARE_FLOAT(x1 - x2) + SQUARE_FLOAT(y1 - y2);
+}
+
+static inline float getNormalizedSquaredDistanceFloat(const float x1, const float y1,
+ const float x2, const float y2, const float scale) { // Squared distance divided by scale squared.
+ return getSquaredDistanceFloat(x1, y1, x2, y2) / SQUARE_FLOAT(scale); // No guard against scale == 0; the caller must pass a non-zero scale.
}
-static inline float getDistanceFloat(float x1, float y1, float x2, float y2) {
+static inline float getDistanceFloat(const float x1, const float y1, const float x2,
+ const float y2) { // Euclidean distance between (x1, y1) and (x2, y2).
return hypotf(x1 - x2, y1 - y2);
}
-static inline int getDistanceInt(int x1, int y1, int x2, int y2) {
+static AK_FORCE_INLINE int getDistanceInt(const int x1, const int y1, const int x2, const int y2) { // Integer wrapper around getDistanceFloat().
return static_cast<int>(getDistanceFloat(static_cast<float>(x1), static_cast<float>(y1),
static_cast<float>(x2), static_cast<float>(y2))); // The float result is truncated by the conversion to int.
}
-static inline float getAngle(int x1, int y1, int x2, int y2) {
+static AK_FORCE_INLINE float getAngle(const int x1, const int y1, const int x2, const int y2) { // Direction of the vector from (x2, y2) to (x1, y1).
const int dx = x1 - x2;
const int dy = y1 - y2;
if (dx == 0 && dy == 0) return 0; // Coincident points: return 0 without calling atan2f.
return atan2f(static_cast<float>(dy), static_cast<float>(dx)); // Angle in radians, as produced by atan2f.
}
-static inline float getAngleDiff(float a1, float a2) {
+static AK_FORCE_INLINE float getAngleDiff(const float a1, const float a2) {
const float deltaA = fabsf(a1 - a2);
const float diff = ROUND_FLOAT_10000(deltaA);
if (diff > M_PI_F) {
@@ -60,8 +68,8 @@ static inline float getAngleDiff(float a1, float a2) {
return diff;
}
-static inline float pointToLineSegSquaredDistanceFloat(
- float x, float y, float x1, float y1, float x2, float y2, bool extend) {
+static inline float pointToLineSegSquaredDistanceFloat(const float x, const float y, const float x1,
+ const float y1, const float x2, const float y2, const bool extend) {
const float ray1x = x - x1;
const float ray1y = y - y1;
const float ray2x = x2 - x1;
@@ -85,5 +93,25 @@ static inline float pointToLineSegSquaredDistanceFloat(
}
return getSquaredDistanceFloat(x, y, projectionX, projectionY);
}
+
+// Normal distribution N(u, sigma^2).
+struct NormalDistribution {
+ NormalDistribution(const float u, const float sigma) // Precomputes the two sigma-dependent factors of the density once.
+ : mU(u), mSigma(sigma),
+ mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F * SQUARE_FLOAT(sigma))),
+ mPreComputedExponentPart(-1.0f / (2.0f * SQUARE_FLOAT(sigma))) {} // No guard against sigma == 0; both factors divide by sigma squared.
+
+ float getProbabilityDensity(const float x) const { // Density at x: nonExpPart * exp(exponentPart * (x - u)^2).
+ const float shiftedX = x - mU;
+ return mPreComputedNonExpPart * expf(mPreComputedExponentPart * SQUARE_FLOAT(shiftedX));
+ }
+
+private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution);
+ float mU; // mean value
+ float mSigma; // standard deviation
+ float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2)
+ float mPreComputedExponentPart; // = -1 / (2 * sigma^2)
+}; // struct NormalDistribution
} // namespace latinime
#endif // LATINIME_GEOMETRY_UTILS_H
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.cpp b/native/jni/src/gesture/gesture_decoder_wrapper.cpp
index afbe0c5c3..20ad4a58c 100644
--- a/native/jni/src/gesture/gesture_decoder_wrapper.cpp
+++ b/native/jni/src/gesture/gesture_decoder_wrapper.cpp
@@ -19,4 +19,8 @@
namespace latinime {
IncrementalDecoderInterface *
(*GestureDecoderWrapper::sGestureDecoderFactoryMethod)(int, int) = 0;
+
+ GestureDecoderWrapper::~GestureDecoderWrapper() { // Defined out of line here; the header now only declares it.
+ delete mIncrementalDecoderInterface; // Deletes the wrapped decoder; deleting a null pointer is a no-op.
+ }
} // namespace latinime
diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h
index 92e1ded49..5b056b647 100644
--- a/native/jni/src/gesture/gesture_decoder_wrapper.h
+++ b/native/jni/src/gesture/gesture_decoder_wrapper.h
@@ -33,20 +33,17 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface {
: mIncrementalDecoderInterface(getGestureDecoderInstance(maxWordLength, maxWords)) {
}
- virtual ~GestureDecoderWrapper() {
- delete mIncrementalDecoderInterface;
- }
+ virtual ~GestureDecoderWrapper();
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
- int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
- unsigned short *outWords, int *frequencies, int *outputIndices,
- int *outputTypes) const {
+ int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
+ int *frequencies, int *outputIndices, int *outputTypes) const { // Forwards every argument unchanged to the wrapped decoder.
if (!mIncrementalDecoderInterface) { // No wrapped decoder: return 0 without touching the output buffers.
return 0;
}
- return mIncrementalDecoderInterface->getSuggestions(
- pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
- inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
+ return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
+ inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
+ outputIndices, outputTypes);
}
static void setGestureDecoderFactoryMethod(
diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h
index d1395aab9..e41513dbc 100644
--- a/native/jni/src/gesture/incremental_decoder_interface.h
+++ b/native/jni/src/gesture/incremental_decoder_interface.h
@@ -28,10 +28,9 @@ class ProximityInfo;
class IncrementalDecoderInterface {
public:
- virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession,
- int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes,
- int inputSize, int commitPoint, unsigned short *outWords, int *frequencies,
- int *outputIndices, int *outputTypes) const = 0;
+ virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs,
+ int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
+ int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0;
IncrementalDecoderInterface() { };
virtual ~IncrementalDecoderInterface() { };
private:
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.cpp b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
index 8fcda6c9e..f6e45623a 100644
--- a/native/jni/src/gesture/incremental_decoder_wrapper.cpp
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.cpp
@@ -19,4 +19,8 @@
namespace latinime {
IncrementalDecoderInterface *
(*IncrementalDecoderWrapper::sIncrementalDecoderFactoryMethod)(int, int) = 0;
+
+ IncrementalDecoderWrapper::~IncrementalDecoderWrapper() { // Defined out of line here; the header now only declares it.
+ delete mIncrementalDecoderInterface; // Deletes the wrapped decoder; deleting a null pointer is a no-op.
+ }
} // namespace latinime
diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h
index da7afdb8a..7d16560ef 100644
--- a/native/jni/src/gesture/incremental_decoder_wrapper.h
+++ b/native/jni/src/gesture/incremental_decoder_wrapper.h
@@ -33,20 +33,17 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface {
: mIncrementalDecoderInterface(getIncrementalDecoderInstance(maxWordLength, maxWords)) {
}
- virtual ~IncrementalDecoderWrapper() {
- delete mIncrementalDecoderInterface;
- }
+ virtual ~IncrementalDecoderWrapper();
int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs,
- int *times, int *pointerIds, int *codes, int inputSize, int commitPoint,
- unsigned short *outWords, int *frequencies, int *outputIndices,
- int *outputTypes) const {
+ int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords,
+ int *frequencies, int *outputIndices, int *outputTypes) const { // Forwards every argument unchanged to the wrapped decoder.
if (!mIncrementalDecoderInterface) { // No wrapped decoder: return 0 without touching the output buffers.
return 0;
}
- return mIncrementalDecoderInterface->getSuggestions(
- pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes,
- inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes);
+ return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs,
+ inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies,
+ outputIndices, outputTypes);
}
static void setIncrementalDecoderFactoryMethod(
diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp
index fde93b5a9..84db7c196 100644
--- a/native/jni/src/proximity_info.cpp
+++ b/native/jni/src/proximity_info.cpp
@@ -31,21 +31,21 @@ namespace latinime {
/* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f;
-static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len,
- jint *buffer) {
+static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray,
+ jsize len, jint *buffer) { // Fills buffer from jArray, or with zeros when jArray is null; does nothing when buffer is null.
if (jArray && buffer) {
env->GetIntArrayRegion(jArray, 0, len, buffer); // Copy len elements starting at index 0.
} else if (buffer) {
- memset(buffer, 0, len * sizeof(jint));
+ memset(buffer, 0, len * sizeof(buffer[0])); // The byte count follows the buffer's own element type.
}
}
-static inline void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray, jsize len,
- jfloat *buffer) {
+static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray,
+ jsize len, jfloat *buffer) { // Fills buffer from jArray, or with zeros when jArray is null; does nothing when buffer is null.
if (jArray && buffer) {
env->GetFloatArrayRegion(jArray, 0, len, buffer); // Copy len elements starting at index 0.
} else if (buffer) {
- memset(buffer, 0, len * sizeof(jfloat));
+ memset(buffer, 0, len * sizeof(buffer[0])); // The byte count follows the buffer's own element type.
}
}
}
@@ -127,13 +127,6 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const {
return false;
}
-static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float x2, float y2,
- float scale) {
- const float deltaX = x1 - x2;
- const float deltaY = y1 - y2;
- return (SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY)) / SQUARE_FLOAT(scale);
-}
-
float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloatG(
const int keyId, const int x, const int y) const {
const static float verticalSweetSpotScaleForGeometric = 1.1f;
@@ -239,6 +232,9 @@ int ProximityInfo::getKeyIndexOf(const int c) const {
// We do not have the coordinate data
return NOT_AN_INDEX;
}
+ if (c == NOT_A_CODE_POINT) {
+ return NOT_AN_INDEX;
+ }
const int lowerCode = static_cast<int>(toLowerCase(c));
hash_map_compat<int, int>::const_iterator mapPos = mCodeToKeyMap.find(lowerCode);
if (mapPos != mCodeToKeyMap.end()) {
@@ -296,9 +292,7 @@ int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const {
return 0;
}
-int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const {
- const int keyId0 = getKeyIndexOf(key0);
- const int keyId1 = getKeyIndexOf(key1);
+int ProximityInfo::getKeyKeyDistanceG(const int keyId0, const int keyId1) const {
if (keyId0 >= 0 && keyId1 >= 0) {
return mKeyKeyDistancesG[keyId0][keyId1];
}
diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h
index 70942aa19..7ee15d578 100644
--- a/native/jni/src/proximity_info.h
+++ b/native/jni/src/proximity_info.h
@@ -109,7 +109,7 @@ class ProximityInfo {
int getKeyCenterYOfCodePointG(int charCode) const;
int getKeyCenterXOfKeyIdG(int keyId) const;
int getKeyCenterYOfKeyIdG(int keyId) const;
- int getKeyKeyDistanceG(int key0, int key1) const;
+ int getKeyKeyDistanceG(int keyId0, int keyId1) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo);
diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp
index 392ec8194..db79bb616 100644
--- a/native/jni/src/proximity_info_state.cpp
+++ b/native/jni/src/proximity_info_state.cpp
@@ -15,6 +15,7 @@
*/
#include <cstring> // for memset()
+#include <sstream> // for debug prints
#include <stdint.h>
#define LOG_TAG "LatinIME: proximity_info_state.cpp"
@@ -33,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f;
const int ProximityInfoState::NOT_A_CODE = -1;
void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength,
- const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize,
+ const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize,
const int *const xCoordinates, const int *const yCoordinates, const int *const times,
const int *const pointerIds, const bool isGeometric) {
@@ -62,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
// - mNormalizedSquaredDistances
// TODO: Merge
for (int i = 0; i < inputSize; ++i) {
- const int32_t primaryKey = inputCodes[i];
+ const int primaryKey = inputCodes[i];
const int x = xCoordinates[i];
const int y = yCoordinates[i];
int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL];
@@ -104,7 +105,10 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
mLengthCache.clear();
mDistanceCache.clear();
mNearKeysVector.clear();
+ mSearchKeysVector.clear();
mRelativeSpeeds.clear();
+ mCharProbabilities.clear();
+ mDirections.clear();
}
if (DEBUG_GEO_FULL) {
AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d",
@@ -130,6 +134,10 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0];
NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1];
NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2];
+ // "sumAngle" is accumulated by each angle of input points. And when "sumAngle" exceeds
+ // the threshold we save that point, reset sumAngle. This aims to keep the figure of
+ // the curve.
+ float sumAngle = 0.0f;
for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) {
// Assuming pointerId == 0 if pointerIds is null.
@@ -138,13 +146,22 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid);
}
if (pointerId == pid) {
- const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i);
+ const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i);
const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i];
const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i];
const int time = times ? times[i] : -1;
+
+ if (i > 1) {
+ const float prevAngle = getAngle(xCoordinates[i - 2], yCoordinates[i - 2],
+ xCoordinates[i - 1], yCoordinates[i - 1]);
+ const float currentAngle =
+ getAngle(xCoordinates[i - 1], yCoordinates[i - 1], x, y);
+ sumAngle += getAngleDiff(prevAngle, currentAngle);
+ }
+
if (pushTouchPoint(i, c, x, y, time, isGeometric /* do sampling */,
- i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances,
- prevPrevNearKeysDistances)) {
+ i == lastInputIndex, sumAngle, currentNearKeysDistances,
+ prevNearKeysDistances, prevPrevNearKeysDistances)) {
// Previous point information was popped.
NearKeysDistanceMap *tmp = prevNearKeysDistances;
prevNearKeysDistances = currentNearKeysDistances;
@@ -154,6 +171,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
prevPrevNearKeysDistances = prevNearKeysDistances;
prevNearKeysDistances = currentNearKeysDistances;
currentNearKeysDistances = tmp;
+ sumAngle = 0.0f;
}
}
}
@@ -161,43 +179,68 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
}
if (mInputSize > 0 && isGeometric) {
- int sumDuration = mTimes.back() - mTimes.front();
- int sumLength = mLengthCache.back() - mLengthCache.front();
- float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration);
+ // Relative speed calculation.
+ const int sumDuration = mTimes.back() - mTimes.front();
+ const int sumLength = mLengthCache.back() - mLengthCache.front();
+ const float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration);
mRelativeSpeeds.resize(mInputSize);
for (int i = lastSavedInputSize; i < mInputSize; ++i) {
const int index = mInputIndice[i];
int length = 0;
int duration = 0;
- if (index == 0 && index < inputSize - 1) {
- length = getDistanceInt(xCoordinates[index], yCoordinates[index],
- xCoordinates[index + 1], yCoordinates[index + 1]);
- duration = times[index + 1] - times[index];
- } else if (index == inputSize - 1 && index > 0) {
- length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1],
- xCoordinates[index], yCoordinates[index]);
- duration = times[index] - times[index - 1];
- } else if (0 < index && index < inputSize - 1) {
- length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1],
- xCoordinates[index], yCoordinates[index])
- + getDistanceInt(xCoordinates[index], yCoordinates[index],
- xCoordinates[index + 1], yCoordinates[index + 1]);
- duration = times[index + 1] - times[index - 1];
+
+ // Calculate velocity by using distances and durations of
+ // NUM_POINTS_FOR_SPEED_CALCULATION points for both forward and backward.
+ static const int NUM_POINTS_FOR_SPEED_CALCULATION = 2;
+ for (int j = index; j < min(inputSize - 1, index + NUM_POINTS_FOR_SPEED_CALCULATION);
+ ++j) {
+ if (i < mInputSize - 1 && j >= mInputIndice[i + 1]) {
+ break;
+ }
+ length += getDistanceInt(xCoordinates[j], yCoordinates[j],
+ xCoordinates[j + 1], yCoordinates[j + 1]);
+ duration += times[j + 1] - times[j];
+ }
+ for (int j = index - 1; j >= max(0, index - NUM_POINTS_FOR_SPEED_CALCULATION); --j) {
+ if (i > 0 && j < mInputIndice[i - 1]) {
+ break;
+ }
+ length += getDistanceInt(xCoordinates[j], yCoordinates[j],
+ xCoordinates[j + 1], yCoordinates[j + 1]);
+ duration += times[j + 1] - times[j];
+ }
+ if (duration == 0 || sumDuration == 0) {
+ // Cannot calculate speed; thus, it gives an average value (1.0);
+ mRelativeSpeeds[i] = 1.0f;
} else {
- length = 0;
- duration = 1;
+ const float speed = static_cast<float>(length) / static_cast<float>(duration);
+ mRelativeSpeeds[i] = speed / averageSpeed;
}
- const float speed = static_cast<float>(length) / static_cast<float>(duration);
- mRelativeSpeeds[i] = speed / averageSpeed;
+ }
+
+ // Direction calculation.
+ mDirections.resize(mInputSize - 1);
+ for (int i = max(0, lastSavedInputSize - 1); i < mInputSize - 1; ++i) {
+ mDirections[i] = getDirection(i, i + 1);
+ }
+
+ }
+
+ if (DEBUG_GEO_FULL) {
+ for (int i = 0; i < mInputSize; ++i) {
+ AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i],
+ mTimes[i]);
}
}
if (mInputSize > 0) {
const int keyCount = mProximityInfo->getKeyCount();
mNearKeysVector.resize(mInputSize);
+ mSearchKeysVector.resize(mInputSize);
mDistanceCache.resize(mInputSize * keyCount);
for (int i = lastSavedInputSize; i < mInputSize; ++i) {
mNearKeysVector[i].reset();
+ mSearchKeysVector[i].reset();
static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f;
for (int k = 0; k < keyCount; ++k) {
const int index = i * keyCount + k;
@@ -207,29 +250,53 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
mProximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(k, x, y);
mDistanceCache[index] = normalizedSquaredDistance;
if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) {
- mNearKeysVector[i].set(k, 1);
+ mNearKeysVector[i][k] = true;
}
}
}
+ if (isGeometric) {
+ // updates probabilities of skipping or mapping each key for all points.
+ updateAlignPointProbabilities(lastSavedInputSize);
- static const float READ_FORWORD_LENGTH_SCALE = 0.95f;
- const int readForwordLength = static_cast<int>(
- hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight())
- * READ_FORWORD_LENGTH_SCALE);
- for (int i = 0; i < mInputSize; ++i) {
- if (DEBUG_GEO_FULL) {
- AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i],
- mTimes[i]);
- }
- for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) {
- if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) {
- break;
+ static const float READ_FORWORD_LENGTH_SCALE = 0.95f;
+ const int readForwordLength = static_cast<int>(
+ hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight())
+ * READ_FORWORD_LENGTH_SCALE);
+ for (int i = 0; i < mInputSize; ++i) {
+ if (i >= lastSavedInputSize) {
+ mSearchKeysVector[i].reset();
+ }
+ for (int j = max(i, lastSavedInputSize); j < mInputSize; ++j) {
+ if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) {
+ break;
+ }
+ mSearchKeysVector[i] |= mNearKeysVector[j];
}
- mNearKeysVector[i] |= mNearKeysVector[j];
}
}
}
+ if (DEBUG_SAMPLING_POINTS) {
+ std::stringstream originalX, originalY, sampledX, sampledY;
+ for (int i = 0; i < inputSize; ++i) {
+ originalX << xCoordinates[i];
+ originalY << yCoordinates[i];
+ if (i != inputSize - 1) {
+ originalX << ";";
+ originalY << ";";
+ }
+ }
+ for (int i = 0; i < mInputSize; ++i) {
+ sampledX << mInputXs[i];
+ sampledY << mInputYs[i];
+ if (i != mInputSize - 1) {
+ sampledX << ";";
+ sampledY << ";";
+ }
+ }
+ AKLOGI("\n%s, %s,\n%s, %s,\n", originalX.str().c_str(), originalY.str().c_str(),
+ sampledX.str().c_str(), sampledY.str().c_str());
+ }
// end
///////////////////////
@@ -239,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
&& xCoordinates && yCoordinates;
if (!isGeometric && pointerId == 0) {
for (int i = 0; i < inputSize; ++i) {
- mPrimaryInputWord[i] = getPrimaryCharAt(i);
+ mPrimaryInputWord[i] = getPrimaryCodePointAt(i);
}
for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) {
- const int *proximityChars = getProximityCharsAt(i);
- const int primaryKey = proximityChars[0];
+ const int *proximityCodePoints = getProximityCodePointsAt(i);
+ const int primaryKey = proximityCodePoints[0];
const int x = xCoordinates[i];
const int y = yCoordinates[i];
if (DEBUG_PROXIMITY_CHARS) {
@@ -252,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
a += 0;
AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y);
}
- for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) {
- const int currentChar = proximityChars[j];
+ for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0;
+ ++j) {
+ const int currentCodePoint = proximityCodePoints[j];
const float squaredDistance =
hasInputCoordinates() ? calculateNormalizedSquaredDistance(
- mProximityInfo->getKeyIndexOf(currentChar), i) :
+ mProximityInfo->getKeyIndexOf(currentCodePoint), i) :
NOT_A_DISTANCE_FLOAT;
if (squaredDistance >= 0.0f) {
mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] =
@@ -267,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi
PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
}
if (DEBUG_PROXIMITY_CHARS) {
- AKLOGI("--- Proximity (%d) = %c", j, currentChar);
+ AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint);
}
}
}
@@ -294,7 +362,7 @@ bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSiz
// the given point and the nearest key position.
float ProximityInfoState::updateNearKeysDistances(const int x, const int y,
NearKeysDistanceMap *const currentNearKeysDistances) {
- static const float NEAR_KEY_THRESHOLD = 4.0f;
+ static const float NEAR_KEY_THRESHOLD = 2.0f;
currentNearKeysDistances->clear();
const int keyCount = mProximityInfo->getKeyCount();
@@ -332,64 +400,49 @@ bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const current
// Calculating a point score that indicates usefulness of the point.
float ProximityInfoState::getPointScore(
const int x, const int y, const int time, const bool lastPoint, const float nearest,
- const NearKeysDistanceMap *const currentNearKeysDistances,
+ const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
const NearKeysDistanceMap *const prevPrevNearKeysDistances) const {
static const int DISTANCE_BASE_SCALE = 100;
- static const int SAVE_DISTANCE_SCALE = 200;
- static const int SKIP_DISTANCE_SCALE = 25;
- static const int CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 40;
- static const int STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 50;
- static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 27;
- static const float SAVE_DISTANCE_SCORE = 2.0f;
- static const float SKIP_DISTANCE_SCORE = -1.0f;
- static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f;
- static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 36.0f;
- static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f;
- static const float STRAIGHT_SKIP_SCORE = -1.0f;
- static const float CORNER_ANGLE_THRESHOLD = M_PI_F / 2.0f;
+ static const float NEAR_KEY_THRESHOLD = 0.6f;
+ static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 25;
+ static const float NOT_LOCALMIN_DISTANCE_SCORE = -1.0f;
+ static const float LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE = 1.0f;
+ static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 2.0f / 3.0f;
+ static const float CORNER_SUM_ANGLE_THRESHOLD = M_PI_F / 4.0f;
static const float CORNER_SCORE = 1.0f;
- const std::size_t size = mInputXs.size();
- if (size <= 1) {
+ const size_t size = mInputXs.size();
+ // If there is only one point, add this point. Besides, if the previous point's distance map
+ // is empty, we re-compute nearby keys distances from the current point.
+ // Note that the current point is the first point in the incremental input that needs to
+ // be re-computed.
+ if (size <= 1 || prevNearKeysDistances->empty()) {
return 0.0f;
}
+
const int baseSampleRate = mProximityInfo->getMostCommonKeyWidth();
- const int distNext = getDistanceInt(x, y, mInputXs.back(), mInputYs.back())
- * DISTANCE_BASE_SCALE;
const int distPrev = getDistanceInt(mInputXs.back(), mInputYs.back(),
mInputXs[size - 2], mInputYs[size - 2]) * DISTANCE_BASE_SCALE;
float score = 0.0f;
- // Sum of distances
- if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) {
- score += SAVE_DISTANCE_SCORE;
- }
- // Distance
- if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) {
- score += SKIP_DISTANCE_SCORE;
- }
// Location
- if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) {
- if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
- prevPrevNearKeysDistances)) {
- score += CHECK_LOCALMIN_DISTANCE_SCORE;
- }
+ if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances,
+ prevPrevNearKeysDistances)) {
+ score += NOT_LOCALMIN_DISTANCE_SCORE;
+ } else if (nearest < NEAR_KEY_THRESHOLD) {
+ // Promote points nearby keys
+ score += LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE;
}
// Angle
const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back());
const float angle2 = getAngle(mInputXs.back(), mInputYs.back(),
mInputXs[size - 2], mInputYs[size - 2]);
const float angleDiff = getAngleDiff(angle1, angle2);
- // Skip straight
- if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD
- && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE
- && angleDiff < STRAIGHT_ANGLE_THRESHOLD) {
- score += STRAIGHT_SKIP_SCORE;
- }
+
// Save corner
if (distPrev > baseSampleRate * CORNER_CHECK_DISTANCE_THRESHOLD_SCALE
- && angleDiff > CORNER_ANGLE_THRESHOLD) {
+ && (sumAngle > CORNER_SUM_ANGLE_THRESHOLD || angleDiff > CORNER_ANGLE_THRESHOLD)) {
score += CORNER_SCORE;
}
return score;
@@ -397,18 +450,18 @@ float ProximityInfoState::getPointScore(
// Sampling touch point and pushing information to vectors.
// Returning if previous point is popped or not.
-bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y,
- const int time, const bool sample, const bool isLastPoint,
+bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y,
+ const int time, const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
const NearKeysDistanceMap *const prevPrevNearKeysDistances) {
- static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f;
+ static const int LAST_POINT_SKIP_DISTANCE_SCALE = 4;
size_t size = mInputXs.size();
bool popped = false;
- if (nodeChar < 0 && sample) {
+ if (nodeCodePoint < 0 && sample) {
const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances);
- const float score = getPointScore(x, y, time, isLastPoint, nearest,
+ const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle,
currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances);
if (score < 0) {
// Pop previous point because it would be useless.
@@ -419,42 +472,24 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar
popped = false;
}
// Check if the last point should be skipped.
- if (isLastPoint) {
- if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back())
- < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) {
+ if (isLastPoint && size > 0) {
+ if (getDistanceInt(x, y, mInputXs.back(), mInputYs.back())
+ * LAST_POINT_SKIP_DISTANCE_SCALE < mProximityInfo->getMostCommonKeyWidth()) {
+ // This point is not used because it's too close to the previous point.
if (DEBUG_GEO_FULL) {
- AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %f, "
- "width = %f", size, x, y, mInputXs.back(), mInputYs.back(),
- getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()),
+ AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %d, "
+ "width = %d", size, x, y, mInputXs.back(), mInputYs.back(),
+ getDistanceInt(x, y, mInputXs.back(), mInputYs.back()),
mProximityInfo->getMostCommonKeyWidth()
- * LAST_POINT_SKIP_DISTANCE_SCALE);
+ / LAST_POINT_SKIP_DISTANCE_SCALE);
}
return popped;
- } else if (size > 1) {
- int minChar = 0;
- float minDist = mMaxPointToKeyLength;
- for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin();
- it != currentNearKeysDistances->end(); ++it) {
- if (minDist > it->second) {
- minChar = it->first;
- minDist = it->second;
- }
- }
- NearKeysDistanceMap::const_iterator itPP =
- prevNearKeysDistances->find(minChar);
- if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) {
- if (DEBUG_GEO_FULL) {
- AKLOGI("p1: char = %c, minDist = %f, prevNear key minDist = %f",
- minChar, itPP->second, minDist);
- }
- return popped;
- }
}
}
}
- if (nodeChar >= 0 && (x < 0 || y < 0)) {
- const int keyId = mProximityInfo->getKeyIndexOf(nodeChar);
+ if (nodeCodePoint >= 0 && (x < 0 || y < 0)) {
+ const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint);
if (keyId >= 0) {
x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
@@ -503,22 +538,92 @@ int ProximityInfoState::getDuration(const int index) const {
return 0;
}
-float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint,
- const float scale) const {
+float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint) const {
const int keyId = mProximityInfo->getKeyIndexOf(codePoint);
if (keyId != NOT_AN_INDEX) {
const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
- return min(mDistanceCache[index] * scale, mMaxPointToKeyLength);
+ return min(mDistanceCache[index], mMaxPointToKeyLength);
}
- if (isSkippableChar(codePoint)) {
+ if (isSkippableCodePoint(codePoint)) {
return 0.0f;
}
// If the char is not a key on the keyboard then return the max length.
return MAX_POINT_TO_KEY_LENGTH;
}
+float ProximityInfoState::getPointToKeyByIdLength(const int inputIndex, const int keyId) const {
+ if (keyId != NOT_AN_INDEX) {
+ const int index = inputIndex * mProximityInfo->getKeyCount() + keyId;
+ return min(mDistanceCache[index], mMaxPointToKeyLength);
+ }
+ // If the char is not a key on the keyboard then return the max length.
+ return static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+}
+
+// In the following function, c is the current character of the dictionary word currently examined.
+// currentCodePoints is an array containing the keys close to the character the user actually
+// typed at the same position. We want to see if c is in it: if so, then the word contains at
+// that position a character close to what the user typed.
+// What the user typed is actually the first character of the array.
+// proximityIndex is a pointer to the variable where getMatchedProximityId returns the index of c
+// in the proximity chars of the input index.
+// Notice : accented characters do not have a proximity list, so they are alone in their list. The
+// non-accented version of the character should be considered "close", but not the other keys close
+// to the non-accented version.
+ProximityType ProximityInfoState::getMatchedProximityId(const int index, const int c,
+ const bool checkProximityChars, int *proximityIndex) const {
+ const int *currentCodePoints = getProximityCodePointsAt(index);
+ const int firstCodePoint = currentCodePoints[0];
+ const int baseLowerC = toBaseLowerCase(c);
+
+ // The first char in the array is what user typed. If it matches right away, that means the
+ // user typed that same char for this pos.
+ if (firstCodePoint == baseLowerC || firstCodePoint == c) {
+ return EQUIVALENT_CHAR;
+ }
+
+ if (!checkProximityChars) return UNRELATED_CHAR;
+
+ // If the non-accented, lowercased version of that first character matches c, then we have a
+ // non-accented version of the accented character the user typed. Treat it as a close char.
+ if (toBaseLowerCase(firstCodePoint) == baseLowerC) {
+ return NEAR_PROXIMITY_CHAR;
+ }
+
+ // Not an exact nor an accent-alike match: search the list of close keys
+ int j = 1;
+ while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
+ }
+ return NEAR_PROXIMITY_CHAR;
+ }
+ ++j;
+ }
+ if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ ++j;
+ while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
+ && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
+ const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c);
+ if (matched) {
+ if (proximityIndex) {
+ *proximityIndex = j;
+ }
+ return ADDITIONAL_PROXIMITY_CHAR;
+ }
+ ++j;
+ }
+ }
+ // Was not included, signal this as an unrelated character.
+ return UNRELATED_CHAR;
+}
+
int ProximityInfoState::getSpaceY() const {
- const int keyId = mProximityInfo->getKeyIndexOf(' ');
+ const int keyId = mProximityInfo->getKeyIndexOf(KEYCODE_SPACE);
return mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
}
@@ -538,8 +643,9 @@ int32_t ProximityInfoState::getAllPossibleChars(
return filterSize;
}
int newFilterSize = filterSize;
- for (int j = 0; j < mProximityInfo->getKeyCount(); ++j) {
- if (mNearKeysVector[index].test(j)) {
+ const int keyCount = mProximityInfo->getKeyCount();
+ for (int j = 0; j < keyCount; ++j) {
+ if (mSearchKeysVector[index].test(j)) {
const int32_t keyCodePoint = mProximityInfo->getCodePointOf(j);
bool insert = true;
// TODO: Avoid linear search
@@ -557,6 +663,12 @@ int32_t ProximityInfoState::getAllPossibleChars(
return newFilterSize;
}
+bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const {
+ ASSERT(keyId >= 0);
+ ASSERT(index >= 0 && index < mInputSize);
+ return mSearchKeysVector[index].test(keyId);
+}
+
void ProximityInfoState::popInputData() {
mInputXs.pop_back();
mInputYs.pop_back();
@@ -565,4 +677,389 @@ void ProximityInfoState::popInputData() {
mInputIndice.pop_back();
}
+float ProximityInfoState::getDirection(const int index0, const int index1) const {
+ if (index0 < 0 || index0 > mInputSize - 1) {
+ return 0.0f;
+ }
+ if (index1 < 0 || index1 > mInputSize - 1) {
+ return 0.0f;
+ }
+ const int x1 = mInputXs[index0];
+ const int y1 = mInputYs[index0];
+ const int x2 = mInputXs[index1];
+ const int y2 = mInputYs[index1];
+ return getAngle(x1, y1, x2, y2);
+}
+
+float ProximityInfoState::getPointAngle(const int index) const {
+ if (index <= 0 || index >= mInputSize - 1) {
+ return 0.0f;
+ }
+ const float previousDirection = getDirection(index - 1, index);
+ const float nextDirection = getDirection(index, index + 1);
+ const float directionDiff = getAngleDiff(previousDirection, nextDirection);
+ return directionDiff;
+}
+
+float ProximityInfoState::getPointsAngle(
+ const int index0, const int index1, const int index2) const {
+ if (index0 < 0 || index0 > mInputSize - 1) {
+ return 0.0f;
+ }
+ if (index1 < 0 || index1 > mInputSize - 1) {
+ return 0.0f;
+ }
+ if (index2 < 0 || index2 > mInputSize - 1) {
+ return 0.0f;
+ }
+ const float previousDirection = getDirection(index0, index1);
+ const float nextDirection = getDirection(index1, index2);
+ return getAngleDiff(previousDirection, nextDirection);
+}
+
+float ProximityInfoState::getLineToKeyDistance(
+ const int from, const int to, const int keyId, const bool extend) const {
+ if (from < 0 || from > mInputSize - 1) {
+ return 0.0f;
+ }
+ if (to < 0 || to > mInputSize - 1) {
+ return 0.0f;
+ }
+ const int x0 = mInputXs[from];
+ const int y0 = mInputYs[from];
+ const int x1 = mInputXs[to];
+ const int y1 = mInputYs[to];
+
+ const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId);
+ const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId);
+
+ return pointToLineSegSquaredDistanceFloat(keyX, keyY, x0, y0, x1, y1, extend);
+}
+
+// Updates probabilities of aligning to some keys and skipping.
+// Word suggestion should be based on these probabilities.
+void ProximityInfoState::updateAlignPointProbabilities(const int start) {
+ static const float MIN_PROBABILITY = 0.000001f;
+ static const float MAX_SKIP_PROBABILITY = 0.95f;
+ static const float SKIP_FIRST_POINT_PROBABILITY = 0.01f;
+ static const float SKIP_LAST_POINT_PROBABILITY = 0.1f;
+ static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f;
+ static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f;
+ static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f;
+ static const float NEAREST_DISTANCE_WEIGHT = 0.5f;
+ static const float NEAREST_DISTANCE_BIAS = 0.5f;
+ static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f;
+ static const float NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f;
+
+ static const float ANGLE_WEIGHT = 0.90f;
+ static const float DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f;
+ static const float SKIP_DEEP_CORNER_PROBABILITY = 0.1f;
+ static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f;
+ static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f;
+ static const float SKIP_CORNER_PROBABILITY = 0.4f;
+ static const float SPEED_MARGIN = 0.1f;
+ static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f;
+
+ const int keyCount = mProximityInfo->getKeyCount();
+ mCharProbabilities.resize(mInputSize);
+ // Calculates probabilities of using a point as a correlated point with the character
+ // for each point.
+ for (int i = start; i < mInputSize; ++i) {
+ mCharProbabilities[i].clear();
+ // First, calculates skip probability. Starts from MAX_SKIP_PROBABILITY.
+ // Note that all values that are multiplied to this probability should be in [0.0, 1.0];
+ float skipProbability = MAX_SKIP_PROBABILITY;
+
+ const float currentAngle = getPointAngle(i);
+ const float relativeSpeed = getRelativeSpeed(i);
+
+ float nearestKeyDistance = static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+ for (int j = 0; j < keyCount; ++j) {
+ if (mNearKeysVector[i].test(j)) {
+ const float distance = getPointToKeyByIdLength(i, j);
+ if (distance < nearestKeyDistance) {
+ nearestKeyDistance = distance;
+ }
+ }
+ }
+
+ if (i == 0) {
+ skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT
+ + NEAREST_DISTANCE_BIAS);
+ // Promote the first point
+ skipProbability *= SKIP_FIRST_POINT_PROBABILITY;
+ } else if (i == mInputSize - 1) {
+ skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT_FOR_LAST
+ + NEAREST_DISTANCE_BIAS_FOR_LAST);
+ // Promote the last point
+ skipProbability *= SKIP_LAST_POINT_PROBABILITY;
+ } else {
+ // If the current speed is relatively slower than adjacent keys, we promote this point.
+ if (getRelativeSpeed(i - 1) - SPEED_MARGIN > relativeSpeed
+ && relativeSpeed < getRelativeSpeed(i + 1) - SPEED_MARGIN) {
+ if (currentAngle < CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= min(1.0f, relativeSpeed
+ * SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY);
+ } else {
+ // If the angle is small enough, we promote this point more. (e.g. pit vs put)
+ skipProbability *= min(1.0f, relativeSpeed * SPEED_WEIGHT_FOR_SKIP_PROBABILITY
+ + MIN_SPEED_RATE_FOR_SKIP_PROBABILITY);
+ }
+ }
+
+ skipProbability *= min(1.0f, relativeSpeed * nearestKeyDistance *
+ NEAREST_DISTANCE_WEIGHT + NEAREST_DISTANCE_BIAS);
+
+ // Adjusts skip probability by a rate depending on angle.
+ // ANGLE_WEIGHT of skipProbability is adjusted by current angle.
+ skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ANGLE_WEIGHT
+ + (1.0f - ANGLE_WEIGHT);
+ if (currentAngle > DEEP_CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= SKIP_DEEP_CORNER_PROBABILITY;
+ }
+ // We assume the angle of this point is the angle for point[i], point[i - 2]
+ // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1]
+ // and point[i - 2] is this angle can be more affected by the noise.
+ const float prevAngle = getPointsAngle(i, i - 2, i - 3);
+ if (i >= 3 && prevAngle < STRAIGHT_ANGLE_THRESHOLD
+ && currentAngle > CORNER_ANGLE_THRESHOLD) {
+ skipProbability *= SKIP_CORNER_PROBABILITY;
+ }
+ }
+
+ // probabilities must be in [0.0, MAX_SKIP_PROBABILITY];
+ ASSERT(skipProbability >= 0.0f);
+ ASSERT(skipProbability <= MAX_SKIP_PROBABILITY);
+ mCharProbabilities[i][NOT_AN_INDEX] = skipProbability;
+
+ // Second, calculates key probabilities by dividing the rest probability
+ // (1.0f - skipProbability).
+ const float inputCharProbability = 1.0f - skipProbability;
+
+ // TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine
+ // learning or something would be efficient.
+ static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f;
+ static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f;
+ static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f;
+ static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f;
+ static const float MIN_STANDERD_DIVIATION = 0.37f;
+
+ const float speedxAngleRate = min(relativeSpeed * currentAngle / M_PI_F
+ * SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION,
+ MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION);
+ const float speedxNearestKeyDistanceRate = min(relativeSpeed * nearestKeyDistance
+ * SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION,
+ MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION);
+ const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate + MIN_STANDERD_DIVIATION;
+
+ NormalDistribution distribution(CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma);
+ static const float PREV_DISTANCE_WEIGHT = 0.5f;
+ static const float NEXT_DISTANCE_WEIGHT = 0.6f;
+ // Summing up probability densities of all near keys.
+ float sumOfProbabilityDensities = 0.0f;
+ for (int j = 0; j < keyCount; ++j) {
+ if (mNearKeysVector[i].test(j)) {
+ float distance = sqrtf(getPointToKeyByIdLength(i, j));
+ if (i == 0 && i != mInputSize - 1) {
+ // For the first point, weighted average of distances from first point and the
+ // next point to the key is used as a point to key distance.
+ const float nextDistance = sqrtf(getPointToKeyByIdLength(i + 1, j));
+ if (nextDistance < distance) {
+ // The distance of the first point tends to be bigger than continuing
+ // points because the first touch by the user can be sloppy.
+ // So we promote the first point if the distance of that point is larger
+ // than the distance of the next point.
+ distance = (distance + nextDistance * NEXT_DISTANCE_WEIGHT)
+ / (1.0f + NEXT_DISTANCE_WEIGHT);
+ }
+ } else if (i != 0 && i == mInputSize - 1) {
+ // For the last point, weighted average of distances from last point and
+ // the previous point to the key is used as a point to key distance.
+ const float previousDistance = sqrtf(getPointToKeyByIdLength(i - 1, j));
+ if (previousDistance < distance) {
+ // The distance of the last point tends to be bigger than continuing points
+ // because the last touch by the user can be sloppy. So we promote the
+ // last point if the distance of that point is larger than the distance of
+ // the previous point.
+ distance = (distance + previousDistance * PREV_DISTANCE_WEIGHT)
+ / (1.0f + PREV_DISTANCE_WEIGHT);
+ }
+ }
+ // TODO: Promote the first point when the extended line from the next input is near
+ // from a key. Also, promote the last point as well.
+ sumOfProbabilityDensities += distribution.getProbabilityDensity(distance);
+ }
+ }
+
+ // Split the probability of an input point to keys that are close to the input point.
+ for (int j = 0; j < keyCount; ++j) {
+ if (mNearKeysVector[i].test(j)) {
+ float distance = sqrtf(getPointToKeyByIdLength(i, j));
+ if (i == 0 && i != mInputSize - 1) {
+ // For the first point, weighted average of distances from the first point and
+ // the next point to the key is used as a point to key distance.
+ const float prevDistance = sqrtf(getPointToKeyByIdLength(i + 1, j));
+ if (prevDistance < distance) {
+ distance = (distance + prevDistance * NEXT_DISTANCE_WEIGHT)
+ / (1.0f + NEXT_DISTANCE_WEIGHT);
+ }
+ } else if (i != 0 && i == mInputSize - 1) {
+ // For the last point, weighted average of distances from last point and
+ // the previous point to the key is used as a point to key distance.
+ const float prevDistance = sqrtf(getPointToKeyByIdLength(i - 1, j));
+ if (prevDistance < distance) {
+ distance = (distance + prevDistance * PREV_DISTANCE_WEIGHT)
+ / (1.0f + PREV_DISTANCE_WEIGHT);
+ }
+ }
+ const float probabilityDensity = distribution.getProbabilityDensity(distance);
+ const float probability = inputCharProbability * probabilityDensity
+ / sumOfProbabilityDensities;
+ mCharProbabilities[i][j] = probability;
+ }
+ }
+ }
+
+
+ if (DEBUG_POINTS_PROBABILITY) {
+ for (int i = 0; i < mInputSize; ++i) {
+ std::stringstream sstream;
+ sstream << i << ", ";
+ sstream << "("<< mInputXs[i] << ", ";
+ sstream << ", "<< mInputYs[i] << "), ";
+ sstream << "Speed: "<< getRelativeSpeed(i) << ", ";
+ sstream << "Angle: "<< getPointAngle(i) << ", \n";
+
+ for (hash_map_compat<int, float>::iterator it = mCharProbabilities[i].begin();
+ it != mCharProbabilities[i].end(); ++it) {
+ if (it->first == NOT_AN_INDEX) {
+ sstream << it->first
+ << "(skip):"
+ << it->second
+ << "\n";
+ } else {
+ sstream << it->first
+ << "("
+ << static_cast<char>(mProximityInfo->getCodePointOf(it->first))
+ << "):"
+ << it->second
+ << "\n";
+ }
+ }
+ AKLOGI("%s", sstream.str().c_str());
+ }
+ }
+
+ // Decrease key probabilities of points which don't have the highest probability of that key
+ // among nearby points. Probabilities of the first point and the last point are not suppressed.
+ for (int i = max(start, 1); i < mInputSize; ++i) {
+ for (int j = i + 1; j < mInputSize; ++j) {
+ if (!suppressCharProbabilities(i, j)) {
+ break;
+ }
+ }
+ for (int j = i - 1; j >= max(start, 0); --j) {
+ if (!suppressCharProbabilities(i, j)) {
+ break;
+ }
+ }
+ }
+
+ // Converting from raw probabilities to log probabilities to calculate spatial distance.
+ for (int i = start; i < mInputSize; ++i) {
+ for (int j = 0; j < keyCount; ++j) {
+ hash_map_compat<int, float>::iterator it = mCharProbabilities[i].find(j);
+ if (it == mCharProbabilities[i].end()){
+ mNearKeysVector[i].reset(j);
+ } else if(it->second < MIN_PROBABILITY) {
+ // Erases from near keys vector because it has very low probability.
+ mNearKeysVector[i].reset(j);
+ mCharProbabilities[i].erase(j);
+ } else {
+ it->second = -logf(it->second);
+ }
+ }
+ mCharProbabilities[i][NOT_AN_INDEX] = -logf(mCharProbabilities[i][NOT_AN_INDEX]);
+ }
+}
+
+// Decreases char probabilities of index0 by checking probabilities of a near point (index1) and
+// increases char probabilities of index1 by checking probabilities of index0.
+//
+// Returns false, leaving both points untouched, when the difference between mLengthCache[index0]
+// and mLengthCache[index1] exceeds SUPPRESSION_LENGTH_WEIGHT times the most common key width.
+// Otherwise returns true after processing every entry of index0. The callers visible in this
+// file stop walking away from index0 as soon as false is returned.
+//
+// For each entry of index0 whose value is strictly smaller than the value stored for the same
+// key at index1:
+// - the entry is multiplied by a suppression rate that grows linearly with the length
+// difference, from MIN_SUPPRESSION_RATE up to MIN_SUPPRESSION_RATE + SUPPRESSION_WEIGHT;
+// - the amount removed is added to index0's NOT_AN_INDEX (skip) entry;
+// - a bounded share of that amount is moved from index1's NOT_AN_INDEX entry to index1's
+// entry for the same key.
+// NOTE(review): the loop does not filter out the NOT_AN_INDEX entry itself -- confirm that
+// comparing the two skip entries against each other is intended.
+bool ProximityInfoState::suppressCharProbabilities(const int index0, const int index1) {
+ ASSERT(0 <= index0 && index0 < mInputSize);
+ ASSERT(0 <= index1 && index1 < mInputSize);
+
+ static const float SUPPRESSION_LENGTH_WEIGHT = 1.5f;
+ static const float MIN_SUPPRESSION_RATE = 0.1f;
+ static const float SUPPRESSION_WEIGHT = 0.5f;
+ static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f;
+ static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f;
+
+ const float keyWidthFloat = static_cast<float>(mProximityInfo->getMostCommonKeyWidth());
+ // Separation of the two points as recorded in mLengthCache (presumably the accumulated
+ // stroke length at each sampled point -- TODO confirm).
+ const float diff = fabsf(static_cast<float>(mLengthCache[index0] - mLengthCache[index1]));
+ if (diff > keyWidthFloat * SUPPRESSION_LENGTH_WEIGHT) {
+ // Too far apart to influence each other.
+ return false;
+ }
+ // The closer the two points are, the smaller the rate and therefore the stronger the
+ // suppression applied to index0.
+ const float suppressionRate = MIN_SUPPRESSION_RATE
+ + diff / keyWidthFloat / SUPPRESSION_LENGTH_WEIGHT * SUPPRESSION_WEIGHT;
+ for (hash_map_compat<int, float>::iterator it = mCharProbabilities[index0].begin();
+ it != mCharProbabilities[index0].end(); ++it) {
+ hash_map_compat<int, float>::iterator it2 = mCharProbabilities[index1].find(it->first);
+ // Only keys present at both points, and less probable at index0, are suppressed.
+ if (it2 != mCharProbabilities[index1].end() && it->second < it2->second) {
+ const float newProbability = it->second * suppressionRate;
+ const float suppression = it->second - newProbability;
+ it->second = newProbability;
+ // mCharProbabilities[index0][NOT_AN_INDEX] is the probability of skipping this point.
+ mCharProbabilities[index0][NOT_AN_INDEX] += suppression;
+
+ // Add the probability of the same key nearby index1
+ // The gain is capped by a fraction of index1's current skip probability, and is taken
+ // out of that skip probability below.
+ const float probabilityGain = min(suppression * SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN,
+ mCharProbabilities[index1][NOT_AN_INDEX]
+ * SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN);
+ it2->second += probabilityGain;
+ mCharProbabilities[index1][NOT_AN_INDEX] -= probabilityGain;
+ }
+ }
+ return true;
+}
+
+// Get a word that is detected by tracing highest probability sequence into codePointBuf and
+// returns the accumulated score of that word.
+//
+// For each input point the entry of mCharProbabilities with the smallest stored value is chosen
+// greedily. Key entries are penalized by DEMOTION_LOG_PROBABILITY before the comparison; the
+// NOT_AN_INDEX (skip) entry is not. A point whose best entry is NOT_AN_INDEX contributes no
+// code point.
+// At most MAX_WORD_LENGTH_INTERNAL - 1 code points are written, followed by a terminating 0, so
+// codePointBuf must have room for MAX_WORD_LENGTH_INTERNAL ints.
+// Returns the sum of the selected (penalized) values. The conversion loop earlier in this file
+// stores -logf(probability), so a smaller sum corresponds to a more probable sequence.
+float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const {
+ static const float DEMOTION_LOG_PROBABILITY = 0.3f;
+ int index = 0;
+ float sumLogProbability = 0.0f;
+ // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases.
+ for (int i = 0; i < mInputSize && index < MAX_WORD_LENGTH_INTERNAL - 1; ++i) {
+ // Start from MAX_POINT_TO_KEY_LENGTH: if no entry is smaller, the point is treated as
+ // skipped and this value is what gets added to the sum.
+ float minLogProbability = static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+ int character = NOT_AN_INDEX;
+ for (hash_map_compat<int, float>::const_iterator it = mCharProbabilities[i].begin();
+ it != mCharProbabilities[i].end(); ++it) {
+ // Only real keys are demoted; the skip entry is compared as stored.
+ const float logProbability = (it->first != NOT_AN_INDEX)
+ ? it->second + DEMOTION_LOG_PROBABILITY : it->second;
+ if (logProbability < minLogProbability) {
+ minLogProbability = logProbability;
+ character = it->first;
+ }
+ }
+ if (character != NOT_AN_INDEX) {
+ codePointBuf[index] = mProximityInfo->getCodePointOf(character);
+ index++;
+ }
+ sumLogProbability += minLogProbability;
+ }
+ // Terminate the output; index is at most MAX_WORD_LENGTH_INTERNAL - 1 here.
+ codePointBuf[index] = '\0';
+ return sumLogProbability;
+}
+
+// Returns the value stored in mCharProbabilities for mapping the input point at index to the
+// key keyIndex, or MAX_POINT_TO_KEY_LENGTH (converted to float) when that point has no entry
+// for the key.
+// NOTE(review): after the conversion loop earlier in this file the stored value is
+// -logf(probability) rather than a raw probability -- confirm callers expect that.
+float ProximityInfoState::getProbability(const int index, const int keyIndex) const {
+ ASSERT(0 <= index && index < mInputSize);
+ hash_map_compat<int, float>::const_iterator it = mCharProbabilities[index].find(keyIndex);
+ if (it != mCharProbabilities[index].end()) {
+ return it->second;
+ }
+ // No entry for this key at this point.
+ return static_cast<float>(MAX_POINT_TO_KEY_LENGTH);
+}
+
} // namespace latinime
diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h
index c1ec76c38..39a238889 100644
--- a/native/jni/src/proximity_info_state.h
+++ b/native/jni/src/proximity_info_state.h
@@ -43,36 +43,38 @@ class ProximityInfoState {
// Defined in proximity_info_state.cpp //
/////////////////////////////////////////
void initInputParams(const int pointerId, const float maxPointToKeyLength,
- const ProximityInfo *proximityInfo, const int32_t *const inputCodes,
+ const ProximityInfo *proximityInfo, const int *const inputCodes,
const int inputSize, const int *xCoordinates, const int *yCoordinates,
const int *const times, const int *const pointerIds, const bool isGeometric);
/////////////////////////////////////////
// Defined here //
/////////////////////////////////////////
- ProximityInfoState()
+ AK_FORCE_INLINE ProximityInfoState()
: mProximityInfo(0), mMaxPointToKeyLength(0),
mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(),
mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0),
mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(),
- mDistanceCache(), mLengthCache(), mRelativeSpeeds(), mNearKeysVector(),
+ mDistanceCache(), mLengthCache(), mRelativeSpeeds(), mDirections(),
+ mCharProbabilities(), mNearKeysVector(), mSearchKeysVector(),
mTouchPositionCorrectionEnabled(false), mInputSize(0) {
memset(mInputCodes, 0, sizeof(mInputCodes));
memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances));
memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord));
}
- virtual ~ProximityInfoState() {}
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~ProximityInfoState() {}
- inline unsigned short getPrimaryCharAt(const int index) const {
- return getProximityCharsAt(index)[0];
+ inline int getPrimaryCodePointAt(const int index) const {
+ return getProximityCodePointsAt(index)[0];
}
- inline bool existsCharInProximityAt(const int index, const int c) const {
- const int *chars = getProximityCharsAt(index);
+ AK_FORCE_INLINE bool existsCodePointInProximityAt(const int index, const int c) const {
+ const int *codePoints = getProximityCodePointsAt(index);
int i = 0;
- while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
- if (chars[i++] == c) {
+ while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) {
+ if (codePoints[i++] == c) {
return true;
}
}
@@ -81,90 +83,25 @@ class ProximityInfoState {
inline bool existsAdjacentProximityChars(const int index) const {
if (index < 0 || index >= mInputSize) return false;
- const int currentChar = getPrimaryCharAt(index);
+ const int currentCodePoint = getPrimaryCodePointAt(index);
const int leftIndex = index - 1;
- if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) {
+ if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) {
return true;
}
const int rightIndex = index + 1;
- if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) {
+ if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) {
return true;
}
return false;
}
- // In the following function, c is the current character of the dictionary word
- // currently examined.
- // currentChars is an array containing the keys close to the character the
- // user actually typed at the same position. We want to see if c is in it: if so,
- // then the word contains at that position a character close to what the user
- // typed.
- // What the user typed is actually the first character of the array.
- // proximityIndex is a pointer to the variable where getMatchedProximityId returns
- // the index of c in the proximity chars of the input index.
- // Notice : accented characters do not have a proximity list, so they are alone
- // in their list. The non-accented version of the character should be considered
- // "close", but not the other keys close to the non-accented version.
- inline ProximityType getMatchedProximityId(const int index,
- const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const {
- const int *currentChars = getProximityCharsAt(index);
- const int firstChar = currentChars[0];
- const unsigned short baseLowerC = toBaseLowerCase(c);
-
- // The first char in the array is what user typed. If it matches right away,
- // that means the user typed that same char for this pos.
- if (firstChar == baseLowerC || firstChar == c) {
- return EQUIVALENT_CHAR;
- }
-
- if (!checkProximityChars) return UNRELATED_CHAR;
-
- // If the non-accented, lowercased version of that first character matches c,
- // then we have a non-accented version of the accented character the user
- // typed. Treat it as a close char.
- if (toBaseLowerCase(firstChar) == baseLowerC)
- return NEAR_PROXIMITY_CHAR;
-
- // Not an exact nor an accent-alike match: search the list of close keys
- int j = 1;
- while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return NEAR_PROXIMITY_CHAR;
- }
- ++j;
- }
- if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- ++j;
- while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL
- && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) {
- const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
- if (matched) {
- if (proximityIndex) {
- *proximityIndex = j;
- }
- return ADDITIONAL_PROXIMITY_CHAR;
- }
- ++j;
- }
- }
-
- // Was not included, signal this as an unrelated character.
- return UNRELATED_CHAR;
- }
-
inline int getNormalizedSquaredDistance(
const int inputIndex, const int proximityIndex) const {
return mNormalizedSquaredDistances[
inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex];
}
- inline const unsigned short *getPrimaryInputWord() const {
+ inline const int *getPrimaryInputWord() const {
return mPrimaryInputWord;
}
@@ -172,13 +109,13 @@ class ProximityInfoState {
return mTouchPositionCorrectionEnabled;
}
- inline bool sameAsTyped(const unsigned short *word, int length) const {
+ inline bool sameAsTyped(const int *word, int length) const {
if (length != mInputSize) {
return false;
}
const int *inputCodes = mInputCodes;
while (length--) {
- if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) {
+ if (*inputCodes != *word) {
return false;
}
inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL;
@@ -213,7 +150,11 @@ class ProximityInfoState {
return mIsContinuationPossible;
}
- float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const;
+ float getPointToKeyLength(const int inputIndex, const int charCode) const;
+ float getPointToKeyByIdLength(const int inputIndex, const int keyId) const;
+
+ ProximityType getMatchedProximityId(const int index, const int c,
+ const bool checkProximityChars, int *proximityIndex = 0) const;
int getSpaceY() const;
@@ -223,6 +164,25 @@ class ProximityInfoState {
float getRelativeSpeed(const int index) const {
return mRelativeSpeeds[index];
}
+
+ float getDirection(const int index) const {
+ return mDirections[index];
+ }
+ // get xy direction
+ float getDirection(const int x, const int y) const;
+
+ float getPointAngle(const int index) const;
+ // Returns angle of three points. index0, index1, and index2 are indices.
+ float getPointsAngle(const int index0, const int index1, const int index2) const;
+
+ float getHighestProbabilitySequence(int *const codePointBuf) const;
+
+ float getProbability(const int index, const int charCode) const;
+
+ float getLineToKeyDistance(
+ const int from, const int to, const int keyId, const bool extend) const;
+
+ bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const;
private:
DISALLOW_COPY_AND_ASSIGN(ProximityInfoState);
typedef hash_map_compat<int, float> NearKeysDistanceMap;
@@ -234,8 +194,8 @@ class ProximityInfoState {
float calculateSquaredDistanceFromSweetSpotCenter(
const int keyIndex, const int inputIndex) const;
- bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time,
- const bool sample, const bool isLastPoint,
+ bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time,
+ const bool sample, const bool isLastPoint, const float sumAngle,
NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
const NearKeysDistanceMap *const prevPrevNearKeysDistances);
@@ -248,7 +208,7 @@ class ProximityInfoState {
return mInputXs.size() > 0 && mInputYs.size() > 0;
}
- inline const int *getProximityCharsAt(const int index) const {
+ inline const int *getProximityCodePointsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL);
}
@@ -259,12 +219,14 @@ class ProximityInfoState {
const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
float getPointScore(
const int x, const int y, const int time, const bool last, const float nearest,
- const NearKeysDistanceMap *const currentNearKeysDistances,
+ const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances,
const NearKeysDistanceMap *const prevNearKeysDistances,
const NearKeysDistanceMap *const prevPrevNearKeysDistances) const;
bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates,
const int *const yCoordinates, const int *const times);
void popInputData();
+ void updateAlignPointProbabilities(const int start);
+ bool suppressCharProbabilities(const int index1, const int index2);
// const
const ProximityInfo *mProximityInfo;
@@ -286,12 +248,23 @@ class ProximityInfoState {
std::vector<float> mDistanceCache;
std::vector<int> mLengthCache;
std::vector<float> mRelativeSpeeds;
+ std::vector<float> mDirections;
+ // probabilities of skipping or mapping to a key for each point.
+ std::vector<hash_map_compat<int, float> > mCharProbabilities;
+ // The vector for the key code set which holds nearby keys for each sampled input point
+ // 1. Used to calculate the probability of the key
+ // 2. Used to calculate mSearchKeysVector
std::vector<NearKeycodesSet> mNearKeysVector;
+ // The vector for the key code set which holds nearby keys of some trailing sampled input points
+ // for each sampled input point. These nearby keys contain the next characters which can be in
+ // the dictionary. Specifically, currently we are looking for keys nearby trailing sampled
+ // inputs including the current input point.
+ std::vector<NearKeycodesSet> mSearchKeysVector;
bool mTouchPositionCorrectionEnabled;
- int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
+ int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL];
int mInputSize;
- unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
+ int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL];
};
} // namespace latinime
#endif // LATINIME_PROXIMITY_INFO_STATE_H
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index e72e7e3be..fed3c7251 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -43,18 +43,16 @@ class TerminalAttributes {
return mHasNextShortcutTarget;
}
- // Gets the shortcut target itself as a uint16_t string. For parameters and return value
+ // Gets the shortcut target itself as an int string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
- // TODO: make the output an uint32_t* to handle the whole unicode range.
- inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) {
+ inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
- mHasNextShortcutTarget =
- 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
+ mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos);
if (NOT_A_CODE_POINT == codePoint) break;
- outWord[i] = (uint16_t)codePoint;
+ outWord[i] = codePoint;
}
*outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags);
return i;
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index e3649bd4b..d134a47e6 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -41,14 +41,11 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[
{ 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
// TODO: check the header
-UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier,
- int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags)
- : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
- TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
- // TODO : remove this variable.
- ROOT_POS(0),
- BYTES_IN_ONE_CHAR(sizeof(int)),
- MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) {
+UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier,
+ int maxWordLength, int maxWords, const unsigned int flags)
+ : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
+ FULL_WORD_MULTIPLIER(fullWordMultiplier), // TODO : remove this variable.
+ ROOT_POS(0), MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) {
if (DEBUG_DICT) {
AKLOGI("UnigramDictionary - constructor");
}
@@ -57,13 +54,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typed
UnigramDictionary::~UnigramDictionary() {
}
-static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) {
- return static_cast<unsigned int>(sizeof(*codes)) * codesSize;
+static inline int getCodesBufferSize(const int *codes, const int codesSize) {
+ return sizeof(*codes) * codesSize;
}
-// TODO: This needs to take a const unsigned short* and not tinker with its contents
-static inline void addWord(unsigned short *word, int length, int frequency,
- WordsPriorityQueue *queue, int type) {
+// TODO: This needs to take a const int* and not tinker with its contents
+static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) {
queue->push(frequency, word, length, type);
}
@@ -105,6 +101,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
WordsPriorityQueuePool *queuePool,
const digraph_t *const digraphs, const unsigned int digraphsSize) const {
+ assert(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
+ assert(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
+ assert(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
const int startIndex = static_cast<int>(codesDest - codesBuffer);
if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
@@ -125,9 +124,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// Make i the index of the second char of the digraph for simplicity. Forgetting
// to do that results in an infinite recursion so take care!
++i;
- memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR);
- codesDest[(i - 1) * (BYTES_IN_ONE_CHAR / sizeof(codesDest[0]))] =
- replacementCodePoint;
+ memcpy(codesDest, codesSrc, i * sizeof(codesDest[0]));
+ codesDest[i - 1] = replacementCodePoint;
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
@@ -137,7 +135,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
// In our example, after "pru" in the buffer copy the "e", and continue on "fen"
- memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
+ memcpy(codesDest + i, codesSrc + i, sizeof(codesDest[0]));
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
@@ -153,13 +151,13 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
// If the word contains several digraphs, we'll come it for the product of them.
// eg. if the word is "ueberpruefen" we'll test, in order, against
// "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen".
- const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain;
+ const unsigned int remainingBytes = sizeof(codesDest[0]) * codesRemain;
if (0 != remainingBytes) {
memcpy(codesDest, codesSrc, remainingBytes);
memcpy(&xCoordinatesBuffer[startIndex], &xcoordinates[codesBufferSize - codesRemain],
- sizeof(int) * codesRemain);
+ sizeof(xCoordinatesBuffer[0]) * codesRemain);
memcpy(&yCoordinatesBuffer[startIndex], &ycoordinates[codesBufferSize - codesRemain],
- sizeof(int) * codesRemain);
+ sizeof(yCoordinatesBuffer[0]) * codesRemain);
}
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
@@ -173,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
- int *outputTypes) const {
-
+ const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const {
WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH);
queuePool.clearAll();
Correction masterCorrection;
@@ -188,8 +184,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
- &queuePool, GERMAN_UMLAUT_DIGRAPHS,
- sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
+ &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS));
} else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
int codesBuffer[getCodesBufferSize(codes, codesSize)];
int xCoordinatesBuffer[codesSize];
@@ -197,8 +192,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection,
- &queuePool, FRENCH_LIGATURES_DIGRAPHS,
- sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
+ &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS));
} else { // Normal processing
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
@@ -222,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
AKLOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
- short unsigned int *w = outWords + j * MAX_WORD_LENGTH;
+ int *w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
(void)s; // To suppress compiler warning
@@ -234,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
return suggestedWordsCount;
}
-void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, Correction *correction,
- WordsPriorityQueuePool *queuePool) const {
-
+void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const int inputSize,
+ const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
+ const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool)
+ const {
PROF_OPEN;
PROF_START(0);
PROF_END(0);
@@ -288,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (queue->size() > 0) {
WordsPriorityQueue::SuggestedWord *sw = queue->top();
const int score = sw->mScore;
- const unsigned short *word = sw->mWord;
+ const int *word = sw->mWord;
const int wordLength = sw->mWordLength;
float ns = Correction::RankingAlgorithm::calcNormalizedScore(
correction->getPrimaryInputWord(), i, word, wordLength, score);
@@ -307,15 +300,13 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int
Correction *correction) const {
if (DEBUG_DICT) {
AKLOGI("initSuggest");
- DUMP_WORD_INT(codes, inputSize);
+ DUMP_WORD(codes, inputSize);
}
correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates);
const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
correction->initCorrection(proximityInfo, inputSize, maxDepth);
}
-static const char SPACE = ' ';
-
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
@@ -374,7 +365,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
}
}
-inline void UnigramDictionary::onTerminal(const int probability,
+void UnigramDictionary::onTerminal(const int probability,
const TerminalAttributes& terminalAttributes, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
const int currentWordIndex) const {
@@ -382,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
int wordLength;
- unsigned short *wordPointer;
+ int *wordPointer;
if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
@@ -410,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability,
// so that the insert order is protected inside the queue for words
// with the same score. For the moment we use -1 to make sure the shortcut will
// never be in front of the word.
- uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
+ int shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
int shortcutFrequency;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency);
@@ -450,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion(
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const {
+ int *wordLengthArray, int *outputWord, int *outputWordLength) const {
if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_ABORT;
}
@@ -493,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion(
// TODO: Remove the safety net above //
//////////////////////////////////////////////
- unsigned short *tempOutputWord = 0;
+ int *tempOutputWord = 0;
int nextWordLength = 0;
// TODO: Optimize init suggestion
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
inputSize, correction);
- unsigned short word[MAX_WORD_LENGTH_INTERNAL];
+ int word[MAX_WORD_LENGTH_INTERNAL];
int freq = getMostFrequentWordLike(
inputWordStartPos, inputWordLength, correction, word);
if (freq > 0) {
@@ -570,7 +561,7 @@ int UnigramDictionary::getSubStringSuggestion(
if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
return FLAG_MULTIPLE_SUGGEST_SKIP;
}
- outputWord[tempOutputWordLength] = SPACE;
+ outputWord[tempOutputWordLength] = KEYCODE_SPACE;
if (outputWordLength) {
++*outputWordLength;
}
@@ -598,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
const bool useFullEditDistance, const int inputSize, Correction *correction,
WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate,
const int startInputPos, const int startWordIndex, const int outputWordLength,
- int *freqArray, int *wordLengthArray, unsigned short *outputWord) const {
+ int *freqArray, int *wordLengthArray, int *outputWord) const {
if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
// Return if the last word index
return;
@@ -684,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
}
// Allocating fixed length array on stack
- unsigned short outputWord[MAX_WORD_LENGTH];
+ int outputWord[MAX_WORD_LENGTH];
int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
const int outputWordLength = 0;
@@ -698,12 +689,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit
// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
-inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
- const int inputSize, Correction *correction, unsigned short *word) const {
- uint16_t inWord[inputSize];
-
+int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, const int inputSize,
+ Correction *correction, int *word) const {
+ int inWord[inputSize];
for (int i = 0; i < inputSize; ++i) {
- inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i);
+ inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i);
}
return getMostFrequentWordLikeInner(inWord, inputSize, word);
}
@@ -721,14 +711,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
// In and out parameters may point to the same location. This function takes care
// not to use any input parameters after it wrote into its outputs.
static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
- const uint8_t *const root, const int startPos, const uint16_t *const inWord,
- const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex,
+ const uint8_t *const root, const int startPos, const int *const inWord,
+ const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex,
int *outPos) {
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
int pos = startPos;
- int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
- int32_t baseChar = toBaseLowerCase(codePoint);
- const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]);
+ int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
+ int baseChar = toBaseLowerCase(codePoint);
+ const int wChar = toBaseLowerCase(inWord[startInputIndex]);
if (baseChar != wChar) {
*outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos;
@@ -759,8 +749,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags,
// It will compare the frequency to the max frequency, and if greater, will
// copy the word into the output buffer. In output value maxFreq, it will
// write the new maximum frequency if it changed.
-static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length,
- short unsigned int *outWord, int *maxFreq) {
+static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord,
+ int *maxFreq) {
if (freq > *maxFreq) {
for (int q = 0; q < length; ++q) {
outWord[q] = newWord[q];
@@ -772,9 +762,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in
// Will find the highest frequency of the words like the one passed as an argument,
// that is, everything that only differs by case/accents.
-int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord,
- const int inputSize, short unsigned int *outWord) const {
- int32_t newWord[MAX_WORD_LENGTH_INTERNAL];
+int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+ int *outWord) const {
+ int newWord[MAX_WORD_LENGTH_INTERNAL];
int depth = 0;
int maxFreq = -1;
const uint8_t *const root = DICT_ROOT;
@@ -834,7 +824,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord
return maxFreq;
}
-int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const {
+int UnigramDictionary::getFrequency(const int *const inWord, const int length) const {
const uint8_t *const root = DICT_ROOT;
int pos = BinaryFormat::getTerminalPosition(root, inWord, length,
false /* forceLowerCaseSearch */);
@@ -859,8 +849,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
}
// TODO: remove this function.
-int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset,
- int length) const {
+int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const {
return -1;
}
@@ -878,7 +867,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// there aren't any more nodes at this level, it merely returns the address of the first byte after
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent.
-inline bool UnigramDictionary::processCurrentNode(const int initialPos,
+bool UnigramDictionary::processCurrentNode(const int initialPos,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
WordsPriorityQueuePool *queuePool, const int currentWordIndex) const {
@@ -906,7 +895,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// else if FLAG_IS_TERMINAL: the frequency
// else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address
// Note that you can't have a node that both is not a terminal and has no children.
- int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
+ int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos);
assert(NOT_A_CODE_POINT != c);
// We are going to loop through each character and make it look like it's a different
@@ -920,7 +909,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
// We prefetch the next char. If 'c' is the last char of this node, we will have
// NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node
// should behave as a terminal or not and whether we have children.
- const int32_t nextc = hasMultipleChars
+ const int nextc = hasMultipleChars
? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextc);
// If there are more chars in this nodes, then this virtual node is not a terminal.
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 57129bb07..248b09db1 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -39,14 +39,14 @@ class UnigramDictionary {
static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0;
static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1;
static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2;
- UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler,
- int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
- int getFrequency(const int32_t *const inWord, const int length) const;
- int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
+ UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength,
+ int maxWords, const unsigned int flags);
+ int getFrequency(const int *const inWord, const int length) const;
+ int getBigramPosition(int pos, int *word, int offset, int length) const;
int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
- const bool useFullEditDistance, unsigned short *outWords, int *frequencies,
+ const bool useFullEditDistance, int *outWords, int *frequencies,
int *outputTypes) const;
virtual ~UnigramDictionary();
@@ -93,9 +93,9 @@ class UnigramDictionary {
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
const int currentWordIndex) const;
int getMostFrequentWordLike(const int startInputIndex, const int inputSize,
- Correction *correction, unsigned short *word) const;
- int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize,
- short unsigned int *outWord) const;
+ Correction *correction, int *word) const;
+ int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize,
+ int *outWord) const;
int getSubStringSuggestion(
ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
const int *codes, const bool useFullEditDistance, Correction *correction,
@@ -103,22 +103,19 @@ class UnigramDictionary {
const bool hasAutoCorrectionCandidate, const int currentWordIndex,
const int inputWordStartPos, const int inputWordLength,
const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
- int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const;
- void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
- const int *xcoordinates, const int *ycoordinates, const int *codes,
- const bool useFullEditDistance, const int inputSize,
- Correction *correction, WordsPriorityQueuePool *queuePool,
+ int *wordLengthArray, int *outputWord, int *outputWordLength) const;
+ void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates,
+ const int *ycoordinates, const int *codes, const bool useFullEditDistance,
+ const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool,
const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
const int outputWordLength, int *freqArray, int *wordLengthArray,
- unsigned short *outputWord) const;
+ int *outputWord) const;
const uint8_t *const DICT_ROOT;
const int MAX_WORD_LENGTH;
const int MAX_WORDS;
- const int TYPED_LETTER_MULTIPLIER;
const int FULL_WORD_MULTIPLIER;
const int ROOT_POS;
- const unsigned int BYTES_IN_ONE_CHAR;
const int MAX_DIGRAPH_SEARCH_DEPTH;
const int FLAGS;
diff --git a/native/jni/src/words_priority_queue.cpp b/native/jni/src/words_priority_queue.cpp
new file mode 100644
index 000000000..7e18d0f87
--- /dev/null
+++ b/native/jni/src/words_priority_queue.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "words_priority_queue.h"
+
+namespace latinime {
+
+int WordsPriorityQueue::outputSuggestions(const int *before, const int beforeLength,
+ int *frequencies, int *outputCodePoints, int* outputTypes) {
+ mHighestSuggestedWord = 0;
+ const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size()));
+ SuggestedWord *swBuffer[size];
+ int index = size - 1;
+ while (!mSuggestions.empty() && index >= 0) {
+ SuggestedWord *sw = mSuggestions.top();
+ if (DEBUG_WORDS_PRIORITY_QUEUE) {
+ AKLOGI("dump word. %d", sw->mScore);
+ DUMP_WORD(sw->mWord, sw->mWordLength);
+ }
+ swBuffer[index] = sw;
+ mSuggestions.pop();
+ --index;
+ }
+ if (size >= 2) {
+ SuggestedWord *nsMaxSw = 0;
+ int maxIndex = 0;
+ float maxNs = 0;
+ for (int i = 0; i < size; ++i) {
+ SuggestedWord *tempSw = swBuffer[i];
+ if (!tempSw) {
+ continue;
+ }
+ const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0);
+ if (tempNs >= maxNs) {
+ maxNs = tempNs;
+ maxIndex = i;
+ nsMaxSw = tempSw;
+ }
+ }
+ if (maxIndex > 0 && nsMaxSw) {
+ memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0]));
+ swBuffer[0] = nsMaxSw;
+ }
+ }
+ for (int i = 0; i < size; ++i) {
+ SuggestedWord *sw = swBuffer[i];
+ if (!sw) {
+ AKLOGE("SuggestedWord is null %d", i);
+ continue;
+ }
+ const int wordLength = sw->mWordLength;
+ int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH;
+ frequencies[i] = sw->mScore;
+ outputTypes[i] = sw->mType;
+ memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0]));
+ if (wordLength < MAX_WORD_LENGTH) {
+ targetAddress[wordLength] = 0;
+ }
+ sw->mUsed = false;
+ }
+ return size;
+}
+} // namespace latinime
diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h
index 19efa5da3..7d0c4d17d 100644
--- a/native/jni/src/words_priority_queue.h
+++ b/native/jni/src/words_priority_queue.h
@@ -30,15 +30,15 @@ class WordsPriorityQueue {
class SuggestedWord {
public:
int mScore;
- unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
+ int mWord[MAX_WORD_LENGTH_INTERNAL];
int mWordLength;
bool mUsed;
int mType;
- void setParams(int score, unsigned short *word, int wordLength, int type) {
+ void setParams(int score, int *word, int wordLength, int type) {
mScore = score;
mWordLength = wordLength;
- memcpy(mWord, word, sizeof(unsigned short) * wordLength);
+ memcpy(mWord, word, sizeof(mWord[0]) * wordLength);
mUsed = true;
mType = type;
}
@@ -53,13 +53,14 @@ class WordsPriorityQueue {
}
}
- virtual ~WordsPriorityQueue() {
+ // Non virtual inline destructor -- never inherit this class
+ AK_FORCE_INLINE ~WordsPriorityQueue() {
delete[] mSuggestedWords;
}
- void push(int score, unsigned short *word, int wordLength, int type) {
+ void push(int score, int *word, int wordLength, int type) {
SuggestedWord *sw = 0;
- if (mSuggestions.size() >= MAX_WORDS) {
+ if (size() >= MAX_WORDS) {
sw = mSuggestions.top();
const int minScore = sw->mScore;
if (minScore >= score) {
@@ -94,68 +95,11 @@ class WordsPriorityQueue {
return sw;
}
- int outputSuggestions(const unsigned short *before, const int beforeLength,
- int *frequencies, unsigned short *outputChars, int* outputTypes) {
- mHighestSuggestedWord = 0;
- const unsigned int size = min(
- MAX_WORDS, static_cast<unsigned int>(mSuggestions.size()));
- SuggestedWord *swBuffer[size];
- int index = size - 1;
- while (!mSuggestions.empty() && index >= 0) {
- SuggestedWord *sw = mSuggestions.top();
- if (DEBUG_WORDS_PRIORITY_QUEUE) {
- AKLOGI("dump word. %d", sw->mScore);
- DUMP_WORD(sw->mWord, sw->mWordLength);
- }
- swBuffer[index] = sw;
- mSuggestions.pop();
- --index;
- }
- if (size >= 2) {
- SuggestedWord *nsMaxSw = 0;
- unsigned int maxIndex = 0;
- float maxNs = 0;
- for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord *tempSw = swBuffer[i];
- if (!tempSw) {
- continue;
- }
- const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0);
- if (tempNs >= maxNs) {
- maxNs = tempNs;
- maxIndex = i;
- nsMaxSw = tempSw;
- }
- }
- if (maxIndex > 0 && nsMaxSw) {
- memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(SuggestedWord *));
- swBuffer[0] = nsMaxSw;
- }
- }
- for (unsigned int i = 0; i < size; ++i) {
- SuggestedWord *sw = swBuffer[i];
- if (!sw) {
- AKLOGE("SuggestedWord is null %d", i);
- continue;
- }
- const unsigned int wordLength = sw->mWordLength;
- unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH;
- frequencies[i] = sw->mScore;
- outputTypes[i] = sw->mType;
- memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short));
- if (wordLength < MAX_WORD_LENGTH) {
- targetAddress[wordLength] = 0;
- }
- sw->mUsed = false;
- }
- return size;
- }
-
int size() const {
- return mSuggestions.size();
+ return static_cast<int>(mSuggestions.size());
}
- void clear() {
+ AK_FORCE_INLINE void clear() {
mHighestSuggestedWord = 0;
while (!mSuggestions.empty()) {
SuggestedWord *sw = mSuggestions.top();
@@ -175,15 +119,18 @@ class WordsPriorityQueue {
DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength);
}
- float getHighestNormalizedScore(const unsigned short *before, const int beforeLength,
- unsigned short **outWord, int *outScore, int *outLength) {
+ float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord,
+ int *outScore, int *outLength) {
if (!mHighestSuggestedWord) {
return 0.0;
}
- return getNormalizedScore(
- mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength);
+ return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore,
+ outLength);
}
+ int outputSuggestions(const int *before, const int beforeLength, int *frequencies,
+ int *outputCodePoints, int* outputTypes);
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue);
struct wordComparator {
@@ -192,9 +139,8 @@ class WordsPriorityQueue {
}
};
- SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word,
- int wordLength, int type) {
- for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) {
+ SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) {
+ for (int i = 0; i < MAX_WORD_LENGTH; ++i) {
if (!mSuggestedWords[i].mUsed) {
mSuggestedWords[i].setParams(score, word, wordLength, type);
return &mSuggestedWords[i];
@@ -203,10 +149,10 @@ class WordsPriorityQueue {
return 0;
}
- static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before,
- const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) {
+ static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength,
+ int **outWord, int *outScore, int *outLength) {
const int score = sw->mScore;
- unsigned short *word = sw->mWord;
+ int *word = sw->mWord;
const int wordLength = sw->mWordLength;
if (outScore) {
*outScore = score;
@@ -217,15 +163,15 @@ class WordsPriorityQueue {
if (outLength) {
*outLength = wordLength;
}
- return Correction::RankingAlgorithm::calcNormalizedScore(
- before, beforeLength, word, wordLength, score);
+ return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word,
+ wordLength, score);
}
typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>,
wordComparator> Suggestions;
Suggestions mSuggestions;
- const unsigned int MAX_WORDS;
- const unsigned int MAX_WORD_LENGTH;
+ const int MAX_WORDS;
+ const int MAX_WORD_LENGTH;
SuggestedWord *mSuggestedWords;
SuggestedWord *mHighestSuggestedWord;
};
diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h
index 2d52903e0..c14afa07b 100644
--- a/native/jni/src/words_priority_queue_pool.h
+++ b/native/jni/src/words_priority_queue_pool.h
@@ -36,7 +36,8 @@ class WordsPriorityQueuePool {
}
}
- virtual ~WordsPriorityQueuePool() {
+ // Non virtual inline destructor -- never inherit this class
+ ~WordsPriorityQueuePool() {
// Note: these explicit calls to the destructor match the calls to placement new() above.
if (mMasterQueue) mMasterQueue->~WordsPriorityQueue();
for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i) {
@@ -68,7 +69,7 @@ class WordsPriorityQueuePool {
}
}
- inline void clearSubQueue(const int wordIndex) {
+ AK_FORCE_INLINE void clearSubQueue(const int wordIndex) {
for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
WordsPriorityQueue *queue = getSubQueue(wordIndex, i);
if (queue) {