diff options
Diffstat (limited to 'native/jni/src')
29 files changed, 1409 insertions, 938 deletions
diff --git a/native/jni/src/additional_proximity_chars.h b/native/jni/src/additional_proximity_chars.h index d420c4664..0333c2dbd 100644 --- a/native/jni/src/additional_proximity_chars.h +++ b/native/jni/src/additional_proximity_chars.h @@ -39,7 +39,7 @@ class AdditionalProximityChars { static const int EN_US_ADDITIONAL_U_SIZE = 4; static const int32_t EN_US_ADDITIONAL_U[]; - static bool isEnLocale(const char *localeStr) { + AK_FORCE_INLINE static bool isEnLocale(const char *localeStr) { const size_t LOCALE_EN_US_SIZE = strlen(LOCALE_EN_US); return localeStr && strlen(localeStr) >= LOCALE_EN_US_SIZE && strncmp(localeStr, LOCALE_EN_US, LOCALE_EN_US_SIZE) == 0; diff --git a/native/jni/src/basechars.cpp b/native/jni/src/basechars.cpp deleted file mode 100644 index 379cb6226..000000000 --- a/native/jni/src/basechars.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <stdint.h> - -#include "char_utils.h" - -namespace latinime { - -/* - * Table mapping most combined Latin, Greek, and Cyrillic characters - * to their base characters. If c is in range, BASE_CHARS[c] == c - * if c is not a combined character, or the base character if it - * is combined. - */ -const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = { - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, - 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, - 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, - 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, - 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, - 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, - 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f - // Manually changed df to 73 - 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, - 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, - 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, - 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f - 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, - 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, - 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, - 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, - 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, - 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, - 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, - 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, - 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, - 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, - 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, - 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, - 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, - 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, - 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, - 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, - 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, - 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, - 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, - 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, - 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, - 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, - 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, - 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, - 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, - 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, - 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, - 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, - 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, - 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, - 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, - 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, - 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, - 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, - 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, - 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, - 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, - 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, - 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, - 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, - 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, - 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, - 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, - 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, - 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, - 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, - 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, - 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, - 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, - 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, - 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, - 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, - 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, - 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, - 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, - 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, - 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, - 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, - 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, - 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, - 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, - 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, - 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, - 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, - 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, - 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, - 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, - 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, - 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, - 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, - 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, - 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, - 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, - 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, - 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, - 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, - 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, - 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, - 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, - 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, - 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, - 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, - 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, - 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, - 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, - 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, - 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, - 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, - 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, - 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, - 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, - 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, - 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, - 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, - 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, - 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, - 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, - 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, - 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, - 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, - 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, - 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, - 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, - 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, - 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, - 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, - 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, - 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, - 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, - 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, - 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, - 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, - 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, - 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, - 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, - 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, - 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, - 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, - 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, - 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, - 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, - 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, - 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, - 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, - 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, - 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, -}; -// generated with: -// cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' -} // namespace latinime diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index dade4f16b..f89dd1615 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, BigramDictionary::~BigramDictionary() { } -bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency, - int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const { +bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq, + int *bigramCodePoints, int *outputTypes) const { word[length] = 0; if (DEBUG_DICT) { #ifdef FLAG_DBG char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; + for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]); AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency); #endif } @@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ int insertAt = 0; while (insertAt < MAX_PREDICTIONS) { if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency - && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) { + && length < Dictionary::wideStrLen( + bigramCodePoints + insertAt * MAX_WORD_LENGTH))) { break; } insertAt++; @@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0])); bigramFreq[insertAt] = frequency; outputTypes[insertAt] = Dictionary::KIND_PREDICTION; - memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH, - bigramChars + insertAt * MAX_WORD_LENGTH, - (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH); - unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH; + memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH, + bigramCodePoints + insertAt * MAX_WORD_LENGTH, + (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH); + int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } @@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * prevWordLength: its length. * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions. * codesSize: the size of the codes array. - * bigramChars: an array for output, at the same format as outwords for getSuggestions. + * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * bigramFreq: an array to output frequencies. * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. @@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes, - int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const { +int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes, + int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name @@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in int bigramCount = 0; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - uint16_t bigramBuffer[MAX_WORD_LENGTH]; + int bigramBuffer[MAX_WORD_LENGTH]; int unigramFreq = 0; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); @@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // here, but it can't get too bad. const int frequency = BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp); - if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars, + if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints, outputTypes)) { ++bigramCount; } @@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } -bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const { +bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. int maxAlt = MAX_ALTERNATIVES; - const unsigned short firstBaseChar = toBaseLowerCase(*word); + const int firstBaseChar = toBaseLowerCase(*word); while (maxAlt > 0) { if (toBaseLowerCase(*inputCodes) == firstBaseChar) { return true; diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 5f11ae822..150192de2 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -27,23 +27,23 @@ namespace latinime { class BigramDictionary { public: BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); - int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const; - void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, + int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords, + int *frequencies, int *outputTypes) const; + void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength, std::map<int, int> *map, uint8_t *filter) const; - bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; + bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); - bool addWordBigram(unsigned short *word, int length, int frequency, - int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const; + bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints, + int *outputTypes) const; int getBigramAddress(int *pos, bool advance); int getBigramFreq(int *pos); void searchForTerminalNode(int addressLookingFor, int frequency); bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } - bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; - int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, + bool checkFirstCharacter(int *word, int *inputCodes) const; + int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; const unsigned char *DICT; diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index eec52e323..9a8c315f7 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -28,10 +28,6 @@ class BinaryFormat { public: // Mask and flags for children address type selection. static const int MASK_GROUP_ADDRESS_TYPE = 0xC0; - static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; - static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; - static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; - static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; // Flag for single/multiple char group static const int FLAG_HAS_MULTIPLE_CHARS = 0x20; @@ -61,36 +57,21 @@ class BinaryFormat { // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; - static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; - const static int UNKNOWN_FORMAT = -1; - // Originally, format version 1 had a 16-bit magic number, then the version number `01' - // then options that must be 0. Hence the first 32-bits of the format are always as follow - // and it's okay to consider them a magic number as a whole. - const static uint32_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100; - const static unsigned int FORMAT_VERSION_1_HEADER_SIZE = 5; - // The versions of Latin IME that only handle format version 1 only test for the magic - // number, so we had to change it so that version 2 files would be rejected by older - // implementations. On this occasion, we made the magic number 32 bits long. - const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; - - const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; - const static int SHORTCUT_LIST_SIZE_SIZE = 2; + static const int UNKNOWN_FORMAT = -1; + static const int SHORTCUT_LIST_SIZE_SIZE = 2; static int detectFormat(const uint8_t *const dict); static unsigned int getHeaderSize(const uint8_t *const dict); static unsigned int getFlags(const uint8_t *const dict); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); - static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); + static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipFrequency(const uint8_t flags, const int pos); static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos); - static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); @@ -98,10 +79,10 @@ class BinaryFormat { static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); static int getAttributeFrequencyFromFlags(const int flags); - static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, + static int getTerminalPosition(const uint8_t *const root, const int *const inWord, const int length, const bool forceLowerCaseSearch); static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, - uint16_t *outWord, int *outUnigramFrequency); + int *outWord, int *outUnigramFrequency); static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -113,17 +94,37 @@ class BinaryFormat { REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1, REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4 }; - const static unsigned int NO_FLAGS = 0; private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); - const static int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - const static int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; - const static int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; + static const int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; + static const int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; + static const int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; + static const int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; + static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; + + // Originally, format version 1 had a 16-bit magic number, then the version number `01' + // then options that must be 0. Hence the first 32-bits of the format are always as follow + // and it's okay to consider them a magic number as a whole. + static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100; + static const unsigned int FORMAT_VERSION_1_HEADER_SIZE = 5; + // The versions of Latin IME that only handle format version 1 only test for the magic + // number, so we had to change it so that version 2 files would be rejected by older + // implementations. On this occasion, we made the magic number 32 bits long. + static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; + + static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; + static const int32_t MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + static const int32_t CHARACTER_ARRAY_TERMINATOR = 0x1F; + static const int MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE = 2; + static const unsigned int NO_FLAGS = 0; static int skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos); + static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); }; -inline int BinaryFormat::detectFormat(const uint8_t *const dict) { +AK_FORCE_INLINE int BinaryFormat::detectFormat(const uint8_t *const dict) { // The magic number is stored big-endian. const uint32_t magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3]; switch (magicNumber) { @@ -148,7 +149,7 @@ inline int BinaryFormat::detectFormat(const uint8_t *const dict) { inline unsigned int BinaryFormat::getFlags(const uint8_t *const dict) { switch (detectFormat(dict)) { case 1: - return NO_FLAGS; + return NO_FLAGS; // TODO: NO_FLAGS is unused anywhere else? default: return (dict[6] << 8) + dict[7]; } @@ -166,7 +167,8 @@ inline unsigned int BinaryFormat::getHeaderSize(const uint8_t *const dict) { } } -inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) { +AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, + int *pos) { const int msb = dict[(*pos)++]; if (msb < 0x80) return msb; return ((msb & 0x7F) << 8) | dict[(*pos)++]; @@ -176,17 +178,18 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict return dict[(*pos)++]; } -inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { +AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, + int *pos) { const int origin = *pos; - const int32_t codePoint = dict[origin]; + const int codePoint = dict[origin]; if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) { *pos = origin + 1; return NOT_A_CODE_POINT; } else { *pos = origin + 3; - const int32_t char_1 = codePoint << 16; - const int32_t char_2 = char_1 + (dict[origin + 1] << 8); + const int char_1 = codePoint << 16; + const int char_2 = char_1 + (dict[origin + 1] << 8); return char_2 + dict[origin + 2]; } } else { @@ -200,9 +203,9 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const return dict[pos]; } -inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { +AK_FORCE_INLINE int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { int currentPos = pos; - int32_t character = dict[currentPos++]; + int character = dict[currentPos++]; while (CHARACTER_ARRAY_TERMINATOR != character) { if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE; @@ -226,7 +229,7 @@ static inline int attributeAddressSize(const uint8_t flags) { */ } -static inline int skipExistingBigrams(const uint8_t *const dict, const int pos) { +static AK_FORCE_INLINE int skipExistingBigrams(const uint8_t *const dict, const int pos) { int currentPos = pos; uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, ¤tPos); while (flags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT) { @@ -255,7 +258,7 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { return FLAG_IS_TERMINAL & flags ? pos + 1 : pos; } -inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags, +AK_FORCE_INLINE int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos) { if (FLAG_HAS_SHORTCUT_TARGETS & flags) { return pos + shortcutByteSize(dict, pos); @@ -264,7 +267,7 @@ inline int BinaryFormat::skipShortcuts(const uint8_t *const dict, const uint8_t } } -inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags, +AK_FORCE_INLINE int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos) { if (FLAG_HAS_BIGRAMS & flags) { return skipExistingBigrams(dict, pos); @@ -273,7 +276,7 @@ inline int BinaryFormat::skipBigrams(const uint8_t *const dict, const uint8_t fl } } -inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags, +AK_FORCE_INLINE int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. int newPos = pos; @@ -282,7 +285,7 @@ inline int BinaryFormat::skipAllAttributes(const uint8_t *const dict, const uint return newPos; } -inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict, +AK_FORCE_INLINE int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags, const int pos) { int currentPos = pos; currentPos = skipChildrenPosition(flags, currentPos); @@ -290,8 +293,8 @@ inline int BinaryFormat::skipChildrenPosAndAttributes(const uint8_t *const dict, return currentPos; } -inline int BinaryFormat::readChildrenPosition(const uint8_t *const dict, const uint8_t flags, - const int pos) { +AK_FORCE_INLINE int BinaryFormat::readChildrenPosition(const uint8_t *const dict, + const uint8_t flags, const int pos) { int offset = 0; switch (MASK_GROUP_ADDRESS_TYPE & flags) { case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: @@ -318,7 +321,7 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) { return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags)); } -inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict, +AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos) { int offset = 0; const int origin = *pos; @@ -352,8 +355,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) { // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. -inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, - const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) { +AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root, + const int *const inWord, const int length, const bool forceLowerCaseSearch) { int pos = 0; int wordPos = 0; @@ -362,14 +365,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, // there was no match (or we would have found it). if (wordPos >= length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; + const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. if (0 >= charGroupCount) return NOT_VALID_WORD; const int charGroupPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is @@ -438,8 +441,8 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, * outUnigramFrequency: a pointer to an int to write the frequency into. * Return value : the length of the word, of 0 if the word was not found. */ -inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, - const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) { +AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, + const int maxDepth, int *outWord, int *outUnigramFrequency) { int pos = 0; int wordPos = 0; @@ -457,13 +460,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a --charGroupCount) { const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); - const int32_t character = getCodePointAndForwardPointer(root, &pos); + const int character = getCodePointAndForwardPointer(root, &pos); if (address == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { - int32_t nextChar = getCodePointAndForwardPointer(root, &pos); + int nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; @@ -522,13 +525,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a if (0 != lastCandidateGroupPos) { const uint8_t lastFlags = getFlagsAndForwardPointer(root, &lastCandidateGroupPos); - const int32_t lastChar = + const int lastChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { - int32_t nextChar = - getCodePointAndForwardPointer(root, &lastCandidateGroupPos); + int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; while (-1 != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index d0547a982..ff05f68d6 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -15,8 +15,10 @@ */ #include <cstdlib> +#include <stdint.h> #include "char_utils.h" +#include "defines.h" namespace latinime { @@ -33,7 +35,7 @@ struct LatinCapitalSmallPair { // // unsigned short c, cc, ccc, ccc2; // for (c = 0; c < 0xFFFF ; c++) { -// if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { +// if (c < NELEMS(BASE_CHARS)) { // cc = BASE_CHARS[c]; // } else { // cc = c; @@ -894,9 +896,183 @@ static int compare_pair_capital(const void *a, const void *b) { unsigned short latin_tolower(const unsigned short c) { struct LatinCapitalSmallPair *p = static_cast<struct LatinCapitalSmallPair *>(bsearch(&c, SORTED_CHAR_MAP, - sizeof(SORTED_CHAR_MAP) / sizeof(SORTED_CHAR_MAP[0]), - sizeof(SORTED_CHAR_MAP[0]), - compare_pair_capital)); + NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital)); return p ? p->small : c; } + +/* + * Table mapping most combined Latin, Greek, and Cyrillic characters + * to their base characters. If c is in range, BASE_CHARS[c] == c + * if c is not a combined character, or the base character if it + * is combined. + * + * Generated with: + * cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; \ + * $base[hex($foo[0])] = hex($foo[5]);} \ + * for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { \ + * printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' + */ +const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, + 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, + 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, + 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, + 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, + 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, + 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f + // Manually changed df to 73 + 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, + 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, + 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, + 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f + 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, + 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, + 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, + 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, + 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, + 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, + 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, + 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, + 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, + 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, + 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, + 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, + 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, + 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, + 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, + 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, + 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, + 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, + 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, + 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, + 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, + 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, + 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, + 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, + 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, + 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, + 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, + 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, + 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, + 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, + 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, + 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, + 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, + 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, + 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, + 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, + 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, + 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, + 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, + 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, + 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, + 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, + 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, + 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, + 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, + 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, + 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, + 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, + 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, + 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, + 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, + 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, + 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, + 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, + 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, + 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, + 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, + 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, + 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, + 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, + 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, + 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, + 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, + 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, + 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, + 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, + 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, + 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, + 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, + 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, + 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, + 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, + 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, + 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, + 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, + 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, + 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, + 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, + 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, + 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, + 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, + 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, + 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, + 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, + 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, + 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, + 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, + 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, + 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, + 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, + 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, + 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, + 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, + 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, + 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, + 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, + 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, + 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, + 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, + 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, + 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, + 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, + 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, + 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, + 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, + 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, + 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, + 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, + 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, + 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, + 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, + 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, + 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, + 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, + 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, + 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, +}; } // namespace latinime diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index 9008e364c..c632b79b8 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -18,22 +18,23 @@ #define LATINIME_CHAR_UTILS_H #include <cctype> -#include <stdint.h> + +#include "defines.h" namespace latinime { -inline static bool isAsciiUpper(unsigned short c) { +inline static bool isAsciiUpper(int c) { // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). return (c >= 'A' && c <= 'Z'); } -inline static unsigned short toAsciiLower(unsigned short c) { +inline static int toAsciiLower(int c) { return c - 'A' + 'a'; } -inline static bool isAscii(unsigned short c) { - return isascii(static_cast<int>(c)) != 0; +inline static bool isAscii(int c) { + return isascii(c) != 0; } unsigned short latin_tolower(const unsigned short c); @@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c); * if c is not a combined character, or the base character if it * is combined. */ - static const int BASE_CHARS_SIZE = 0x0500; -extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE]; +extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; -inline static unsigned short toBaseChar(unsigned short c) { +inline static int toBaseCodePoint(int c) { if (c < BASE_CHARS_SIZE) { - return BASE_CHARS[c]; + return static_cast<int>(BASE_CHARS[c]); } return c; } -inline static unsigned short toLowerCase(const unsigned short c) { +AK_FORCE_INLINE static int toLowerCase(const int c) { if (isAsciiUpper(c)) { return toAsciiLower(c); } else if (isAscii(c)) { return c; } - return latin_tolower(c); + return static_cast<int>(latin_tolower(static_cast<unsigned short>(c))); } -inline static unsigned short toBaseLowerCase(const unsigned short c) { - return toLowerCase(toBaseChar(c)); +AK_FORCE_INLINE static int toBaseLowerCase(const int c) { + return toLowerCase(toBaseCodePoint(c)); } -inline static bool isSkippableChar(const uint16_t character) { +inline static bool isSkippableCodePoint(const int codePoint) { // TODO: Do not hardcode here - return character == '\'' || character == '-'; + return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; } } // namespace latinime diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 524abe9a1..50f33fe23 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -60,29 +60,6 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, } } -inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, - const int inputSize, const unsigned short *output, const int outputLength) { - // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. - // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. - // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, - // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. - int *const current = editDistanceTable + outputLength * (inputSize + 1); - const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); - const int *const prevprev = - outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; - current[0] = outputLength; - const uint32_t co = toBaseLowerCase(output[outputLength - 1]); - const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; - for (int i = 1; i <= inputSize; ++i) { - const uint32_t ci = toBaseLowerCase(input[i - 1]); - const uint16_t cost = (ci == co) ? 0 : 1; - current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); - if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { - current[i] = min(current[i], prevprev[i - 2] + 1); - } - } -} - inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, const int outputLength, const int inputSize) { if (DEBUG_EDIT_DISTANCE) { @@ -91,16 +68,6 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputSize]; } -////////////////////// -// inline functions // -////////////////////// -static const char SINGLE_QUOTE = '\''; - -inline bool Correction::isSingleQuote(const unsigned short c) { - const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); - return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE); -} - //////////////// // Correction // //////////////// @@ -162,31 +129,20 @@ bool Correction::sameAsTyped() { } int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const bool isSpaceProximity, const unsigned short *word) { + const int wordCount, const bool isSpaceProximity, const int *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, wordCount, this, isSpaceProximity, word); } -int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { +int Correction::getFinalProbability(const int probability, int **word, int *wordLength) { return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); } -int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputSize) { +int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, + const int inputSize) { return getFinalProbabilityInternal(probability, word, wordLength, inputSize); } -int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputSize) { - const int outputIndex = mTerminalOutputIndex; - const int inputIndex = mTerminalInputIndex; - *wordLength = outputIndex + 1; - *word = mWord; - int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( - inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize); - return finalProbability; -} - bool Correction::initProcessState(const int outputIndex) { if (mCorrectionStates[outputIndex].mChildCount <= 0) { return false; @@ -230,42 +186,6 @@ int Correction::getInputIndex() const { return mInputIndex; } -void Correction::incrementInputIndex() { - ++mInputIndex; -} - -void Correction::incrementOutputIndex() { - ++mOutputIndex; - mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; - mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; - mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; - mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; - mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; - - mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; - mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; - mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; - mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; - mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; - - mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; - mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos; - mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos; - - mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded; - - mCorrectionStates[mOutputIndex].mMatching = mMatching; - mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; - mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; - mCorrectionStates[mOutputIndex].mTransposing = mTransposing; - mCorrectionStates[mOutputIndex].mExceeding = mExceeding; - mCorrectionStates[mOutputIndex].mSkipping = mSkipping; -} - -void Correction::startToTraverseAllNodes() { - mNeedsToTraverseAllNodes = true; -} - bool Correction::needsToPrune() const { // TODO: use edit distance here return mOutputIndex - 1 >= mMaxDepth || mProximityCount > mMaxEditDistance @@ -273,44 +193,15 @@ bool Correction::needsToPrune() const { || (!mDoAutoCompletion && (mOutputIndex > mInputSize)); } -void Correction::addCharToCurrentWord(const int32_t c) { - mWord[mOutputIndex] = c; - const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); - calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, - mWord, mOutputIndex + 1); -} - -Correction::CorrectionType Correction::processSkipChar( - const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { - addCharToCurrentWord(c); - mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); - mTerminalOutputIndex = mOutputIndex; - if (mNeedsToTraverseAllNodes && isTerminal) { - incrementOutputIndex(); - return TRAVERSE_ALL_ON_TERMINAL; - } else { - incrementOutputIndex(); - return TRAVERSE_ALL_NOT_ON_TERMINAL; - } -} - -Correction::CorrectionType Correction::processUnrelatedCorrectionType() { - // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType - mTerminalInputIndex = mInputIndex; - mTerminalOutputIndex = mOutputIndex; - return UNRELATED; -} - -inline bool isEquivalentChar(ProximityType type) { +inline static bool isEquivalentChar(ProximityType type) { return type == EQUIVALENT_CHAR; } -inline bool isProximityCharOrEquivalentChar(ProximityType type) { +inline static bool isProximityCharOrEquivalentChar(ProximityType type) { return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; } -Correction::CorrectionType Correction::processCharAndCalcState( - const int32_t c, const bool isTerminal) { +Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); if (correctionCount > mMaxErrors) { return processUnrelatedCorrectionType(); @@ -628,10 +519,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } -inline static int getQuoteCount(const unsigned short *word, const int length) { +inline static int getQuoteCount(const int *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { - if (word[i] == SINGLE_QUOTE) { + if (word[i] == KEYCODE_SINGLE_QUOTE) { ++quoteCount; } } @@ -639,7 +530,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) { } inline static bool isUpperCase(unsigned short c) { - return isAsciiUpper(toBaseChar(c)); + return isAsciiUpper(toBaseCodePoint(c)); } ////////////////////// @@ -672,7 +563,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // TODO: use mExcessiveCount const int matchCount = inputSize - correction->mProximityCount - excessiveCount; - const unsigned short *word = correction->mWord; + const int *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) @@ -911,7 +802,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex /* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, - const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { + const Correction *correction, const bool isSpaceProximity, const int *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; @@ -1040,9 +931,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( } /* Damerau-Levenshtein distance */ -inline static int editDistanceInternal( - int *editDistanceTable, const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength) { +inline static int editDistanceInternal(int *editDistanceTable, const int *before, + const int beforeLength, const int *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] int *dp = editDistanceTable; const int li = beforeLength + 1; @@ -1056,9 +946,9 @@ inline static int editDistanceInternal( for (int i = 0; i < li - 1; ++i) { for (int j = 0; j < lo - 1; ++j) { - const uint32_t ci = toBaseLowerCase(before[i]); - const uint32_t co = toBaseLowerCase(after[j]); - const uint16_t cost = (ci == co) ? 0 : 1; + const int ci = toBaseLowerCase(before[i]); + const int co = toBaseLowerCase(after[j]); + const int cost = (ci == co) ? 0 : 1; dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) @@ -1080,8 +970,8 @@ inline static int editDistanceInternal( return dp[li * lo - 1]; } -int Correction::RankingAlgorithm::editDistance(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength) { +int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength, + const int *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); } @@ -1109,16 +999,15 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before, // So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. /* static */ -float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength, - const int score) { +float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength, + const int *after, const int afterLength, const int score) { if (0 == beforeLength || 0 == afterLength) { return 0; } const int distance = editDistance(before, beforeLength, after, afterLength); int spaceCount = 0; for (int i = 0; i < afterLength; ++i) { - if (after[i] == CODE_SPACE) { + if (after[i] == KEYCODE_SPACE) { ++spaceCount; } } diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index f016d5453..912cd838e 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -56,7 +56,8 @@ class Correction { // No need to initialize it explicitly here. } - virtual ~Correction() {} + // Non virtual inline destructor -- never inherit this class + ~Correction() {} void resetCorrection(); void initCorrection( const ProximityInfo *pi, const int inputSize, const int maxWordLength); @@ -78,14 +79,13 @@ class Correction { return ++mTotalTraverseCount; } - int getFreqForSplitMultipleWords( - const int *freqArray, const int *wordLengthArray, const int wordCount, - const bool isSpaceProximity, const unsigned short *word); - int getFinalProbability(const int probability, unsigned short **word, int *wordLength); - int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputSize); + int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, + const int wordCount, const bool isSpaceProximity, const int *word); + int getFinalProbability(const int probability, int **word, int *wordLength); + int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, + const int inputSize); - CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); + CorrectionType processCharAndCalcState(const int c, const bool isTerminal); ///////////////////////// // Tree helper methods @@ -110,29 +110,28 @@ class Correction { const int inputSize); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction *correction, const bool isSpaceProximity, - const unsigned short *word); - static float calcNormalizedScore(const unsigned short *before, const int beforeLength, - const unsigned short *after, const int afterLength, const int score); - static int editDistance(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength); + const int *word); + static float calcNormalizedScore(const int *before, const int beforeLength, + const int *after, const int afterLength, const int score); + static int editDistance(const int *before, const int beforeLength, const int *after, + const int afterLength); private: - static const int CODE_SPACE = ' '; static const int MAX_INITIAL_SCORE = 255; }; // proximity info state - void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, + void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes, const int inputSize, const int *xCoordinates, const int *yCoordinates) { mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); } - const unsigned short *getPrimaryInputWord() const { + const int *getPrimaryInputWord() const { return mProximityInfoState.getPrimaryInputWord(); } - unsigned short getPrimaryCharAt(const int index) const { - return mProximityInfoState.getPrimaryCharAt(index); + int getPrimaryCodePointAt(const int index) const { + return mProximityInfoState.getPrimaryCodePointAt(index); } private: @@ -147,7 +146,7 @@ class Correction { } static const int TWO_31ST_DIV_2 = S_INT_MAX / 2; - inline static void multiplyIntCapped(const int multiplier, int *base) { + AK_FORCE_INLINE static void multiplyIntCapped(const int multiplier, int *base) { const int temp = *base; if (temp != S_INT_MAX) { // Branch if multiplier == 2 for the optimization @@ -170,7 +169,7 @@ class Correction { } } - inline static int powerIntCapped(const int base, const int n) { + AK_FORCE_INLINE static int powerIntCapped(const int base, const int n) { if (n <= 0) return 1; if (base == 2) { return n < 31 ? 1 << n : S_INT_MAX; @@ -181,7 +180,7 @@ class Correction { } } - inline static void multiplyRate(const int rate, int *freq) { + AK_FORCE_INLINE static void multiplyRate(const int rate, int *freq) { if (*freq != S_INT_MAX) { if (*freq > 1000000) { *freq /= 100; @@ -215,13 +214,13 @@ class Correction { inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); - inline bool isSingleQuote(const unsigned short c); - inline CorrectionType processSkipChar( - const int32_t c, const bool isTerminal, const bool inputIndexIncremented); + inline bool isSingleQuote(const int c); + inline CorrectionType processSkipChar(const int c, const bool isTerminal, + const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); - inline void addCharToCurrentWord(const int32_t c); - inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputSize); + inline void addCharToCurrentWord(const int c); + inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength, + const int inputSize); static const int TYPED_LETTER_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2; @@ -241,7 +240,7 @@ class Correction { uint8_t mTotalTraverseCount; // The following arrays are state buffer. - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH_INTERNAL]; int mDistances[MAX_WORD_LENGTH_INTERNAL]; // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. @@ -275,5 +274,109 @@ class Correction { bool mSkipping; ProximityInfoState mProximityInfoState; }; + +inline void Correction::incrementInputIndex() { + ++mInputIndex; +} + +AK_FORCE_INLINE void Correction::incrementOutputIndex() { + ++mOutputIndex; + mCorrectionStates[mOutputIndex].mParentIndex = mCorrectionStates[mOutputIndex - 1].mParentIndex; + mCorrectionStates[mOutputIndex].mChildCount = mCorrectionStates[mOutputIndex - 1].mChildCount; + mCorrectionStates[mOutputIndex].mSiblingPos = mCorrectionStates[mOutputIndex - 1].mSiblingPos; + mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex; + mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes; + + mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount; + mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount; + mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount; + mCorrectionStates[mOutputIndex].mExcessiveCount = mExcessiveCount; + mCorrectionStates[mOutputIndex].mSkippedCount = mSkippedCount; + + mCorrectionStates[mOutputIndex].mSkipPos = mSkipPos; + mCorrectionStates[mOutputIndex].mTransposedPos = mTransposedPos; + mCorrectionStates[mOutputIndex].mExcessivePos = mExcessivePos; + + mCorrectionStates[mOutputIndex].mLastCharExceeded = mLastCharExceeded; + + mCorrectionStates[mOutputIndex].mMatching = mMatching; + mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; + mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; + mCorrectionStates[mOutputIndex].mTransposing = mTransposing; + mCorrectionStates[mOutputIndex].mExceeding = mExceeding; + mCorrectionStates[mOutputIndex].mSkipping = mSkipping; +} + +inline void Correction::startToTraverseAllNodes() { + mNeedsToTraverseAllNodes = true; +} + +inline bool Correction::isSingleQuote(const int c) { + const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex); + return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE); +} + +AK_FORCE_INLINE Correction::CorrectionType Correction::processSkipChar(const int c, + const bool isTerminal, const bool inputIndexIncremented) { + addCharToCurrentWord(c); + mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); + mTerminalOutputIndex = mOutputIndex; + if (mNeedsToTraverseAllNodes && isTerminal) { + incrementOutputIndex(); + return TRAVERSE_ALL_ON_TERMINAL; + } else { + incrementOutputIndex(); + return TRAVERSE_ALL_NOT_ON_TERMINAL; + } +} + +inline Correction::CorrectionType Correction::processUnrelatedCorrectionType() { + // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType + mTerminalInputIndex = mInputIndex; + mTerminalOutputIndex = mOutputIndex; + return UNRELATED; +} + +AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, const int *input, + const int inputSize, const int *output, const int outputLength) { + // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. + // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. + // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, + // and calculate dp[ouputLength][0] ... dp[outputLength][inputSize]. + int *const current = editDistanceTable + outputLength * (inputSize + 1); + const int *const prev = editDistanceTable + (outputLength - 1) * (inputSize + 1); + const int *const prevprev = + outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; + current[0] = outputLength; + const int co = toBaseLowerCase(output[outputLength - 1]); + const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; + for (int i = 1; i <= inputSize; ++i) { + const int ci = toBaseLowerCase(input[i - 1]); + const uint16_t cost = (ci == co) ? 0 : 1; + current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); + if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { + current[i] = min(current[i], prevprev[i - 2] + 1); + } + } +} + +AK_FORCE_INLINE void Correction::addCharToCurrentWord(const int c) { + mWord[mOutputIndex] = c; + const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); + calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord, + mOutputIndex + 1); +} + +inline int Correction::getFinalProbabilityInternal(const int probability, int **word, + int *wordLength, const int inputSize) { + const int outputIndex = mTerminalOutputIndex; + const int inputIndex = mTerminalInputIndex; + *wordLength = outputIndex + 1; + *word = mWord; + int finalProbability= Correction::RankingAlgorithm::calculateFinalProbability( + inputIndex, outputIndex, probability, mEditDistanceTable, this, inputSize); + return finalProbability; +} + } // namespace latinime #endif // LATINIME_CORRECTION_H diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index ea0f0ef70..40bc958d1 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -30,17 +30,15 @@ #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) -#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) do { \ - shortArrayToCharArray(input, length, output); } while (0) +#define INTS_TO_CHARS(input, length, output) do { \ + intArrayToCharArray(input, length, output); } while (0) -static inline void dumpWordInfo(const unsigned short *word, const int length, - const int rank, const int frequency) { +static inline void dumpWordInfo(const int *word, const int length, const int rank, + const int frequency) { static char charBuf[50]; int i = 0; for (; i < length; ++i) { - const unsigned short c = word[i]; + const int c = word[i]; if (c == 0) { break; } @@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length, } } -static inline void dumpResult( - const unsigned short *outWords, const int *frequencies, const int maxWordCounts, +static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts, const int maxWordLength) { AKLOGI("--- DUMP RESULT ---------"); for (int i = 0; i < maxWordCounts; ++i) { @@ -63,11 +60,11 @@ static inline void dumpResult( AKLOGI("-------------------------"); } -static inline void dumpWord(const unsigned short *word, const int length) { +static inline void dumpWord(const int *word, const int length) { static char charBuf[50]; int i = 0; for (; i < length; ++i) { - const unsigned short c = word[i]; + const int c = word[i]; if (c == 0) { break; } @@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) { } } -static inline void dumpWordInt(const int *word, const int length) { - static char charBuf[50]; - - for (int i = 0; i < length; ++i) { - charBuf[i] = word[i]; - } - charBuf[length] = 0; - AKLOGI("i[ %s ]", charBuf); -} - -// TODO: Change this to intArrayToCharArray -static inline void shortArrayToCharArray( - const unsigned short *input, const int length, char *output) { +static inline void intArrayToCharArray(const int *input, const int length, char *output) { int i = 0; - for (;i < length; ++i) { - const unsigned short c = input[i]; + for (; i < length; ++i) { + const int c = input[i]; if (c == 0) { break; } @@ -137,11 +122,9 @@ static inline void showStackTrace() { #define AKLOGI(fmt, ...) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_WORD(word, length) -#define DUMP_WORD_INT(word, length) #define ASSERT(success) #define SHOW_STACK_TRACE -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) +#define INTS_TO_CHARS(input, length, output) #endif #ifdef FLAG_DO_PROFILE @@ -219,6 +202,8 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_SAMPLING_POINTS true +#define DEBUG_POINTS_PROBABILITY true #ifdef FLAG_FULL_DBG #define DEBUG_GEO_FULL true @@ -239,6 +224,8 @@ static inline void prof_out(void) { #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false +#define DEBUG_SAMPLING_POINTS false +#define DEBUG_POINTS_PROBABILITY false #define DEBUG_GEO_FULL false @@ -282,6 +269,8 @@ static inline void prof_out(void) { #define NOT_A_PROBABILITY (-1) #define KEYCODE_SPACE ' ' +#define KEYCODE_SINGLE_QUOTE '\'' +#define KEYCODE_HYPHEN_MINUS '-' #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true @@ -344,8 +333,8 @@ static inline void prof_out(void) { #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 -#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35 -#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185 +#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35f +#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185f /* heuristic... This should be changed if we change the unit of the frequency. */ #define SUPPRESS_SHORT_MULTIPLE_WORDS_THRESHOLD_FREQ (MAX_FREQ * 58 / 100) @@ -392,6 +381,14 @@ static inline void prof_out(void) { template<typename T> inline T min(T a, T b) { return a < b ? a : b; } template<typename T> inline T max(T a, T b) { return a > b ? a : b; } +#define NELEMS(x) (sizeof(x) / sizeof((x)[0])) + +#ifdef __GNUC__ +#define AK_FORCE_INLINE __attribute__((always_inline)) __inline__ +#else // __GNUC__ +#define AK_FORCE_INLINE inline +#endif // __GNUC__ + // The ratio of neutral area radius to sweet spot radius. #define NEUTRAL_AREA_RADIUS_RATIO 1.3f diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h index 292382487..3fe3d5b74 100644 --- a/native/jni/src/dic_traverse_wrapper.h +++ b/native/jni/src/dic_traverse_wrapper.h @@ -62,6 +62,5 @@ class DicTraverseWrapper { void *, const Dictionary *const, const int *, const int); static void (*sDicTraverseSessionReleaseMethod)(void *); }; -int register_DicTraverseSession(JNIEnv *env); } // namespace latinime #endif // LATINIME_DIC_TRAVERSE_WRAPPER_H diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 2fbe83e86..5fbe0461b 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -30,13 +30,12 @@ namespace latinime { // TODO: Change the type of all keyCodes to uint32_t Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, - int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, - int maxPredictions) + int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions) : mDict(static_cast<unsigned char *>(dict)), mOffsetDict((static_cast<unsigned char *>(dict)) + BinaryFormat::getHeaderSize(mDict)), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), - mUnigramDictionary(new UnigramDictionary(mOffsetDict, typedLetterMultiplier, - fullWordMultiplier, maxWordLength, maxWords, BinaryFormat::getFlags(mDict))), + mUnigramDictionary(new UnigramDictionary(mOffsetDict, fullWordMultiplier, maxWordLength, + maxWords, BinaryFormat::getFlags(mDict))), mBigramDictionary(new BigramDictionary(mOffsetDict, maxWordLength, maxPredictions)), mGestureDecoder(new GestureDecoderWrapper(maxWordLength, maxWords)) { if (DEBUG_DICT) { @@ -55,11 +54,10 @@ Dictionary::~Dictionary() { } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, - int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, - int *codes, int codesSize, int *prevWordChars, - int prevWordLength, int commitPoint, bool isGesture, - bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) const { + int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes, + int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, + bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, + int *outputTypes) const { int result = 0; if (isGesture) { DicTraverseWrapper::initDicTraverseSession( @@ -84,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi } int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const { + int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, outputTypes); diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index a1358890d..2ca00ab63 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -41,17 +41,17 @@ class Dictionary { const static int KIND_SHORTCUT = 7; // A shortcut const static int KIND_PREDICTION = 8; // A prediction (== a suggestion with no input) - Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int typedLetterMultipler, - int fullWordMultiplier, int maxWordLength, int maxWords, int maxPredictions); + Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust, int fullWordMultiplier, + int maxWordLength, int maxWords, int maxPredictions); int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, - bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) const; + bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, + int *outputTypes) const; - int getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const; + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords, + int *frequencies, int *outputTypes) const; int getFrequency(const int32_t *word, int length) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; @@ -68,7 +68,7 @@ class Dictionary { // public static utility methods // static inline methods should be defined in the header file - static int wideStrLen(unsigned short *str); + static int wideStrLen(int *str); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); @@ -88,7 +88,7 @@ class Dictionary { // public static utility methods // static inline methods should be defined in the header file -inline int Dictionary::wideStrLen(unsigned short *str) { +inline int Dictionary::wideStrLen(int *str) { if (!str) return 0; int length = 0; while (*str) { diff --git a/native/jni/src/geometry_utils.h b/native/jni/src/geometry_utils.h index 31359e19d..38b91cc50 100644 --- a/native/jni/src/geometry_utils.h +++ b/native/jni/src/geometry_utils.h @@ -19,38 +19,46 @@ #include <cmath> +#include "defines.h" + #define DEBUG_DECODER false #define M_PI_F 3.14159265f #define ROUND_FLOAT_10000(f) ((f) < 1000.0f && (f) > 0.001f) \ ? (floorf((f) * 10000.0f) / 10000.0f) : (f) -#define SQUARE_FLOAT(x) ((x) * (x)) namespace latinime { -static inline float getSquaredDistanceFloat(float x1, float y1, float x2, float y2) { - const float deltaX = x1 - x2; - const float deltaY = y1 - y2; - return SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY); +static inline float SQUARE_FLOAT(const float x) { return x * x; } + +static inline float getSquaredDistanceFloat(const float x1, const float y1, const float x2, + const float y2) { + return SQUARE_FLOAT(x1 - x2) + SQUARE_FLOAT(y1 - y2); +} + +static inline float getNormalizedSquaredDistanceFloat(const float x1, const float y1, + const float x2, const float y2, const float scale) { + return getSquaredDistanceFloat(x1, y1, x2, y2) / SQUARE_FLOAT(scale); } -static inline float getDistanceFloat(float x1, float y1, float x2, float y2) { +static inline float getDistanceFloat(const float x1, const float y1, const float x2, + const float y2) { return hypotf(x1 - x2, y1 - y2); } -static inline int getDistanceInt(int x1, int y1, int x2, int y2) { +static AK_FORCE_INLINE int getDistanceInt(const int x1, const int y1, const int x2, const int y2) { return static_cast<int>(getDistanceFloat(static_cast<float>(x1), static_cast<float>(y1), static_cast<float>(x2), static_cast<float>(y2))); } -static inline float getAngle(int x1, int y1, int x2, int y2) { +static AK_FORCE_INLINE float getAngle(const int x1, const int y1, const int x2, const int y2) { const int dx = x1 - x2; const int dy = y1 - y2; if (dx == 0 && dy == 0) return 0; return atan2f(static_cast<float>(dy), static_cast<float>(dx)); } -static inline float getAngleDiff(float a1, float a2) { +static AK_FORCE_INLINE float getAngleDiff(const float a1, const float a2) { const float deltaA = fabsf(a1 - a2); const float diff = ROUND_FLOAT_10000(deltaA); if (diff > M_PI_F) { @@ -60,8 +68,8 @@ static inline float getAngleDiff(float a1, float a2) { return diff; } -static inline float pointToLineSegSquaredDistanceFloat( - float x, float y, float x1, float y1, float x2, float y2, bool extend) { +static inline float pointToLineSegSquaredDistanceFloat(const float x, const float y, const float x1, + const float y1, const float x2, const float y2, const bool extend) { const float ray1x = x - x1; const float ray1y = y - y1; const float ray2x = x2 - x1; @@ -85,5 +93,25 @@ static inline float pointToLineSegSquaredDistanceFloat( } return getSquaredDistanceFloat(x, y, projectionX, projectionY); } + +// Normal distribution N(u, sigma^2). +struct NormalDistribution { + NormalDistribution(const float u, const float sigma) + : mU(u), mSigma(sigma), + mPreComputedNonExpPart(1.0f / sqrtf(2.0f * M_PI_F * SQUARE_FLOAT(sigma))), + mPreComputedExponentPart(-1.0f / (2.0f * SQUARE_FLOAT(sigma))) {} + + float getProbabilityDensity(const float x) const { + const float shiftedX = x - mU; + return mPreComputedNonExpPart * expf(mPreComputedExponentPart * SQUARE_FLOAT(shiftedX)); + } + +private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NormalDistribution); + float mU; // mean value + float mSigma; // standard deviation + float mPreComputedNonExpPart; // = 1 / sqrt(2 * PI * sigma^2) + float mPreComputedExponentPart; // = -1 / (2 * sigma^2) +}; // struct NormalDistribution } // namespace latinime #endif // LATINIME_GEOMETRY_UTILS_H diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.cpp b/native/jni/src/gesture/gesture_decoder_wrapper.cpp index afbe0c5c3..20ad4a58c 100644 --- a/native/jni/src/gesture/gesture_decoder_wrapper.cpp +++ b/native/jni/src/gesture/gesture_decoder_wrapper.cpp @@ -19,4 +19,8 @@ namespace latinime { IncrementalDecoderInterface * (*GestureDecoderWrapper::sGestureDecoderFactoryMethod)(int, int) = 0; + + GestureDecoderWrapper::~GestureDecoderWrapper() { + delete mIncrementalDecoderInterface; + } } // namespace latinime diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h index 92e1ded49..5b056b647 100644 --- a/native/jni/src/gesture/gesture_decoder_wrapper.h +++ b/native/jni/src/gesture/gesture_decoder_wrapper.h @@ -33,20 +33,17 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface { : mIncrementalDecoderInterface(getGestureDecoderInstance(maxWordLength, maxWords)) { } - virtual ~GestureDecoderWrapper() { - delete mIncrementalDecoderInterface; - } + virtual ~GestureDecoderWrapper(); int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, - int *outputTypes) const { + int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, + int *frequencies, int *outputIndices, int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } - return mIncrementalDecoderInterface->getSuggestions( - pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, - inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); + return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs, + inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } static void setGestureDecoderFactoryMethod( diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h index d1395aab9..e41513dbc 100644 --- a/native/jni/src/gesture/incremental_decoder_interface.h +++ b/native/jni/src/gesture/incremental_decoder_interface.h @@ -28,10 +28,9 @@ class ProximityInfo; class IncrementalDecoderInterface { public: - virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, - int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, - int inputSize, int commitPoint, unsigned short *outWords, int *frequencies, - int *outputIndices, int *outputTypes) const = 0; + virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, + int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0; IncrementalDecoderInterface() { }; virtual ~IncrementalDecoderInterface() { }; private: diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.cpp b/native/jni/src/gesture/incremental_decoder_wrapper.cpp index 8fcda6c9e..f6e45623a 100644 --- a/native/jni/src/gesture/incremental_decoder_wrapper.cpp +++ b/native/jni/src/gesture/incremental_decoder_wrapper.cpp @@ -19,4 +19,8 @@ namespace latinime { IncrementalDecoderInterface * (*IncrementalDecoderWrapper::sIncrementalDecoderFactoryMethod)(int, int) = 0; + + IncrementalDecoderWrapper::~IncrementalDecoderWrapper() { + delete mIncrementalDecoderInterface; + } } // namespace latinime diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h index da7afdb8a..7d16560ef 100644 --- a/native/jni/src/gesture/incremental_decoder_wrapper.h +++ b/native/jni/src/gesture/incremental_decoder_wrapper.h @@ -33,20 +33,17 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface { : mIncrementalDecoderInterface(getIncrementalDecoderInstance(maxWordLength, maxWords)) { } - virtual ~IncrementalDecoderWrapper() { - delete mIncrementalDecoderInterface; - } + virtual ~IncrementalDecoderWrapper(); int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, - int *outputTypes) const { + int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, + int *frequencies, int *outputIndices, int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } - return mIncrementalDecoderInterface->getSuggestions( - pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, - inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); + return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs, + inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } static void setIncrementalDecoderFactoryMethod( diff --git a/native/jni/src/proximity_info.cpp b/native/jni/src/proximity_info.cpp index fde93b5a9..84db7c196 100644 --- a/native/jni/src/proximity_info.cpp +++ b/native/jni/src/proximity_info.cpp @@ -31,21 +31,21 @@ namespace latinime { /* static */ const float ProximityInfo::NOT_A_DISTANCE_FLOAT = -1.0f; -static inline void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, jsize len, - jint *buffer) { +static AK_FORCE_INLINE void safeGetOrFillZeroIntArrayRegion(JNIEnv *env, jintArray jArray, + jsize len, jint *buffer) { if (jArray && buffer) { env->GetIntArrayRegion(jArray, 0, len, buffer); } else if (buffer) { - memset(buffer, 0, len * sizeof(jint)); + memset(buffer, 0, len * sizeof(buffer[0])); } } -static inline void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray, jsize len, - jfloat *buffer) { +static AK_FORCE_INLINE void safeGetOrFillZeroFloatArrayRegion(JNIEnv *env, jfloatArray jArray, + jsize len, jfloat *buffer) { if (jArray && buffer) { env->GetFloatArrayRegion(jArray, 0, len, buffer); } else if (buffer) { - memset(buffer, 0, len * sizeof(jfloat)); + memset(buffer, 0, len * sizeof(buffer[0])); } } @@ -127,13 +127,6 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { return false; } -static inline float getNormalizedSquaredDistanceFloat(float x1, float y1, float x2, float y2, - float scale) { - const float deltaX = x1 - x2; - const float deltaY = y1 - y2; - return (SQUARE_FLOAT(deltaX) + SQUARE_FLOAT(deltaY)) / SQUARE_FLOAT(scale); -} - float ProximityInfo::getNormalizedSquaredDistanceFromCenterFloatG( const int keyId, const int x, const int y) const { const static float verticalSweetSpotScaleForGeometric = 1.1f; @@ -239,6 +232,9 @@ int ProximityInfo::getKeyIndexOf(const int c) const { // We do not have the coordinate data return NOT_AN_INDEX; } + if (c == NOT_A_CODE_POINT) { + return NOT_AN_INDEX; + } const int lowerCode = static_cast<int>(toLowerCase(c)); hash_map_compat<int, int>::const_iterator mapPos = mCodeToKeyMap.find(lowerCode); if (mapPos != mCodeToKeyMap.end()) { @@ -296,9 +292,7 @@ int ProximityInfo::getKeyCenterYOfKeyIdG(int keyId) const { return 0; } -int ProximityInfo::getKeyKeyDistanceG(int key0, int key1) const { - const int keyId0 = getKeyIndexOf(key0); - const int keyId1 = getKeyIndexOf(key1); +int ProximityInfo::getKeyKeyDistanceG(const int keyId0, const int keyId1) const { if (keyId0 >= 0 && keyId1 >= 0) { return mKeyKeyDistancesG[keyId0][keyId1]; } diff --git a/native/jni/src/proximity_info.h b/native/jni/src/proximity_info.h index 70942aa19..7ee15d578 100644 --- a/native/jni/src/proximity_info.h +++ b/native/jni/src/proximity_info.h @@ -109,7 +109,7 @@ class ProximityInfo { int getKeyCenterYOfCodePointG(int charCode) const; int getKeyCenterXOfKeyIdG(int keyId) const; int getKeyCenterYOfKeyIdG(int keyId) const; - int getKeyKeyDistanceG(int key0, int key1) const; + int getKeyKeyDistanceG(int keyId0, int keyId1) const; private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProximityInfo); diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 392ec8194..db79bb616 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -15,6 +15,7 @@ */ #include <cstring> // for memset() +#include <sstream> // for debug prints #include <stdint.h> #define LOG_TAG "LatinIME: proximity_info_state.cpp" @@ -33,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f; const int ProximityInfoState::NOT_A_CODE = -1; void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, - const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, + const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric) { @@ -62,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi // - mNormalizedSquaredDistances // TODO: Merge for (int i = 0; i < inputSize; ++i) { - const int32_t primaryKey = inputCodes[i]; + const int primaryKey = inputCodes[i]; const int x = xCoordinates[i]; const int y = yCoordinates[i]; int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL]; @@ -104,7 +105,10 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi mLengthCache.clear(); mDistanceCache.clear(); mNearKeysVector.clear(); + mSearchKeysVector.clear(); mRelativeSpeeds.clear(); + mCharProbabilities.clear(); + mDirections.clear(); } if (DEBUG_GEO_FULL) { AKLOGI("Init ProximityInfoState: reused points = %d, last input size = %d", @@ -130,6 +134,10 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi NearKeysDistanceMap *currentNearKeysDistances = &nearKeysDistances[0]; NearKeysDistanceMap *prevNearKeysDistances = &nearKeysDistances[1]; NearKeysDistanceMap *prevPrevNearKeysDistances = &nearKeysDistances[2]; + // "sumAngle" is accumulated by each angle of input points. And when "sumAngle" exceeds + // the threshold we save that point, reset sumAngle. This aims to keep the figure of + // the curve. + float sumAngle = 0.0f; for (int i = pushTouchPointStartIndex; i <= lastInputIndex; ++i) { // Assuming pointerId == 0 if pointerIds is null. @@ -138,13 +146,22 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid); } if (pointerId == pid) { - const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i); + const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i); const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; const int time = times ? times[i] : -1; + + if (i > 1) { + const float prevAngle = getAngle(xCoordinates[i - 2], yCoordinates[i - 2], + xCoordinates[i - 1], yCoordinates[i - 1]); + const float currentAngle = + getAngle(xCoordinates[i - 1], yCoordinates[i - 1], x, y); + sumAngle += getAngleDiff(prevAngle, currentAngle); + } + if (pushTouchPoint(i, c, x, y, time, isGeometric /* do sampling */, - i == lastInputIndex, currentNearKeysDistances, prevNearKeysDistances, - prevPrevNearKeysDistances)) { + i == lastInputIndex, sumAngle, currentNearKeysDistances, + prevNearKeysDistances, prevPrevNearKeysDistances)) { // Previous point information was popped. NearKeysDistanceMap *tmp = prevNearKeysDistances; prevNearKeysDistances = currentNearKeysDistances; @@ -154,6 +171,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi prevPrevNearKeysDistances = prevNearKeysDistances; prevNearKeysDistances = currentNearKeysDistances; currentNearKeysDistances = tmp; + sumAngle = 0.0f; } } } @@ -161,43 +179,68 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi } if (mInputSize > 0 && isGeometric) { - int sumDuration = mTimes.back() - mTimes.front(); - int sumLength = mLengthCache.back() - mLengthCache.front(); - float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration); + // Relative speed calculation. + const int sumDuration = mTimes.back() - mTimes.front(); + const int sumLength = mLengthCache.back() - mLengthCache.front(); + const float averageSpeed = static_cast<float>(sumLength) / static_cast<float>(sumDuration); mRelativeSpeeds.resize(mInputSize); for (int i = lastSavedInputSize; i < mInputSize; ++i) { const int index = mInputIndice[i]; int length = 0; int duration = 0; - if (index == 0 && index < inputSize - 1) { - length = getDistanceInt(xCoordinates[index], yCoordinates[index], - xCoordinates[index + 1], yCoordinates[index + 1]); - duration = times[index + 1] - times[index]; - } else if (index == inputSize - 1 && index > 0) { - length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1], - xCoordinates[index], yCoordinates[index]); - duration = times[index] - times[index - 1]; - } else if (0 < index && index < inputSize - 1) { - length = getDistanceInt(xCoordinates[index - 1], yCoordinates[index - 1], - xCoordinates[index], yCoordinates[index]) - + getDistanceInt(xCoordinates[index], yCoordinates[index], - xCoordinates[index + 1], yCoordinates[index + 1]); - duration = times[index + 1] - times[index - 1]; + + // Calculate velocity by using distances and durations of + // NUM_POINTS_FOR_SPEED_CALCULATION points for both forward and backward. + static const int NUM_POINTS_FOR_SPEED_CALCULATION = 2; + for (int j = index; j < min(inputSize - 1, index + NUM_POINTS_FOR_SPEED_CALCULATION); + ++j) { + if (i < mInputSize - 1 && j >= mInputIndice[i + 1]) { + break; + } + length += getDistanceInt(xCoordinates[j], yCoordinates[j], + xCoordinates[j + 1], yCoordinates[j + 1]); + duration += times[j + 1] - times[j]; + } + for (int j = index - 1; j >= max(0, index - NUM_POINTS_FOR_SPEED_CALCULATION); --j) { + if (i > 0 && j < mInputIndice[i - 1]) { + break; + } + length += getDistanceInt(xCoordinates[j], yCoordinates[j], + xCoordinates[j + 1], yCoordinates[j + 1]); + duration += times[j + 1] - times[j]; + } + if (duration == 0 || sumDuration == 0) { + // Cannot calculate speed; thus, it gives an average value (1.0); + mRelativeSpeeds[i] = 1.0f; } else { - length = 0; - duration = 1; + const float speed = static_cast<float>(length) / static_cast<float>(duration); + mRelativeSpeeds[i] = speed / averageSpeed; } - const float speed = static_cast<float>(length) / static_cast<float>(duration); - mRelativeSpeeds[i] = speed / averageSpeed; + } + + // Direction calculation. + mDirections.resize(mInputSize - 1); + for (int i = max(0, lastSavedInputSize - 1); i < mInputSize - 1; ++i) { + mDirections[i] = getDirection(i, i + 1); + } + + } + + if (DEBUG_GEO_FULL) { + for (int i = 0; i < mInputSize; ++i) { + AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i], + mTimes[i]); } } if (mInputSize > 0) { const int keyCount = mProximityInfo->getKeyCount(); mNearKeysVector.resize(mInputSize); + mSearchKeysVector.resize(mInputSize); mDistanceCache.resize(mInputSize * keyCount); for (int i = lastSavedInputSize; i < mInputSize; ++i) { mNearKeysVector[i].reset(); + mSearchKeysVector[i].reset(); static const float NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD = 4.0f; for (int k = 0; k < keyCount; ++k) { const int index = i * keyCount + k; @@ -207,29 +250,53 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi mProximityInfo->getNormalizedSquaredDistanceFromCenterFloatG(k, x, y); mDistanceCache[index] = normalizedSquaredDistance; if (normalizedSquaredDistance < NEAR_KEY_NORMALIZED_SQUARED_THRESHOLD) { - mNearKeysVector[i].set(k, 1); + mNearKeysVector[i][k] = true; } } } + if (isGeometric) { + // updates probabilities of skipping or mapping each key for all points. + updateAlignPointProbabilities(lastSavedInputSize); - static const float READ_FORWORD_LENGTH_SCALE = 0.95f; - const int readForwordLength = static_cast<int>( - hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight()) - * READ_FORWORD_LENGTH_SCALE); - for (int i = 0; i < mInputSize; ++i) { - if (DEBUG_GEO_FULL) { - AKLOGI("Sampled(%d): x = %d, y = %d, time = %d", i, mInputXs[i], mInputYs[i], - mTimes[i]); - } - for (int j = max(i + 1, lastSavedInputSize); j < mInputSize; ++j) { - if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) { - break; + static const float READ_FORWORD_LENGTH_SCALE = 0.95f; + const int readForwordLength = static_cast<int>( + hypotf(mProximityInfo->getKeyboardWidth(), mProximityInfo->getKeyboardHeight()) + * READ_FORWORD_LENGTH_SCALE); + for (int i = 0; i < mInputSize; ++i) { + if (i >= lastSavedInputSize) { + mSearchKeysVector[i].reset(); + } + for (int j = max(i, lastSavedInputSize); j < mInputSize; ++j) { + if (mLengthCache[j] - mLengthCache[i] >= readForwordLength) { + break; + } + mSearchKeysVector[i] |= mNearKeysVector[j]; } - mNearKeysVector[i] |= mNearKeysVector[j]; } } } + if (DEBUG_SAMPLING_POINTS) { + std::stringstream originalX, originalY, sampledX, sampledY; + for (int i = 0; i < inputSize; ++i) { + originalX << xCoordinates[i]; + originalY << yCoordinates[i]; + if (i != inputSize - 1) { + originalX << ";"; + originalY << ";"; + } + } + for (int i = 0; i < mInputSize; ++i) { + sampledX << mInputXs[i]; + sampledY << mInputYs[i]; + if (i != mInputSize - 1) { + sampledX << ";"; + sampledY << ";"; + } + } + AKLOGI("\n%s, %s,\n%s, %s,\n", originalX.str().c_str(), originalY.str().c_str(), + sampledX.str().c_str(), sampledY.str().c_str()); + } // end /////////////////////// @@ -239,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi && xCoordinates && yCoordinates; if (!isGeometric && pointerId == 0) { for (int i = 0; i < inputSize; ++i) { - mPrimaryInputWord[i] = getPrimaryCharAt(i); + mPrimaryInputWord[i] = getPrimaryCodePointAt(i); } for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) { - const int *proximityChars = getProximityCharsAt(i); - const int primaryKey = proximityChars[0]; + const int *proximityCodePoints = getProximityCodePointsAt(i); + const int primaryKey = proximityCodePoints[0]; const int x = xCoordinates[i]; const int y = yCoordinates[i]; if (DEBUG_PROXIMITY_CHARS) { @@ -252,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi a += 0; AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); } - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) { - const int currentChar = proximityChars[j]; + for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0; + ++j) { + const int currentCodePoint = proximityCodePoints[j]; const float squaredDistance = hasInputCoordinates() ? calculateNormalizedSquaredDistance( - mProximityInfo->getKeyIndexOf(currentChar), i) : + mProximityInfo->getKeyIndexOf(currentCodePoint), i) : NOT_A_DISTANCE_FLOAT; if (squaredDistance >= 0.0f) { mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = @@ -267,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; } if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- Proximity (%d) = %c", j, currentChar); + AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint); } } } @@ -294,7 +362,7 @@ bool ProximityInfoState::checkAndReturnIsContinuationPossible(const int inputSiz // the given point and the nearest key position. float ProximityInfoState::updateNearKeysDistances(const int x, const int y, NearKeysDistanceMap *const currentNearKeysDistances) { - static const float NEAR_KEY_THRESHOLD = 4.0f; + static const float NEAR_KEY_THRESHOLD = 2.0f; currentNearKeysDistances->clear(); const int keyCount = mProximityInfo->getKeyCount(); @@ -332,64 +400,49 @@ bool ProximityInfoState::isPrevLocalMin(const NearKeysDistanceMap *const current // Calculating a point score that indicates usefulness of the point. float ProximityInfoState::getPointScore( const int x, const int y, const int time, const bool lastPoint, const float nearest, - const NearKeysDistanceMap *const currentNearKeysDistances, + const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances) const { static const int DISTANCE_BASE_SCALE = 100; - static const int SAVE_DISTANCE_SCALE = 200; - static const int SKIP_DISTANCE_SCALE = 25; - static const int CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE = 40; - static const int STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE = 50; - static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 27; - static const float SAVE_DISTANCE_SCORE = 2.0f; - static const float SKIP_DISTANCE_SCORE = -1.0f; - static const float CHECK_LOCALMIN_DISTANCE_SCORE = -1.0f; - static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F / 36.0f; - static const float STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD = 0.5f; - static const float STRAIGHT_SKIP_SCORE = -1.0f; - static const float CORNER_ANGLE_THRESHOLD = M_PI_F / 2.0f; + static const float NEAR_KEY_THRESHOLD = 0.6f; + static const int CORNER_CHECK_DISTANCE_THRESHOLD_SCALE = 25; + static const float NOT_LOCALMIN_DISTANCE_SCORE = -1.0f; + static const float LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE = 1.0f; + static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 2.0f / 3.0f; + static const float CORNER_SUM_ANGLE_THRESHOLD = M_PI_F / 4.0f; static const float CORNER_SCORE = 1.0f; - const std::size_t size = mInputXs.size(); - if (size <= 1) { + const size_t size = mInputXs.size(); + // If there is only one point, add this point. Besides, if the previous point's distance map + // is empty, we re-compute nearby keys distances from the current point. + // Note that the current point is the first point in the incremental input that needs to + // be re-computed. + if (size <= 1 || prevNearKeysDistances->empty()) { return 0.0f; } + const int baseSampleRate = mProximityInfo->getMostCommonKeyWidth(); - const int distNext = getDistanceInt(x, y, mInputXs.back(), mInputYs.back()) - * DISTANCE_BASE_SCALE; const int distPrev = getDistanceInt(mInputXs.back(), mInputYs.back(), mInputXs[size - 2], mInputYs[size - 2]) * DISTANCE_BASE_SCALE; float score = 0.0f; - // Sum of distances - if (distPrev + distNext > baseSampleRate * SAVE_DISTANCE_SCALE) { - score += SAVE_DISTANCE_SCORE; - } - // Distance - if (distPrev < baseSampleRate * SKIP_DISTANCE_SCALE) { - score += SKIP_DISTANCE_SCORE; - } // Location - if (distPrev < baseSampleRate * CHECK_LOCALMIN_DISTANCE_THRESHOLD_SCALE) { - if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances, - prevPrevNearKeysDistances)) { - score += CHECK_LOCALMIN_DISTANCE_SCORE; - } + if (!isPrevLocalMin(currentNearKeysDistances, prevNearKeysDistances, + prevPrevNearKeysDistances)) { + score += NOT_LOCALMIN_DISTANCE_SCORE; + } else if (nearest < NEAR_KEY_THRESHOLD) { + // Promote points nearby keys + score += LOCALMIN_DISTANCE_AND_NEAR_TO_KEY_SCORE; } // Angle const float angle1 = getAngle(x, y, mInputXs.back(), mInputYs.back()); const float angle2 = getAngle(mInputXs.back(), mInputYs.back(), mInputXs[size - 2], mInputYs[size - 2]); const float angleDiff = getAngleDiff(angle1, angle2); - // Skip straight - if (nearest > STRAIGHT_SKIP_NEAREST_DISTANCE_THRESHOLD - && distPrev < baseSampleRate * STRAIGHT_SKIP_DISTANCE_THRESHOLD_SCALE - && angleDiff < STRAIGHT_ANGLE_THRESHOLD) { - score += STRAIGHT_SKIP_SCORE; - } + // Save corner if (distPrev > baseSampleRate * CORNER_CHECK_DISTANCE_THRESHOLD_SCALE - && angleDiff > CORNER_ANGLE_THRESHOLD) { + && (sumAngle > CORNER_SUM_ANGLE_THRESHOLD || angleDiff > CORNER_ANGLE_THRESHOLD)) { score += CORNER_SCORE; } return score; @@ -397,18 +450,18 @@ float ProximityInfoState::getPointScore( // Sampling touch point and pushing information to vectors. // Returning if previous point is popped or not. -bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, - const int time, const bool sample, const bool isLastPoint, +bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, + const int time, const bool sample, const bool isLastPoint, const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances) { - static const float LAST_POINT_SKIP_DISTANCE_SCALE = 0.25f; + static const int LAST_POINT_SKIP_DISTANCE_SCALE = 4; size_t size = mInputXs.size(); bool popped = false; - if (nodeChar < 0 && sample) { + if (nodeCodePoint < 0 && sample) { const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); - const float score = getPointScore(x, y, time, isLastPoint, nearest, + const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle, currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); if (score < 0) { // Pop previous point because it would be useless. @@ -419,42 +472,24 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar popped = false; } // Check if the last point should be skipped. - if (isLastPoint) { - if (size > 0 && getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()) - < mProximityInfo->getMostCommonKeyWidth() * LAST_POINT_SKIP_DISTANCE_SCALE) { + if (isLastPoint && size > 0) { + if (getDistanceInt(x, y, mInputXs.back(), mInputYs.back()) + * LAST_POINT_SKIP_DISTANCE_SCALE < mProximityInfo->getMostCommonKeyWidth()) { + // This point is not used because it's too close to the previous point. if (DEBUG_GEO_FULL) { - AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %f, " - "width = %f", size, x, y, mInputXs.back(), mInputYs.back(), - getDistanceFloat(x, y, mInputXs.back(), mInputYs.back()), + AKLOGI("p0: size = %zd, x = %d, y = %d, lx = %d, ly = %d, dist = %d, " + "width = %d", size, x, y, mInputXs.back(), mInputYs.back(), + getDistanceInt(x, y, mInputXs.back(), mInputYs.back()), mProximityInfo->getMostCommonKeyWidth() - * LAST_POINT_SKIP_DISTANCE_SCALE); + / LAST_POINT_SKIP_DISTANCE_SCALE); } return popped; - } else if (size > 1) { - int minChar = 0; - float minDist = mMaxPointToKeyLength; - for (NearKeysDistanceMap::const_iterator it = currentNearKeysDistances->begin(); - it != currentNearKeysDistances->end(); ++it) { - if (minDist > it->second) { - minChar = it->first; - minDist = it->second; - } - } - NearKeysDistanceMap::const_iterator itPP = - prevNearKeysDistances->find(minChar); - if (itPP != prevNearKeysDistances->end() && minDist > itPP->second) { - if (DEBUG_GEO_FULL) { - AKLOGI("p1: char = %c, minDist = %f, prevNear key minDist = %f", - minChar, itPP->second, minDist); - } - return popped; - } } } } - if (nodeChar >= 0 && (x < 0 || y < 0)) { - const int keyId = mProximityInfo->getKeyIndexOf(nodeChar); + if (nodeCodePoint >= 0 && (x < 0 || y < 0)) { + const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint); if (keyId >= 0) { x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); @@ -503,22 +538,92 @@ int ProximityInfoState::getDuration(const int index) const { return 0; } -float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint, - const float scale) const { +float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int codePoint) const { const int keyId = mProximityInfo->getKeyIndexOf(codePoint); if (keyId != NOT_AN_INDEX) { const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; - return min(mDistanceCache[index] * scale, mMaxPointToKeyLength); + return min(mDistanceCache[index], mMaxPointToKeyLength); } - if (isSkippableChar(codePoint)) { + if (isSkippableCodePoint(codePoint)) { return 0.0f; } // If the char is not a key on the keyboard then return the max length. return MAX_POINT_TO_KEY_LENGTH; } +float ProximityInfoState::getPointToKeyByIdLength(const int inputIndex, const int keyId) const { + if (keyId != NOT_AN_INDEX) { + const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; + return min(mDistanceCache[index], mMaxPointToKeyLength); + } + // If the char is not a key on the keyboard then return the max length. + return static_cast<float>(MAX_POINT_TO_KEY_LENGTH); +} + +// In the following function, c is the current character of the dictionary word currently examined. +// currentChars is an array containing the keys close to the character the user actually typed at +// the same position. We want to see if c is in it: if so, then the word contains at that position +// a character close to what the user typed. +// What the user typed is actually the first character of the array. +// proximityIndex is a pointer to the variable where getMatchedProximityId returns the index of c +// in the proximity chars of the input index. +// Notice : accented characters do not have a proximity list, so they are alone in their list. The +// non-accented version of the character should be considered "close", but not the other keys close +// to the non-accented version. +ProximityType ProximityInfoState::getMatchedProximityId(const int index, const int c, + const bool checkProximityChars, int *proximityIndex) const { + const int *currentCodePoints = getProximityCodePointsAt(index); + const int firstCodePoint = currentCodePoints[0]; + const int baseLowerC = toBaseLowerCase(c); + + // The first char in the array is what user typed. If it matches right away, that means the + // user typed that same char for this pos. + if (firstCodePoint == baseLowerC || firstCodePoint == c) { + return EQUIVALENT_CHAR; + } + + if (!checkProximityChars) return UNRELATED_CHAR; + + // If the non-accented, lowercased version of that first character matches c, then we have a + // non-accented version of the accented character the user typed. Treat it as a close char. + if (toBaseLowerCase(firstCodePoint) == baseLowerC) { + return NEAR_PROXIMITY_CHAR; + } + + // Not an exact nor an accent-alike match: search the list of close keys + int j = 1; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return NEAR_PROXIMITY_CHAR; + } + ++j; + } + if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + ++j; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return ADDITIONAL_PROXIMITY_CHAR; + } + ++j; + } + } + // Was not included, signal this as an unrelated character. + return UNRELATED_CHAR; +} + int ProximityInfoState::getSpaceY() const { - const int keyId = mProximityInfo->getKeyIndexOf(' '); + const int keyId = mProximityInfo->getKeyIndexOf(KEYCODE_SPACE); return mProximityInfo->getKeyCenterYOfKeyIdG(keyId); } @@ -538,8 +643,9 @@ int32_t ProximityInfoState::getAllPossibleChars( return filterSize; } int newFilterSize = filterSize; - for (int j = 0; j < mProximityInfo->getKeyCount(); ++j) { - if (mNearKeysVector[index].test(j)) { + const int keyCount = mProximityInfo->getKeyCount(); + for (int j = 0; j < keyCount; ++j) { + if (mSearchKeysVector[index].test(j)) { const int32_t keyCodePoint = mProximityInfo->getCodePointOf(j); bool insert = true; // TODO: Avoid linear search @@ -557,6 +663,12 @@ int32_t ProximityInfoState::getAllPossibleChars( return newFilterSize; } +bool ProximityInfoState::isKeyInSerchKeysAfterIndex(const int index, const int keyId) const { + ASSERT(keyId >= 0); + ASSERT(index >= 0 && index < mInputSize); + return mSearchKeysVector[index].test(keyId); +} + void ProximityInfoState::popInputData() { mInputXs.pop_back(); mInputYs.pop_back(); @@ -565,4 +677,389 @@ void ProximityInfoState::popInputData() { mInputIndice.pop_back(); } +float ProximityInfoState::getDirection(const int index0, const int index1) const { + if (index0 < 0 || index0 > mInputSize - 1) { + return 0.0f; + } + if (index1 < 0 || index1 > mInputSize - 1) { + return 0.0f; + } + const int x1 = mInputXs[index0]; + const int y1 = mInputYs[index0]; + const int x2 = mInputXs[index1]; + const int y2 = mInputYs[index1]; + return getAngle(x1, y1, x2, y2); +} + +float ProximityInfoState::getPointAngle(const int index) const { + if (index <= 0 || index >= mInputSize - 1) { + return 0.0f; + } + const float previousDirection = getDirection(index - 1, index); + const float nextDirection = getDirection(index, index + 1); + const float directionDiff = getAngleDiff(previousDirection, nextDirection); + return directionDiff; +} + +float ProximityInfoState::getPointsAngle( + const int index0, const int index1, const int index2) const { + if (index0 < 0 || index0 > mInputSize - 1) { + return 0.0f; + } + if (index1 < 0 || index1 > mInputSize - 1) { + return 0.0f; + } + if (index2 < 0 || index2 > mInputSize - 1) { + return 0.0f; + } + const float previousDirection = getDirection(index0, index1); + const float nextDirection = getDirection(index1, index2); + return getAngleDiff(previousDirection, nextDirection); +} + +float ProximityInfoState::getLineToKeyDistance( + const int from, const int to, const int keyId, const bool extend) const { + if (from < 0 || from > mInputSize - 1) { + return 0.0f; + } + if (to < 0 || to > mInputSize - 1) { + return 0.0f; + } + const int x0 = mInputXs[from]; + const int y0 = mInputYs[from]; + const int x1 = mInputXs[to]; + const int y1 = mInputYs[to]; + + const int keyX = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); + const int keyY = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); + + return pointToLineSegSquaredDistanceFloat(keyX, keyY, x0, y0, x1, y1, extend); +} + +// Updates probabilities of aligning to some keys and skipping. +// Word suggestion should be based on this probabilities. +void ProximityInfoState::updateAlignPointProbabilities(const int start) { + static const float MIN_PROBABILITY = 0.000001f; + static const float MAX_SKIP_PROBABILITY = 0.95f; + static const float SKIP_FIRST_POINT_PROBABILITY = 0.01f; + static const float SKIP_LAST_POINT_PROBABILITY = 0.1f; + static const float MIN_SPEED_RATE_FOR_SKIP_PROBABILITY = 0.15f; + static const float SPEED_WEIGHT_FOR_SKIP_PROBABILITY = 0.9f; + static const float SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY = 0.6f; + static const float NEAREST_DISTANCE_WEIGHT = 0.5f; + static const float NEAREST_DISTANCE_BIAS = 0.5f; + static const float NEAREST_DISTANCE_WEIGHT_FOR_LAST = 0.6f; + static const float NEAREST_DISTANCE_BIAS_FOR_LAST = 0.4f; + + static const float ANGLE_WEIGHT = 0.90f; + static const float DEEP_CORNER_ANGLE_THRESHOLD = M_PI_F * 60.0f / 180.0f; + static const float SKIP_DEEP_CORNER_PROBABILITY = 0.1f; + static const float CORNER_ANGLE_THRESHOLD = M_PI_F * 30.0f / 180.0f; + static const float STRAIGHT_ANGLE_THRESHOLD = M_PI_F * 15.0f / 180.0f; + static const float SKIP_CORNER_PROBABILITY = 0.4f; + static const float SPEED_MARGIN = 0.1f; + static const float CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION = 0.0f; + + const int keyCount = mProximityInfo->getKeyCount(); + mCharProbabilities.resize(mInputSize); + // Calculates probabilities of using a point as a correlated point with the character + // for each point. + for (int i = start; i < mInputSize; ++i) { + mCharProbabilities[i].clear(); + // First, calculates skip probability. Starts form MIN_SKIP_PROBABILITY. + // Note that all values that are multiplied to this probability should be in [0.0, 1.0]; + float skipProbability = MAX_SKIP_PROBABILITY; + + const float currentAngle = getPointAngle(i); + const float relativeSpeed = getRelativeSpeed(i); + + float nearestKeyDistance = static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + for (int j = 0; j < keyCount; ++j) { + if (mNearKeysVector[i].test(j)) { + const float distance = getPointToKeyByIdLength(i, j); + if (distance < nearestKeyDistance) { + nearestKeyDistance = distance; + } + } + } + + if (i == 0) { + skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT + + NEAREST_DISTANCE_BIAS); + // Promote the first point + skipProbability *= SKIP_FIRST_POINT_PROBABILITY; + } else if (i == mInputSize - 1) { + skipProbability *= min(1.0f, nearestKeyDistance * NEAREST_DISTANCE_WEIGHT_FOR_LAST + + NEAREST_DISTANCE_BIAS_FOR_LAST); + // Promote the last point + skipProbability *= SKIP_LAST_POINT_PROBABILITY; + } else { + // If the current speed is relatively slower than adjacent keys, we promote this point. + if (getRelativeSpeed(i - 1) - SPEED_MARGIN > relativeSpeed + && relativeSpeed < getRelativeSpeed(i + 1) - SPEED_MARGIN) { + if (currentAngle < CORNER_ANGLE_THRESHOLD) { + skipProbability *= min(1.0f, relativeSpeed + * SLOW_STRAIGHT_WEIGHT_FOR_SKIP_PROBABILITY); + } else { + // If the angle is small enough, we promote this point more. (e.g. pit vs put) + skipProbability *= min(1.0f, relativeSpeed * SPEED_WEIGHT_FOR_SKIP_PROBABILITY + + MIN_SPEED_RATE_FOR_SKIP_PROBABILITY); + } + } + + skipProbability *= min(1.0f, relativeSpeed * nearestKeyDistance * + NEAREST_DISTANCE_WEIGHT + NEAREST_DISTANCE_BIAS); + + // Adjusts skip probability by a rate depending on angle. + // ANGLE_RATE of skipProbability is adjusted by current angle. + skipProbability *= (M_PI_F - currentAngle) / M_PI_F * ANGLE_WEIGHT + + (1.0f - ANGLE_WEIGHT); + if (currentAngle > DEEP_CORNER_ANGLE_THRESHOLD) { + skipProbability *= SKIP_DEEP_CORNER_PROBABILITY; + } + // We assume the angle of this point is the angle for point[i], point[i - 2] + // and point[i - 3]. The reason why we don't use the angle for point[i], point[i - 1] + // and point[i - 2] is this angle can be more affected by the noise. + const float prevAngle = getPointsAngle(i, i - 2, i - 3); + if (i >= 3 && prevAngle < STRAIGHT_ANGLE_THRESHOLD + && currentAngle > CORNER_ANGLE_THRESHOLD) { + skipProbability *= SKIP_CORNER_PROBABILITY; + } + } + + // probabilities must be in [0.0, MAX_SKIP_PROBABILITY]; + ASSERT(skipProbability >= 0.0f); + ASSERT(skipProbability <= MAX_SKIP_PROBABILITY); + mCharProbabilities[i][NOT_AN_INDEX] = skipProbability; + + // Second, calculates key probabilities by dividing the rest probability + // (1.0f - skipProbability). + const float inputCharProbability = 1.0f - skipProbability; + + // TODO: The variance is critical for accuracy; thus, adjusting these parameter by machine + // learning or something would be efficient. + static const float SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION = 0.3f; + static const float MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION = 0.25f; + static const float SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION = 0.5f; + static const float MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION = 0.15f; + static const float MIN_STANDERD_DIVIATION = 0.37f; + + const float speedxAngleRate = min(relativeSpeed * currentAngle / M_PI_F + * SPEEDxANGLE_WEIGHT_FOR_STANDARD_DIVIATION, + MAX_SPEEDxANGLE_RATE_FOR_STANDERD_DIVIATION); + const float speedxNearestKeyDistanceRate = min(relativeSpeed * nearestKeyDistance + * SPEEDxNEAREST_WEIGHT_FOR_STANDARD_DIVIATION, + MAX_SPEEDxNEAREST_RATE_FOR_STANDERD_DIVIATION); + const float sigma = speedxAngleRate + speedxNearestKeyDistanceRate + MIN_STANDERD_DIVIATION; + + NormalDistribution distribution(CENTER_VALUE_OF_NORMALIZED_DISTRIBUTION, sigma); + static const float PREV_DISTANCE_WEIGHT = 0.5f; + static const float NEXT_DISTANCE_WEIGHT = 0.6f; + // Summing up probability densities of all near keys. + float sumOfProbabilityDensities = 0.0f; + for (int j = 0; j < keyCount; ++j) { + if (mNearKeysVector[i].test(j)) { + float distance = sqrtf(getPointToKeyByIdLength(i, j)); + if (i == 0 && i != mInputSize - 1) { + // For the first point, weighted average of distances from first point and the + // next point to the key is used as a point to key distance. + const float nextDistance = sqrtf(getPointToKeyByIdLength(i + 1, j)); + if (nextDistance < distance) { + // The distance of the first point tends to bigger than continuing + // points because the first touch by the user can be sloppy. + // So we promote the first point if the distance of that point is larger + // than the distance of the next point. + distance = (distance + nextDistance * NEXT_DISTANCE_WEIGHT) + / (1.0f + NEXT_DISTANCE_WEIGHT); + } + } else if (i != 0 && i == mInputSize - 1) { + // For the first point, weighted average of distances from last point and + // the previous point to the key is used as a point to key distance. + const float previousDistance = sqrtf(getPointToKeyByIdLength(i - 1, j)); + if (previousDistance < distance) { + // The distance of the last point tends to bigger than continuing points + // because the last touch by the user can be sloppy. So we promote the + // last point if the distance of that point is larger than the distance of + // the previous point. + distance = (distance + previousDistance * PREV_DISTANCE_WEIGHT) + / (1.0f + PREV_DISTANCE_WEIGHT); + } + } + // TODO: Promote the first point when the extended line from the next input is near + // from a key. Also, promote the last point as well. + sumOfProbabilityDensities += distribution.getProbabilityDensity(distance); + } + } + + // Split the probability of an input point to keys that are close to the input point. + for (int j = 0; j < keyCount; ++j) { + if (mNearKeysVector[i].test(j)) { + float distance = sqrtf(getPointToKeyByIdLength(i, j)); + if (i == 0 && i != mInputSize - 1) { + // For the first point, weighted average of distances from the first point and + // the next point to the key is used as a point to key distance. + const float prevDistance = sqrtf(getPointToKeyByIdLength(i + 1, j)); + if (prevDistance < distance) { + distance = (distance + prevDistance * NEXT_DISTANCE_WEIGHT) + / (1.0f + NEXT_DISTANCE_WEIGHT); + } + } else if (i != 0 && i == mInputSize - 1) { + // For the first point, weighted average of distances from last point and + // the previous point to the key is used as a point to key distance. + const float prevDistance = sqrtf(getPointToKeyByIdLength(i - 1, j)); + if (prevDistance < distance) { + distance = (distance + prevDistance * PREV_DISTANCE_WEIGHT) + / (1.0f + PREV_DISTANCE_WEIGHT); + } + } + const float probabilityDensity = distribution.getProbabilityDensity(distance); + const float probability = inputCharProbability * probabilityDensity + / sumOfProbabilityDensities; + mCharProbabilities[i][j] = probability; + } + } + } + + + if (DEBUG_POINTS_PROBABILITY) { + for (int i = 0; i < mInputSize; ++i) { + std::stringstream sstream; + sstream << i << ", "; + sstream << "("<< mInputXs[i] << ", "; + sstream << ", "<< mInputYs[i] << "), "; + sstream << "Speed: "<< getRelativeSpeed(i) << ", "; + sstream << "Angle: "<< getPointAngle(i) << ", \n"; + + for (hash_map_compat<int, float>::iterator it = mCharProbabilities[i].begin(); + it != mCharProbabilities[i].end(); ++it) { + if (it->first == NOT_AN_INDEX) { + sstream << it->first + << "(skip):" + << it->second + << "\n"; + } else { + sstream << it->first + << "(" + << static_cast<char>(mProximityInfo->getCodePointOf(it->first)) + << "):" + << it->second + << "\n"; + } + } + AKLOGI("%s", sstream.str().c_str()); + } + } + + // Decrease key probabilities of points which don't have the highest probability of that key + // among nearby points. Probabilities of the first point and the last point are not suppressed. + for (int i = max(start, 1); i < mInputSize; ++i) { + for (int j = i + 1; j < mInputSize; ++j) { + if (!suppressCharProbabilities(i, j)) { + break; + } + } + for (int j = i - 1; j >= max(start, 0); --j) { + if (!suppressCharProbabilities(i, j)) { + break; + } + } + } + + // Converting from raw probabilities to log probabilities to calculate spatial distance. + for (int i = start; i < mInputSize; ++i) { + for (int j = 0; j < keyCount; ++j) { + hash_map_compat<int, float>::iterator it = mCharProbabilities[i].find(j); + if (it == mCharProbabilities[i].end()){ + mNearKeysVector[i].reset(j); + } else if(it->second < MIN_PROBABILITY) { + // Erases from near keys vector because it has very low probability. + mNearKeysVector[i].reset(j); + mCharProbabilities[i].erase(j); + } else { + it->second = -logf(it->second); + } + } + mCharProbabilities[i][NOT_AN_INDEX] = -logf(mCharProbabilities[i][NOT_AN_INDEX]); + } +} + +// Decreases char probabilities of index0 by checking probabilities of a near point (index1) and +// increases char probabilities of index1 by checking probabilities of index0. +bool ProximityInfoState::suppressCharProbabilities(const int index0, const int index1) { + ASSERT(0 <= index0 && index0 < mInputSize); + ASSERT(0 <= index1 && index1 < mInputSize); + + static const float SUPPRESSION_LENGTH_WEIGHT = 1.5f; + static const float MIN_SUPPRESSION_RATE = 0.1f; + static const float SUPPRESSION_WEIGHT = 0.5f; + static const float SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN = 0.1f; + static const float SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN = 0.3f; + + const float keyWidthFloat = static_cast<float>(mProximityInfo->getMostCommonKeyWidth()); + const float diff = fabsf(static_cast<float>(mLengthCache[index0] - mLengthCache[index1])); + if (diff > keyWidthFloat * SUPPRESSION_LENGTH_WEIGHT) { + return false; + } + const float suppressionRate = MIN_SUPPRESSION_RATE + + diff / keyWidthFloat / SUPPRESSION_LENGTH_WEIGHT * SUPPRESSION_WEIGHT; + for (hash_map_compat<int, float>::iterator it = mCharProbabilities[index0].begin(); + it != mCharProbabilities[index0].end(); ++it) { + hash_map_compat<int, float>::iterator it2 = mCharProbabilities[index1].find(it->first); + if (it2 != mCharProbabilities[index1].end() && it->second < it2->second) { + const float newProbability = it->second * suppressionRate; + const float suppression = it->second - newProbability; + it->second = newProbability; + // mCharProbabilities[index0][NOT_AN_INDEX] is the probability of skipping this point. + mCharProbabilities[index0][NOT_AN_INDEX] += suppression; + + // Add the probability of the same key nearby index1 + const float probabilityGain = min(suppression * SUPPRESSION_WEIGHT_FOR_PROBABILITY_GAIN, + mCharProbabilities[index1][NOT_AN_INDEX] + * SKIP_PROBABALITY_WEIGHT_FOR_PROBABILITY_GAIN); + it2->second += probabilityGain; + mCharProbabilities[index1][NOT_AN_INDEX] -= probabilityGain; + } + } + return true; +} + +// Get a word that is detected by tracing highest probability sequence into codePointBuf and +// returns probability of generating the word. +float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const { + static const float DEMOTION_LOG_PROBABILITY = 0.3f; + int index = 0; + float sumLogProbability = 0.0f; + // TODO: Current implementation is greedy algorithm. DP would be efficient for many cases. + for (int i = 0; i < mInputSize && index < MAX_WORD_LENGTH_INTERNAL - 1; ++i) { + float minLogProbability = static_cast<float>(MAX_POINT_TO_KEY_LENGTH); + int character = NOT_AN_INDEX; + for (hash_map_compat<int, float>::const_iterator it = mCharProbabilities[i].begin(); + it != mCharProbabilities[i].end(); ++it) { + const float logProbability = (it->first != NOT_AN_INDEX) + ? it->second + DEMOTION_LOG_PROBABILITY : it->second; + if (logProbability < minLogProbability) { + minLogProbability = logProbability; + character = it->first; + } + } + if (character != NOT_AN_INDEX) { + codePointBuf[index] = mProximityInfo->getCodePointOf(character); + index++; + } + sumLogProbability += minLogProbability; + } + codePointBuf[index] = '\0'; + return sumLogProbability; +} + +// Returns a probability of mapping index to keyIndex. +float ProximityInfoState::getProbability(const int index, const int keyIndex) const { + ASSERT(0 <= index && index < mInputSize); + hash_map_compat<int, float>::const_iterator it = mCharProbabilities[index].find(keyIndex); + if (it != mCharProbabilities[index].end()) { + return it->second; + } + return static_cast<float>(MAX_POINT_TO_KEY_LENGTH); +} + } // namespace latinime diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index c1ec76c38..39a238889 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -43,36 +43,38 @@ class ProximityInfoState { // Defined in proximity_info_state.cpp // ///////////////////////////////////////// void initInputParams(const int pointerId, const float maxPointToKeyLength, - const ProximityInfo *proximityInfo, const int32_t *const inputCodes, + const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, const int *xCoordinates, const int *yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric); ///////////////////////////////////////// // Defined here // ///////////////////////////////////////// - ProximityInfoState() + AK_FORCE_INLINE ProximityInfoState() : mProximityInfo(0), mMaxPointToKeyLength(0), mHasTouchPositionCorrectionData(false), mMostCommonKeyWidthSquare(0), mLocaleStr(), mKeyCount(0), mCellHeight(0), mCellWidth(0), mGridHeight(0), mGridWidth(0), mIsContinuationPossible(false), mInputXs(), mInputYs(), mTimes(), mInputIndice(), - mDistanceCache(), mLengthCache(), mRelativeSpeeds(), mNearKeysVector(), + mDistanceCache(), mLengthCache(), mRelativeSpeeds(), mDirections(), + mCharProbabilities(), mNearKeysVector(), mSearchKeysVector(), mTouchPositionCorrectionEnabled(false), mInputSize(0) { memset(mInputCodes, 0, sizeof(mInputCodes)); memset(mNormalizedSquaredDistances, 0, sizeof(mNormalizedSquaredDistances)); memset(mPrimaryInputWord, 0, sizeof(mPrimaryInputWord)); } - virtual ~ProximityInfoState() {} + // Non virtual inline destructor -- never inherit this class + AK_FORCE_INLINE ~ProximityInfoState() {} - inline unsigned short getPrimaryCharAt(const int index) const { - return getProximityCharsAt(index)[0]; + inline int getPrimaryCodePointAt(const int index) const { + return getProximityCodePointsAt(index)[0]; } - inline bool existsCharInProximityAt(const int index, const int c) const { - const int *chars = getProximityCharsAt(index); + AK_FORCE_INLINE bool existsCodePointInProximityAt(const int index, const int c) const { + const int *codePoints = getProximityCodePointsAt(index); int i = 0; - while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { - if (chars[i++] == c) { + while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { + if (codePoints[i++] == c) { return true; } } @@ -81,90 +83,25 @@ class ProximityInfoState { inline bool existsAdjacentProximityChars(const int index) const { if (index < 0 || index >= mInputSize) return false; - const int currentChar = getPrimaryCharAt(index); + const int currentCodePoint = getPrimaryCodePointAt(index); const int leftIndex = index - 1; - if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { + if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) { return true; } const int rightIndex = index + 1; - if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) { + if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) { return true; } return false; } - // In the following function, c is the current character of the dictionary word - // currently examined. - // currentChars is an array containing the keys close to the character the - // user actually typed at the same position. We want to see if c is in it: if so, - // then the word contains at that position a character close to what the user - // typed. - // What the user typed is actually the first character of the array. - // proximityIndex is a pointer to the variable where getMatchedProximityId returns - // the index of c in the proximity chars of the input index. - // Notice : accented characters do not have a proximity list, so they are alone - // in their list. The non-accented version of the character should be considered - // "close", but not the other keys close to the non-accented version. - inline ProximityType getMatchedProximityId(const int index, - const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const { - const int *currentChars = getProximityCharsAt(index); - const int firstChar = currentChars[0]; - const unsigned short baseLowerC = toBaseLowerCase(c); - - // The first char in the array is what user typed. If it matches right away, - // that means the user typed that same char for this pos. - if (firstChar == baseLowerC || firstChar == c) { - return EQUIVALENT_CHAR; - } - - if (!checkProximityChars) return UNRELATED_CHAR; - - // If the non-accented, lowercased version of that first character matches c, - // then we have a non-accented version of the accented character the user - // typed. Treat it as a close char. - if (toBaseLowerCase(firstChar) == baseLowerC) - return NEAR_PROXIMITY_CHAR; - - // Not an exact nor an accent-alike match: search the list of close keys - int j = 1; - while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return NEAR_PROXIMITY_CHAR; - } - ++j; - } - if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - ++j; - while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return ADDITIONAL_PROXIMITY_CHAR; - } - ++j; - } - } - - // Was not included, signal this as an unrelated character. - return UNRELATED_CHAR; - } - inline int getNormalizedSquaredDistance( const int inputIndex, const int proximityIndex) const { return mNormalizedSquaredDistances[ inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex]; } - inline const unsigned short *getPrimaryInputWord() const { + inline const int *getPrimaryInputWord() const { return mPrimaryInputWord; } @@ -172,13 +109,13 @@ class ProximityInfoState { return mTouchPositionCorrectionEnabled; } - inline bool sameAsTyped(const unsigned short *word, int length) const { + inline bool sameAsTyped(const int *word, int length) const { if (length != mInputSize) { return false; } const int *inputCodes = mInputCodes; while (length--) { - if (static_cast<unsigned int>(*inputCodes) != static_cast<unsigned int>(*word)) { + if (*inputCodes != *word) { return false; } inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; @@ -213,7 +150,11 @@ class ProximityInfoState { return mIsContinuationPossible; } - float getPointToKeyLength(const int inputIndex, const int charCode, const float scale) const; + float getPointToKeyLength(const int inputIndex, const int charCode) const; + float getPointToKeyByIdLength(const int inputIndex, const int keyId) const; + + ProximityType getMatchedProximityId(const int index, const int c, + const bool checkProximityChars, int *proximityIndex = 0) const; int getSpaceY() const; @@ -223,6 +164,25 @@ class ProximityInfoState { float getRelativeSpeed(const int index) const { return mRelativeSpeeds[index]; } + + float getDirection(const int index) const { + return mDirections[index]; + } + // get xy direction + float getDirection(const int x, const int y) const; + + float getPointAngle(const int index) const; + // Returns angle of three points. x, y, and z are indices. + float getPointsAngle(const int index0, const int index1, const int index2) const; + + float getHighestProbabilitySequence(int *const codePointBuf) const; + + float getProbability(const int index, const int charCode) const; + + float getLineToKeyDistance( + const int from, const int to, const int keyId, const bool extend) const; + + bool isKeyInSerchKeysAfterIndex(const int index, const int keyId) const; private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); typedef hash_map_compat<int, float> NearKeysDistanceMap; @@ -234,8 +194,8 @@ class ProximityInfoState { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; - bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time, - const bool sample, const bool isLastPoint, + bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time, + const bool sample, const bool isLastPoint, const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances); @@ -248,7 +208,7 @@ class ProximityInfoState { return mInputXs.size() > 0 && mInputYs.size() > 0; } - inline const int *getProximityCharsAt(const int index) const { + inline const int *getProximityCodePointsAt(const int index) const { return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); } @@ -259,12 +219,14 @@ class ProximityInfoState { const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; float getPointScore( const int x, const int y, const int time, const bool last, const float nearest, - const NearKeysDistanceMap *const currentNearKeysDistances, + const float sumAngle, const NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, const NearKeysDistanceMap *const prevPrevNearKeysDistances) const; bool checkAndReturnIsContinuationPossible(const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *const times); void popInputData(); + void updateAlignPointProbabilities(const int start); + bool suppressCharProbabilities(const int index1, const int index2); // const const ProximityInfo *mProximityInfo; @@ -286,12 +248,23 @@ class ProximityInfoState { std::vector<float> mDistanceCache; std::vector<int> mLengthCache; std::vector<float> mRelativeSpeeds; + std::vector<float> mDirections; + // probabilities of skipping or mapping to a key for each point. + std::vector<hash_map_compat<int, float> > mCharProbabilities; + // The vector for the key code set which holds nearby keys for each sampled input point + // 1. Used to calculate the probability of the key + // 2. Used to calculate mSearchKeysVector std::vector<NearKeycodesSet> mNearKeysVector; + // The vector for the key code set which holds nearby keys of some trailing sampled input points + // for each sampled input point. These nearby keys contain the next characters which can be in + // the dictionary. Specifically, currently we are looking for keys nearby trailing sampled + // inputs including the current input point. + std::vector<NearKeycodesSet> mSearchKeysVector; bool mTouchPositionCorrectionEnabled; - int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; + int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mInputSize; - unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; + int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; }; } // namespace latinime #endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index e72e7e3be..fed3c7251 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -43,18 +43,16 @@ class TerminalAttributes { return mHasNextShortcutTarget; } - // Gets the shortcut target itself as a uint16_t string. For parameters and return value + // Gets the shortcut target itself as an int string. For parameters and return value // see BinaryFormat::getWordAtAddress. - // TODO: make the output an uint32_t* to handle the whole unicode range. - inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) { + inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); - mHasNextShortcutTarget = - 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); + mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos); if (NOT_A_CODE_POINT == codePoint) break; - outWord[i] = (uint16_t)codePoint; + outWord[i] = codePoint; } *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); return i; diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index e3649bd4b..d134a47e6 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -41,14 +41,11 @@ const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[ { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE // TODO: check the header -UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultiplier, - int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags) - : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), - TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier), - // TODO : remove this variable. - ROOT_POS(0), - BYTES_IN_ONE_CHAR(sizeof(int)), - MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) { +UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, + int maxWordLength, int maxWords, const unsigned int flags) + : DICT_ROOT(streamStart), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords), + FULL_WORD_MULTIPLIER(fullWordMultiplier), // TODO : remove this variable. + ROOT_POS(0), MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH), FLAGS(flags) { if (DEBUG_DICT) { AKLOGI("UnigramDictionary - constructor"); } @@ -57,13 +54,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int typed UnigramDictionary::~UnigramDictionary() { } -static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) { - return static_cast<unsigned int>(sizeof(*codes)) * codesSize; +static inline int getCodesBufferSize(const int *codes, const int codesSize) { + return sizeof(*codes) * codesSize; } -// TODO: This needs to take a const unsigned short* and not tinker with its contents -static inline void addWord(unsigned short *word, int length, int frequency, - WordsPriorityQueue *queue, int type) { +// TODO: This needs to take a const int* and not tinker with its contents +static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) { queue->push(frequency, word, length, type); } @@ -105,6 +101,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs, const unsigned int digraphsSize) const { + assert(sizeof(codesDest[0]) == sizeof(codesSrc[0])); + assert(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0])); + assert(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0])); const int startIndex = static_cast<int>(codesDest - codesBuffer); if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) { @@ -125,9 +124,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // Make i the index of the second char of the digraph for simplicity. Forgetting // to do that results in an infinite recursion so take care! ++i; - memcpy(codesDest, codesSrc, i * BYTES_IN_ONE_CHAR); - codesDest[(i - 1) * (BYTES_IN_ONE_CHAR / sizeof(codesDest[0]))] = - replacementCodePoint; + memcpy(codesDest, codesSrc, i * sizeof(codesDest[0])); + codesDest[i - 1] = replacementCodePoint; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1, @@ -137,7 +135,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. // In our example, after "pru" in the buffer copy the "e", and continue on "fen" - memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR); + memcpy(codesDest + i, codesSrc + i, sizeof(codesDest[0])); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i, @@ -153,13 +151,13 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit // If the word contains several digraphs, we'll come it for the product of them. // eg. if the word is "ueberpruefen" we'll test, in order, against // "uberprufen", "uberpruefen", "ueberprufen", "ueberpruefen". - const unsigned int remainingBytes = BYTES_IN_ONE_CHAR * codesRemain; + const unsigned int remainingBytes = sizeof(codesDest[0]) * codesRemain; if (0 != remainingBytes) { memcpy(codesDest, codesSrc, remainingBytes); memcpy(&xCoordinatesBuffer[startIndex], &xcoordinates[codesBufferSize - codesRemain], - sizeof(int) * codesRemain); + sizeof(xCoordinatesBuffer[0]) * codesRemain); memcpy(&yCoordinatesBuffer[startIndex], &ycoordinates[codesBufferSize - codesRemain], - sizeof(int) * codesRemain); + sizeof(yCoordinatesBuffer[0]) * codesRemain); } getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer, @@ -173,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies, - int *outputTypes) const { - + const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const { WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH); queuePool.clearAll(); Correction masterCorrection; @@ -188,8 +184,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, - &queuePool, GERMAN_UMLAUT_DIGRAPHS, - sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0])); + &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS)); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { int codesBuffer[getCodesBufferSize(codes, codesSize)]; int xCoordinatesBuffer[codesSize]; @@ -197,8 +192,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, useFullEditDistance, codes, codesSize, 0, codesBuffer, &masterCorrection, - &queuePool, FRENCH_LIGATURES_DIGRAPHS, - sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0])); + &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS)); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool); @@ -222,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { - short unsigned int *w = outWords + j * MAX_WORD_LENGTH; + int *w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; (void)s; // To suppress compiler warning @@ -234,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x return suggestedWordsCount; } -void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool) const { - +void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int inputSize, + const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, + const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) + const { PROF_OPEN; PROF_START(0); PROF_END(0); @@ -288,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord *sw = queue->top(); const int score = sw->mScore; - const unsigned short *word = sw->mWord; + const int *word = sw->mWord; const int wordLength = sw->mWordLength; float ns = Correction::RankingAlgorithm::calcNormalizedScore( correction->getPrimaryInputWord(), i, word, wordLength, score); @@ -307,15 +300,13 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int Correction *correction) const { if (DEBUG_DICT) { AKLOGI("initSuggest"); - DUMP_WORD_INT(codes, inputSize); + DUMP_WORD(codes, inputSize); } correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates); const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputSize, maxDepth); } -static const char SPACE = ' '; - void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, @@ -374,7 +365,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, } } -inline void UnigramDictionary::onTerminal(const int probability, +void UnigramDictionary::onTerminal(const int probability, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, const int currentWordIndex) const { @@ -382,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability, const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; int wordLength; - unsigned short *wordPointer; + int *wordPointer; if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); @@ -410,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability, // so that the insert order is protected inside the queue for words // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. - uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; int shortcutFrequency; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); @@ -450,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion( const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { + int *wordLengthArray, int *outputWord, int *outputWordLength) const { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_ABORT; } @@ -493,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion( // TODO: Remove the safety net above // ////////////////////////////////////////////// - unsigned short *tempOutputWord = 0; + int *tempOutputWord = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction); - unsigned short word[MAX_WORD_LENGTH_INTERNAL]; + int word[MAX_WORD_LENGTH_INTERNAL]; int freq = getMostFrequentWordLike( inputWordStartPos, inputWordLength, correction, word); if (freq > 0) { @@ -570,7 +561,7 @@ int UnigramDictionary::getSubStringSuggestion( if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_SKIP; } - outputWord[tempOutputWordLength] = SPACE; + outputWord[tempOutputWordLength] = KEYCODE_SPACE; if (outputWordLength) { ++*outputWordLength; } @@ -598,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int outputWordLength, - int *freqArray, int *wordLengthArray, unsigned short *outputWord) const { + int *freqArray, int *wordLengthArray, int *outputWord) const { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; @@ -684,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit } // Allocating fixed length array on stack - unsigned short outputWord[MAX_WORD_LENGTH]; + int outputWord[MAX_WORD_LENGTH]; int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; const int outputWordLength = 0; @@ -698,12 +689,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. -inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputSize, Correction *correction, unsigned short *word) const { - uint16_t inWord[inputSize]; - +int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, const int inputSize, + Correction *correction, int *word) const { + int inWord[inputSize]; for (int i = 0; i < inputSize; ++i) { - inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i); + inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i); } return getMostFrequentWordLikeInner(inWord, inputSize, word); } @@ -721,14 +711,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, - const uint8_t *const root, const int startPos, const uint16_t *const inWord, - const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex, + const uint8_t *const root, const int startPos, const int *const inWord, + const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex, int *outPos) { const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; - int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); - int32_t baseChar = toBaseLowerCase(codePoint); - const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); + int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + int baseChar = toBaseLowerCase(codePoint); + const int wChar = toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; @@ -759,8 +749,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, // It will compare the frequency to the max frequency, and if greater, will // copy the word into the output buffer. In output value maxFreq, it will // write the new maximum frequency if it changed. -static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, - short unsigned int *outWord, int *maxFreq) { +static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord, + int *maxFreq) { if (freq > *maxFreq) { for (int q = 0; q < length; ++q) { outWord[q] = newWord[q]; @@ -772,9 +762,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in // Will find the highest frequency of the words like the one passed as an argument, // that is, everything that only differs by case/accents. -int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, - const int inputSize, short unsigned int *outWord) const { - int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; +int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize, + int *outWord) const { + int newWord[MAX_WORD_LENGTH_INTERNAL]; int depth = 0; int maxFreq = -1; const uint8_t *const root = DICT_ROOT; @@ -834,7 +824,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord return maxFreq; } -int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const { +int UnigramDictionary::getFrequency(const int *const inWord, const int length) const { const uint8_t *const root = DICT_ROOT; int pos = BinaryFormat::getTerminalPosition(root, inWord, length, false /* forceLowerCaseSearch */); @@ -859,8 +849,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt } // TODO: remove this function. -int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset, - int length) const { +int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const { return -1; } @@ -878,7 +867,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // there aren't any more nodes at this level, it merely returns the address of the first byte after // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. -inline bool UnigramDictionary::processCurrentNode(const int initialPos, +bool UnigramDictionary::processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) const { @@ -906,7 +895,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // else if FLAG_IS_TERMINAL: the frequency // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // Note that you can't have a node that both is not a terminal and has no children. - int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); + int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); assert(NOT_A_CODE_POINT != c); // We are going to loop through each character and make it look like it's a different @@ -920,7 +909,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // We prefetch the next char. If 'c' is the last char of this node, we will have // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node // should behave as a terminal or not and whether we have children. - const int32_t nextc = hasMultipleChars + const int nextc = hasMultipleChars ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT; const bool isLastChar = (NOT_A_CODE_POINT == nextc); // If there are more chars in this nodes, then this virtual node is not a terminal. diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 57129bb07..248b09db1 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -39,14 +39,14 @@ class UnigramDictionary { static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0; static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1; static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; - UnigramDictionary(const uint8_t *const streamStart, int typedLetterMultipler, - int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - int getFrequency(const int32_t *const inWord, const int length) const; - int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; + UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength, + int maxWords, const unsigned int flags); + int getFrequency(const int *const inWord, const int length) const; + int getBigramPosition(int pos, int *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies, + const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const; virtual ~UnigramDictionary(); @@ -93,9 +93,9 @@ class UnigramDictionary { int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) const; int getMostFrequentWordLike(const int startInputIndex, const int inputSize, - Correction *correction, unsigned short *word) const; - int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize, - short unsigned int *outWord) const; + Correction *correction, int *word) const; + int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize, + int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, @@ -103,22 +103,19 @@ class UnigramDictionary { const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; - void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputSize, - Correction *correction, WordsPriorityQueuePool *queuePool, + int *wordLengthArray, int *outputWord, int *outputWordLength) const; + void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const int outputWordLength, int *freqArray, int *wordLengthArray, - unsigned short *outputWord) const; + int *outputWord) const; const uint8_t *const DICT_ROOT; const int MAX_WORD_LENGTH; const int MAX_WORDS; - const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; const int ROOT_POS; - const unsigned int BYTES_IN_ONE_CHAR; const int MAX_DIGRAPH_SEARCH_DEPTH; const int FLAGS; diff --git a/native/jni/src/words_priority_queue.cpp b/native/jni/src/words_priority_queue.cpp new file mode 100644 index 000000000..7e18d0f87 --- /dev/null +++ b/native/jni/src/words_priority_queue.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2012, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "words_priority_queue.h" + +namespace latinime { + +int WordsPriorityQueue::outputSuggestions(const int *before, const int beforeLength, + int *frequencies, int *outputCodePoints, int* outputTypes) { + mHighestSuggestedWord = 0; + const int size = min(MAX_WORDS, static_cast<int>(mSuggestions.size())); + SuggestedWord *swBuffer[size]; + int index = size - 1; + while (!mSuggestions.empty() && index >= 0) { + SuggestedWord *sw = mSuggestions.top(); + if (DEBUG_WORDS_PRIORITY_QUEUE) { + AKLOGI("dump word. %d", sw->mScore); + DUMP_WORD(sw->mWord, sw->mWordLength); + } + swBuffer[index] = sw; + mSuggestions.pop(); + --index; + } + if (size >= 2) { + SuggestedWord *nsMaxSw = 0; + int maxIndex = 0; + float maxNs = 0; + for (int i = 0; i < size; ++i) { + SuggestedWord *tempSw = swBuffer[i]; + if (!tempSw) { + continue; + } + const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0); + if (tempNs >= maxNs) { + maxNs = tempNs; + maxIndex = i; + nsMaxSw = tempSw; + } + } + if (maxIndex > 0 && nsMaxSw) { + memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0])); + swBuffer[0] = nsMaxSw; + } + } + for (int i = 0; i < size; ++i) { + SuggestedWord *sw = swBuffer[i]; + if (!sw) { + AKLOGE("SuggestedWord is null %d", i); + continue; + } + const int wordLength = sw->mWordLength; + int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH; + frequencies[i] = sw->mScore; + outputTypes[i] = sw->mType; + memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0])); + if (wordLength < MAX_WORD_LENGTH) { + targetAddress[wordLength] = 0; + } + sw->mUsed = false; + } + return size; +} +} // namespace latinime diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 19efa5da3..7d0c4d17d 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -30,15 +30,15 @@ class WordsPriorityQueue { class SuggestedWord { public: int mScore; - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH_INTERNAL]; int mWordLength; bool mUsed; int mType; - void setParams(int score, unsigned short *word, int wordLength, int type) { + void setParams(int score, int *word, int wordLength, int type) { mScore = score; mWordLength = wordLength; - memcpy(mWord, word, sizeof(unsigned short) * wordLength); + memcpy(mWord, word, sizeof(mWord[0]) * wordLength); mUsed = true; mType = type; } @@ -53,13 +53,14 @@ class WordsPriorityQueue { } } - virtual ~WordsPriorityQueue() { + // Non virtual inline destructor -- never inherit this class + AK_FORCE_INLINE ~WordsPriorityQueue() { delete[] mSuggestedWords; } - void push(int score, unsigned short *word, int wordLength, int type) { + void push(int score, int *word, int wordLength, int type) { SuggestedWord *sw = 0; - if (mSuggestions.size() >= MAX_WORDS) { + if (size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; if (minScore >= score) { @@ -94,68 +95,11 @@ class WordsPriorityQueue { return sw; } - int outputSuggestions(const unsigned short *before, const int beforeLength, - int *frequencies, unsigned short *outputChars, int* outputTypes) { - mHighestSuggestedWord = 0; - const unsigned int size = min( - MAX_WORDS, static_cast<unsigned int>(mSuggestions.size())); - SuggestedWord *swBuffer[size]; - int index = size - 1; - while (!mSuggestions.empty() && index >= 0) { - SuggestedWord *sw = mSuggestions.top(); - if (DEBUG_WORDS_PRIORITY_QUEUE) { - AKLOGI("dump word. %d", sw->mScore); - DUMP_WORD(sw->mWord, sw->mWordLength); - } - swBuffer[index] = sw; - mSuggestions.pop(); - --index; - } - if (size >= 2) { - SuggestedWord *nsMaxSw = 0; - unsigned int maxIndex = 0; - float maxNs = 0; - for (unsigned int i = 0; i < size; ++i) { - SuggestedWord *tempSw = swBuffer[i]; - if (!tempSw) { - continue; - } - const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0); - if (tempNs >= maxNs) { - maxNs = tempNs; - maxIndex = i; - nsMaxSw = tempSw; - } - } - if (maxIndex > 0 && nsMaxSw) { - memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(SuggestedWord *)); - swBuffer[0] = nsMaxSw; - } - } - for (unsigned int i = 0; i < size; ++i) { - SuggestedWord *sw = swBuffer[i]; - if (!sw) { - AKLOGE("SuggestedWord is null %d", i); - continue; - } - const unsigned int wordLength = sw->mWordLength; - unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH; - frequencies[i] = sw->mScore; - outputTypes[i] = sw->mType; - memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short)); - if (wordLength < MAX_WORD_LENGTH) { - targetAddress[wordLength] = 0; - } - sw->mUsed = false; - } - return size; - } - int size() const { - return mSuggestions.size(); + return static_cast<int>(mSuggestions.size()); } - void clear() { + AK_FORCE_INLINE void clear() { mHighestSuggestedWord = 0; while (!mSuggestions.empty()) { SuggestedWord *sw = mSuggestions.top(); @@ -175,15 +119,18 @@ class WordsPriorityQueue { DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); } - float getHighestNormalizedScore(const unsigned short *before, const int beforeLength, - unsigned short **outWord, int *outScore, int *outLength) { + float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord, + int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } - return getNormalizedScore( - mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength); + return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore, + outLength); } + int outputSuggestions(const int *before, const int beforeLength, int *frequencies, + int *outputCodePoints, int* outputTypes); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue); struct wordComparator { @@ -192,9 +139,8 @@ class WordsPriorityQueue { } }; - SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word, - int wordLength, int type) { - for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) { + for (int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { mSuggestedWords[i].setParams(score, word, wordLength, type); return &mSuggestedWords[i]; @@ -203,10 +149,10 @@ class WordsPriorityQueue { return 0; } - static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before, - const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) { + static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength, + int **outWord, int *outScore, int *outLength) { const int score = sw->mScore; - unsigned short *word = sw->mWord; + int *word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; @@ -217,15 +163,15 @@ class WordsPriorityQueue { if (outLength) { *outLength = wordLength; } - return Correction::RankingAlgorithm::calcNormalizedScore( - before, beforeLength, word, wordLength, score); + return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word, + wordLength, score); } typedef std::priority_queue<SuggestedWord *, std::vector<SuggestedWord *>, wordComparator> Suggestions; Suggestions mSuggestions; - const unsigned int MAX_WORDS; - const unsigned int MAX_WORD_LENGTH; + const int MAX_WORDS; + const int MAX_WORD_LENGTH; SuggestedWord *mSuggestedWords; SuggestedWord *mHighestSuggestedWord; }; diff --git a/native/jni/src/words_priority_queue_pool.h b/native/jni/src/words_priority_queue_pool.h index 2d52903e0..c14afa07b 100644 --- a/native/jni/src/words_priority_queue_pool.h +++ b/native/jni/src/words_priority_queue_pool.h @@ -36,7 +36,8 @@ class WordsPriorityQueuePool { } } - virtual ~WordsPriorityQueuePool() { + // Non virtual inline destructor -- never inherit this class + ~WordsPriorityQueuePool() { // Note: these explicit calls to the destructor match the calls to placement new() above. if (mMasterQueue) mMasterQueue->~WordsPriorityQueue(); for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i) { @@ -68,7 +69,7 @@ class WordsPriorityQueuePool { } } - inline void clearSubQueue(const int wordIndex) { + AK_FORCE_INLINE void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { WordsPriorityQueue *queue = getSubQueue(wordIndex, i); if (queue) { |