aboutsummaryrefslogtreecommitdiffstats
path: root/native/jni/src
diff options
context:
space:
mode:
Diffstat (limited to 'native/jni/src')
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.cpp7
-rw-r--r--native/jni/src/suggest/core/dictionary/dictionary.h4
-rw-r--r--native/jni/src/suggest/core/dictionary/property/word_property.cpp15
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info.cpp2
-rw-r--r--native/jni/src/suggest/core/layout/proximity_info_state_utils.h2
-rw-r--r--native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h4
-rw-r--r--native/jni/src/suggest/core/result/suggestion_results.cpp12
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp6
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h3
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp)4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h)0
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp16
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp)26
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h)7
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h (renamed from native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h)2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h2
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp22
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h8
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp14
-rw-r--r--native/jni/src/utils/char_utils.cpp3
-rw-r--r--native/jni/src/utils/char_utils.h7
-rw-r--r--native/jni/src/utils/jni_data_utils.h12
33 files changed, 138 insertions, 86 deletions
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index e288413a3..fdc893653 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -88,11 +88,10 @@ void Dictionary::addUnigramWord(const int *const word, const int length,
mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
}
-void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
+void Dictionary::addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) {
TimeKeeper::setCurrentTime();
- mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, word1, length1,
- probability, timestamp);
+ mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty);
}
void Dictionary::removeBigramWords(const int *const word0, const int length0,
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index b6149b338..f0a7e5b6a 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -76,8 +76,8 @@ class Dictionary {
void addUnigramWord(const int *const codePoints, const int codePointCount,
const UnigramProperty *const unigramProperty);
- void addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
+ void addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty);
void removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
index 95608dcf8..6f5f808f8 100644
--- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp
+++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp
@@ -16,14 +16,17 @@
#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/jni_data_utils.h"
+
namespace latinime {
void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
jobject outBigramProbabilities, jobject outShortcutTargets,
jobject outShortcutProbabilities) const {
- env->SetIntArrayRegion(outCodePoints, 0 /* start */, mCodePoints.size(), &mCodePoints[0]);
-
+ JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
+ MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
+ false /* needsNullTermination */);
jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(),
!mBigrams.empty(), mUnigramProperty.hasShortcuts()};
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
@@ -41,8 +44,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
for (const auto &bigramProperty : mBigrams) {
const std::vector<int> *const word1CodePoints = bigramProperty.getTargetCodePoints();
jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
- env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
- word1CodePoints->size(), word1CodePoints->data());
+ JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
+ word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
+ false /* needsNullTermination */);
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
env->DeleteLocalRef(bigramWord1CodePointArray);
@@ -62,6 +66,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
env->SetIntArrayRegion(shortcutTargetCodePointArray, 0 /* start */,
targetCodePoints->size(), targetCodePoints->data());
+ JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
+ targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
+ false /* needsNullTermination */);
env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
env->DeleteLocalRef(shortcutTargetCodePointArray);
jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp
index c40a2bdca..4c75a188e 100644
--- a/native/jni/src/suggest/core/layout/proximity_info.cpp
+++ b/native/jni/src/suggest/core/layout/proximity_info.cpp
@@ -226,7 +226,7 @@ int ProximityInfo::getKeyCenterXOfKeyIdG(
// When the referencePointY is NOT_A_COORDINATE, this method calculates the return value without
// using the line segment.
int ProximityInfo::getKeyCenterYOfKeyIdG(
- const int keyId, const int referencePointY, const bool isGeometric) const {
+ const int keyId, const int referencePointY, const bool isGeometric) const {
// TODO: Remove "isGeometric" and have separate "proximity_info"s for gesture and typing.
if (keyId < 0) {
return 0;
diff --git a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
index 71e83a80c..211a79737 100644
--- a/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
+++ b/native/jni/src/suggest/core/layout/proximity_info_state_utils.h
@@ -56,7 +56,7 @@ class ProximityInfoStateUtils {
const std::vector<int> *const sampledLengthCache,
const std::vector<int> *const sampledInputIndice,
std::vector<float> *sampledSpeedRates, std::vector<float> *sampledDirections);
- static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
+ static void refreshBeelineSpeedRates(const int mostCommonKeyWidth, const float averageSpeed,
const int inputSize, const int *const xCoordinates, const int *const yCoordinates,
const int *times, const int sampledInputSize,
const std::vector<int> *const sampledInputXs,
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 807f9b8dd..ce5a49f83 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -73,8 +73,8 @@ class DictionaryStructureWithBufferPolicy {
const UnigramProperty *const unigramProperty) = 0;
// Returns whether the update was success or not.
- virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) = 0;
+ virtual bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) = 0;
// Returns whether the update was success or not.
virtual bool removeBigramWords(const int *const word0, const int length0,
diff --git a/native/jni/src/suggest/core/result/suggestion_results.cpp b/native/jni/src/suggest/core/result/suggestion_results.cpp
index 088a55f6f..6594a1292 100644
--- a/native/jni/src/suggest/core/result/suggestion_results.cpp
+++ b/native/jni/src/suggest/core/result/suggestion_results.cpp
@@ -16,6 +16,8 @@
#include "suggest/core/result/suggestion_results.h"
+#include "utils/jni_data_utils.h"
+
namespace latinime {
void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCount,
@@ -27,13 +29,9 @@ void SuggestionResults::outputSuggestions(JNIEnv *env, jintArray outSuggestionCo
const SuggestedWord &suggestedWord = mSuggestedWords.top();
suggestedWord.getCodePointCount();
const int start = outputIndex * MAX_WORD_LENGTH;
- env->SetIntArrayRegion(outputCodePointsArray, start, suggestedWord.getCodePointCount(),
- suggestedWord.getCodePoint());
- if (suggestedWord.getCodePointCount() < MAX_WORD_LENGTH) {
- const int terminal = 0;
- env->SetIntArrayRegion(outputCodePointsArray, start + suggestedWord.getCodePointCount(),
- 1 /* len */, &terminal);
- }
+ JniDataUtils::outputCodePoints(env, outputCodePointsArray, start,
+ MAX_WORD_LENGTH /* maxLength */, suggestedWord.getCodePoint(),
+ suggestedWord.getCodePointCount(), true /* needsNullTermination */);
const int score = suggestedWord.getScore();
env->SetIntArrayRegion(outScoresArray, outputIndex, 1 /* len */, &score);
const int indexToPartialCommit = suggestedWord.getIndexToPartialCommit();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
index 7d0d09631..08b4e0b5e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
index 15f924a6a..15f924a6a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
index a527f03bd..9e575858a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -85,13 +85,13 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
}
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability, const int timestamp, bool *const outAddedNewBigram) {
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
const PtNodeParams sourcePtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
const PtNodeParams targetPtNodeParams(
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
- return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
- timestamp, outAddedNewBigram);
+ return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams,
+ bigramProperty, outAddedNewBigram);
}
// Remove a bigram relation from word0Pos to word1Pos.
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
index 44914fe4c..f10d15a9b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h
@@ -22,6 +22,7 @@
namespace latinime {
+class BigramProperty;
class BufferWithExtendableBuffer;
class DynamicPtReadingHelper;
class PtNodeReader;
@@ -42,8 +43,8 @@ class DynamicPtUpdatingHelper {
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
- const int timestamp, bool *const outAddedNewBigram);
+ bool addBigramWords(const int word0Pos, const int word1Pos,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram);
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 91192fc57..bef401f87 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -23,6 +23,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -158,6 +159,10 @@ class PtNodeParams {
return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
}
+ AK_FORCE_INLINE bool representsNonWordInfo() const { // First code point is outside Unicode space.
+ return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0]);
+ }
+
// Parent node position
AK_FORCE_INLINE int getParentPos() const {
return mParentPos;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
index cbca3fe35..a8029f73f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h
@@ -24,6 +24,7 @@
namespace latinime {
+class BigramProperty;
class UnigramProperty;
// Interface class used to write PtNode information.
@@ -70,7 +71,7 @@ class PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram) = 0;
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
index 847dcdee5..91c76941c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
@@ -44,7 +44,7 @@ const int ShortcutListReadingUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
}
/* static */ int ShortcutListReadingUtils::readShortcutTarget(
- const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
+ const uint8_t *const dictRoot, const int maxLength, int *const outWord, int *const pos) {
return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
index d065bf7fd..d065bf7fd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
index a898e2afc..00bb502dc 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index b3af1f47a..30dcfba37 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+#include "utils/char_utils.h"
namespace latinime {
@@ -318,12 +319,15 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
- childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
- PatriciaTrieReadingUtils::isTerminal(flags),
- PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
- PatriciaTrieReadingUtils::isBlacklisted(flags)
- || PatriciaTrieReadingUtils::isNotAWord(flags),
- mergedNodeCodePointCount, mergedNodeCodePoints);
+ // Skip PtNodes that don't start with a Unicode code point; they represent non-word information.
+ if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
+ childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
+ PatriciaTrieReadingUtils::isTerminal(flags),
+ PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
+ PatriciaTrieReadingUtils::isBlacklisted(flags)
+ || PatriciaTrieReadingUtils::isNotAWord(flags),
+ mergedNodeCodePointCount, mergedNodeCodePoints);
+ }
return siblingPos;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 85f46603e..54d1e0f6d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -22,9 +22,9 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
@@ -88,8 +88,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp) {
+ bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
index 6d2b4778c..8e16ccc05 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h
@@ -21,7 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
namespace latinime {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
index 1645039d3..7a52fd180 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
@@ -14,10 +14,11 @@
* limitations under the License.
*/
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
+#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -49,13 +50,12 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
- const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
// 4. The word has bigrams. We have to append new bigram entry to the list.
// 5. Same as 4, but the list is the last entry of the content file.
-
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}
@@ -69,7 +69,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
- newProbability, timestamp);
+ bigramProperty);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
@@ -102,7 +102,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &newBigramEntry, newProbability, timestamp);
+ &newBigramEntry, bigramProperty);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false;
}
@@ -128,7 +128,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
- &updatedBigramEntry, newProbability, timestamp);
+ &updatedBigramEntry, bigramProperty);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
@@ -253,19 +253,19 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
}
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
- const BigramEntry *const originalBigramEntry, const int newProbability,
- const int timestamp) const {
+ const BigramEntry *const originalBigramEntry,
+ const BigramProperty *const bigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
- // Use 1 for count to indicate the bigram has inputed.
- const HistoricalInfo historicalInfoForUpdate(timestamp, 0 /* level */, 1 /* count */);
+ const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
+ bigramProperty->getLevel(), bigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
- originalBigramEntry->getHistoricalInfo(), newProbability,
+ originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
&historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
- return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
+ return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
}
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
index c1f33359b..1613941c4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -24,6 +24,7 @@
namespace latinime {
class BigramDictContent;
+class BigramProperty;
class HeaderPolicy;
class TerminalPositionLookupTable;
@@ -43,8 +44,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
}
- bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
- const int timestamp, bool *const outAddedNewEntry);
+ bool addNewEntry(const int terminalId, const int newTargetTerminalId,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);
@@ -60,7 +61,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
- const int newProbability, const int timestamp) const;
+ const BigramProperty *const bigramProperty) const;
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
index fe984615c..790273541 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
@@ -19,7 +19,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
index 77ed38b89..5aa6b9a92 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
@@ -56,6 +56,7 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
return false;
}
}
+ umask(S_IWGRP | S_IWOTH);
if (mkdir(tmpDirPath, S_IRWXU) == -1) {
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
return false;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
index 67420a252..0a435e91c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
@@ -95,4 +95,4 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
}
}
-}
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
index cc3a24a22..f89d3d7a0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
@@ -17,13 +17,13 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/core/dictionary/property/unigram_property.h"
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -76,7 +76,7 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
- false /* isDeleted */, false /* willBecomeNonTerminal */);
+ false /* isDeleted */, false /* willBecomeNonTerminal */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
@@ -223,11 +223,10 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
- const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
- bool *const outAddedNewBigram) {
+ const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
+ const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
- targetPtNodeParam->getTerminalId(), probability, timestamp, outAddedNewBigram)) {
+ targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
@@ -416,4 +415,4 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
-}
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
index f20d3a241..e90bc44c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
@@ -76,7 +76,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
- const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+ const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 9999e0692..8373dc549 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -59,13 +59,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// valid terminal DicNode.
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
+ readingHelper.readNextSiblingNode(ptNodeParams);
+ if (ptNodeParams.representsNonWordInfo()) {
+ // Skip PtNodes that represent non-word information (helper is already advanced).
+ continue;
+ }
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
- readingHelper.readNextSiblingNode(ptNodeParams);
}
if (readingHelper.isError()) {
mIsCorrupted = true;
@@ -209,8 +213,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
}
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
- const int *const word1, const int length1, const int probability,
- const int timestamp) {
+ const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
@@ -220,9 +223,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
mDictBuffer->getTailPosition());
return false;
}
- if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
+ if (length0 > MAX_WORD_LENGTH
+ || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
- "length0: %d, length1: %d", length0, length1);
+ "length0: %d, length1: %zu", length0, bigramProperty->getTargetCodePoints()->size());
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
@@ -230,14 +234,14 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
- const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
- false /* forceLowerCaseSearch */);
+ const int word1Pos = getTerminalPtNodePositionOfWord(
+ bigramProperty->getTargetCodePoints()->data(),
+ bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
bool addedNewBigram = false;
- if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
- &addedNewBigram)) {
+ if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 8f981def5..b78576484 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -21,10 +21,10 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
@@ -93,8 +93,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
- bool addBigramWords(const int *const word0, const int length0, const int *const word1,
- const int length1, const int probability, const int timestamp);
+ bool addBigramWords(const int *const word0, const int length0,
+ const BigramProperty *const bigramProperty);
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
index 12298d967..f31c50253 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
@@ -19,9 +19,9 @@
#include <cstring>
#include <queue>
-#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
+#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 7bc7b0a48..80970c7f8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -17,6 +17,10 @@
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include <cstdio>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
@@ -100,9 +104,15 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
const BufferWithExtendableBuffer *const buffer) {
- FILE *const file = fopen(filePath, "wb");
+ const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ if (fd == -1) {
+ AKLOGE("File %s cannot be opened. errno: %d", filePath, errno);
+ ASSERT(false);
+ return false;
+ }
+ FILE *const file = fdopen(fd, "wb");
if (!file) {
- AKLOGE("File %s cannot be opened.", filePath);
+ AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno);
ASSERT(false);
return false;
}
diff --git a/native/jni/src/utils/char_utils.cpp b/native/jni/src/utils/char_utils.cpp
index adc474b4c..b17e0847d 100644
--- a/native/jni/src/utils/char_utils.cpp
+++ b/native/jni/src/utils/char_utils.cpp
@@ -22,6 +22,9 @@
namespace latinime {
+const int CharUtils::MIN_UNICODE_CODE_POINT = 0;
+const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF;
+
struct LatinCapitalSmallPair {
unsigned short capital;
unsigned short small;
diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h
index 239419d5b..634c45b04 100644
--- a/native/jni/src/utils/char_utils.h
+++ b/native/jni/src/utils/char_utils.h
@@ -86,12 +86,19 @@ class CharUtils {
return spaceCount;
}
+ static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) {
+ return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT;
+ }
+
static unsigned short latin_tolower(const unsigned short c);
static const std::vector<int> EMPTY_STRING;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils);
+ static const int MIN_UNICODE_CODE_POINT;
+ static const int MAX_UNICODE_CODE_POINT;
+
/**
* Table mapping most combined Latin, Greek, and Cyrillic characters
* to their base characters. If c is in range, BASE_CHARS[c] == c
diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h
index 2ce02dc05..0e393e315 100644
--- a/native/jni/src/utils/jni_data_utils.h
+++ b/native/jni/src/utils/jni_data_utils.h
@@ -65,6 +65,18 @@ class JniDataUtils {
return attributeMap;
}
+ static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
+ const int maxLength, const int *const codePoints, const int codePointCount,
+ const bool needsNullTermination) {
+ const int outputCodePointCount = std::min(maxLength, codePointCount);
+ env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, codePoints);
+ if (needsNullTermination && outputCodePointCount < maxLength) {
+ const int terminal = 0;
+ env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
+ 1 /* len */, &terminal);
+ }
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);
};