about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h | 4
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h | 3
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h | 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp | 9
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp | 2
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h | 11
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp | 6
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h | 50
-rw-r--r-- native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h | 37
9 files changed, 113 insertions, 15 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index a9c7805a8..7c06a7117 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -30,8 +30,8 @@ namespace latinime {
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
public:
// Reads information from existing dictionary buffer.
- HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
- : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
+ : mDictFormatVersion(formatVersion),
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 4b4c39dfa..31e6186b7 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -34,7 +35,7 @@ class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_2),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
index 2c722e8ed..1a924c177 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h
@@ -23,6 +23,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
@@ -33,9 +34,10 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
- : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
+ : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_3),
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
- mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
+ mBuffer->getBufferSize() - mHeaderPolicy.getSize(),
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
index e149d6371..d856c50f4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp
@@ -149,7 +149,8 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
@@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
- MAX_DICTIONARY_SIZE);
+ BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
int unigramCount = 0;
int bigramCount = 0;
if (mNeedsToDecay) {
@@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return;
}
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
return;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index f692882f2..5032131ab 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,7 +18,7 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
+const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
// TODO: Needs to allocate larger memory corresponding to the current vector size.
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 9dc34823c..1e27a1bec 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,12 +32,20 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
+ static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
+
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
- const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
+ const int maxAdditionalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+    // Constructs a buffer without an original buffer: the original region is null with size 0,
+    // so every position lives in the additional buffer.
+    // maxAdditionalBufferSize is stored as mMaxAdditionalBufferSize (presumably the cap on how
+    // far the additional buffer may grow -- confirm against the extension logic).
+    // Marked explicit so that a bare int is never silently converted into a buffer object.
+    explicit BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
+            : mOriginalBuffer(0), mOriginalBufferSize(0),
+              mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+              mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
+
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
@@ -86,7 +94,6 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index f65583ee4..b48e5b005 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -44,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
0 /* extendedRegionSize */);
- BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer bodyBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
new file mode 100644
index 000000000..59b894fa6
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_FILE_UTILS_H
+#define LATINIME_FILE_UTILS_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "defines.h"
+
+namespace latinime {
+
+// Collection of static helpers for querying files on disk.
+class FileUtils {
+ public:
+    // Returns the size in bytes of the file at filePath, or -1 on error.
+    //
+    // An error is reported when the file cannot be opened for reading, when fstat() fails, or
+    // when the size cannot be represented as a non-negative int.
+    static int getFileSize(const char *const filePath) {
+        const int fd = open(filePath, O_RDONLY);
+        if (fd == -1) {
+            return -1;
+        }
+        struct stat statBuf;
+        const int statResult = fstat(fd, &statBuf);
+        // The descriptor is only needed for fstat(); release it on every path.
+        close(fd);
+        if (statResult != 0) {
+            return -1;
+        }
+        // st_size is an off_t and may be wider than int. Reject sizes that would be truncated
+        // or wrap to a negative value (possibly the -1 error sentinel) rather than handing the
+        // caller a bogus length.
+        const int fileSize = static_cast<int>(statBuf.st_size);
+        if (fileSize < 0 || static_cast<off_t>(fileSize) != statBuf.st_size) {
+            return -1;
+        }
+        return fileSize;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
+};
+} // namespace latinime
+#endif /* LATINIME_FILE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
index 6b69116eb..82138355d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h
@@ -18,18 +18,21 @@
#define LATINIME_MMAPPED_BUFFER_H
#include <cerrno>
+#include <climits>
+#include <cstdio>
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/file_utils.h"
namespace latinime {
class MmappedBuffer {
public:
- static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
+ static MmappedBuffer *openBuffer(const char *const path, const int bufferOffset,
const int bufferSize, const bool isUpdatable) {
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
const int mmapFd = open(path, openMode);
@@ -59,7 +62,34 @@ class MmappedBuffer {
isUpdatable);
}
+    // Maps the whole file located at path.
+    // Returns 0 when the file size cannot be obtained. A zero-length file is represented by an
+    // MmappedBuffer built with the empty-file constructor; any other file is mapped from offset
+    // 0 for its full size.
+    static MmappedBuffer *openBuffer(const char *const path, const bool isUpdatable) {
+        const int size = FileUtils::getFileSize(path);
+        if (size == -1) {
+            return 0;
+        }
+        if (size == 0) {
+            return new MmappedBuffer(isUpdatable);
+        }
+        return openBuffer(path, 0 /* bufferOffset */, size, isUpdatable);
+    }
+
+    // Maps the entire file named fileName inside dirPath.
+    // The path is formed by concatenating dirPath and fileName verbatim; no separator is
+    // inserted, so dirPath has to end with one if a separator is required.
+    // Returns 0 when the path cannot be formatted or does not fit in the path buffer; otherwise
+    // returns whatever the whole-file openBuffer() overload returns for the combined path.
+    static MmappedBuffer *openBuffer(const char *const dirPath, const char *const fileName,
+            const bool isUpdatable) {
+        const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
+        char filePath[filePathBufferSize];
+        const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
+                fileName);
+        // snprintf() returns a negative value on an output error and a value >= the buffer size
+        // when the result was truncated. In both cases filePath must not be used.
+        if (filePathLength < 0 || filePathLength >= filePathBufferSize) {
+            return 0;
+        }
+        return openBuffer(filePath, isUpdatable);
+    }
+
~MmappedBuffer() {
+ if (mAlignedSize == 0) {
+ return;
+ }
int ret = munmap(mMmappedBuffer, mAlignedSize);
if (ret != 0) {
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
@@ -89,6 +119,11 @@ class MmappedBuffer {
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
+    // Empty file. We have to handle an empty file as a valid part of a dictionary.
+    // Nothing is mapped: buffer pointers are null and all sizes are 0. The destructor returns
+    // early when mAlignedSize is 0, so it does not call munmap() for such an instance.
+    // mMmapFd is set to -1 rather than 0 because 0 is a valid descriptor number (standard
+    // input); -1 cannot refer to an open file if it is ever passed to close().
+    AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
+            : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(-1),
+              mIsUpdatable(isUpdatable) {}
+
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
uint8_t *const mBuffer;