aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeisuke Kuroyanagi <ksk@google.com>2013-09-24 10:24:28 +0000
committerAndroid (Google) Code Review <android-gerrit@google.com>2013-09-24 10:24:28 +0000
commit3e6b68bc0ef63b7e527976dee00d02770f7d3e5c (patch)
tree191dbbe821d3661e941b62625a774c6b8f71455e
parent3d68b066626d7e58cbe2853cd186b1ad75b90259 (diff)
parent3d2802ef2e28a45374ebb70fa2303093571101ad (diff)
downloadlatinime-3e6b68bc0ef63b7e527976dee00d02770f7d3e5c.tar.gz
latinime-3e6b68bc0ef63b7e527976dee00d02770f7d3e5c.tar.xz
latinime-3e6b68bc0ef63b7e527976dee00d02770f7d3e5c.zip
Merge "Allow large memory space for GC."
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp4
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp5
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h1
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp15
-rw-r--r--native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h25
-rw-r--r--tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java46
6 files changed, 79 insertions, 17 deletions
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 051da1be3..42397c19e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -268,8 +268,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC() const {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
- // TODO: Implement.
- return false;
+ // TODO: Implement more properly.
+ return mBufferWithExtendableBuffer.isNearSizeLimit();
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index d2b9a5e97..a51ae5e1d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -35,6 +35,8 @@ namespace latinime {
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
".tmp";
+// TODO: Make MAX_DICTIONARY_SIZE 8MB.
+const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
@@ -154,7 +156,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
return;
}
- BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
+ MAX_DICTIONARY_SIZE);
if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
return;
}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index d8058cc4e..028fa6075 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -86,6 +86,7 @@ class DynamicPatriciaTrieWritingHelper {
static const int CHILDREN_POSITION_FIELD_SIZE;
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
+ static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index 0fed275e9..f692882f2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -18,9 +18,10 @@
namespace latinime {
-const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
-const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
+const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
+// TODO: Needs to allocate larger memory corresponding to the current vector size.
+const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size,
int *const pos) {
@@ -64,6 +65,16 @@ bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *co
return true;
}
+bool BufferWithExtendableBuffer::extendBuffer() {
+ const size_t sizeAfterExtending =
+ mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
+ if (sizeAfterExtending > mMaxAdditionalBufferSize) {
+ return false;
+ }
+ mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
+ return true;
+}
+
bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int size) {
if (isInAdditionalBuffer(pos)) {
const int tailPosition = getTailPosition();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index c6a484131..17d2e39c2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -32,9 +32,11 @@ namespace latinime {
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public:
- BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
+ BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
+ const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
- mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
+ mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
+ mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
AK_FORCE_INLINE int getTailPosition() const {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
@@ -61,6 +63,11 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize;
}
+ AK_FORCE_INLINE bool isNearSizeLimit() const {
+ return mAdditionalBuffer.size() >= ((mMaxAdditionalBufferSize
+ * NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE) / 100);
+ }
+
/**
* For writing.
*
@@ -75,28 +82,22 @@ class BufferWithExtendableBuffer {
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
- static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
+ static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
+ const size_t mMaxAdditionalBufferSize;
// Return if the buffer is successfully extended or not.
- AK_FORCE_INLINE bool extendBuffer() {
- if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
- > MAX_ADDITIONAL_BUFFER_SIZE) {
- return false;
- }
- mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
- return true;
- }
+ bool extendBuffer();
// Returns if it is possible to write size-bytes from pos. When pos is at the tail position of
// the additional buffer, try extending the buffer.
- AK_FORCE_INLINE bool checkAndPrepareWriting(const int pos, const int size);
+ bool checkAndPrepareWriting(const int pos, const int size);
};
}
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index e26b300a8..96a2217a3 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -604,4 +604,50 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
+
+ public void testAddManyUnigramsAndFlushWithGC() {
+ final int flashWithGCIterationCount = 3;
+ final int codePointSetSize = 50;
+ final int seed = 22360679;
+
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ } catch (UnsupportedFormatException e) {
+ fail("UnsupportedFormatException while writing an initial dictionary : " + e);
+ }
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ BinaryDictionary binaryDictionary;
+ for (int i = 0; i < flashWithGCIterationCount; i++) {
+ binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ while(!binaryDictionary.needsToRunGC()) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ final int unigramProbability = random.nextInt(0xFF);
+ unigramProbabilities.put(word, unigramProbability);
+ binaryDictionary.addUnigramWord(word, unigramProbability);
+ }
+
+ for (int j = 0; j < words.size(); j++) {
+ final String word = words.get(j);
+ final int unigramProbability = unigramProbabilities.get(word);
+ assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
+ }
+
+ binaryDictionary.flushWithGC();
+ binaryDictionary.close();
+ }
+
+ dictFile.delete();
+ }
}