mecab_ko_dict/
entry_store.rs1use std::sync::Arc;
12
13use crate::dictionary::DictEntry;
14use crate::error::{DictError, Result};
15use crate::lazy_entries::LazyEntries;
16use crate::lazy_entries_v3::LazyEntriesV3;
17
18pub trait EntryStore: Send + Sync {
22 fn get(&self, index: u32) -> Result<Arc<DictEntry>>;
29
30 fn get_entries_at(&self, first_index: u32, surface: &str) -> Result<Vec<Arc<DictEntry>>>;
37
38 fn len(&self) -> usize;
40
41 fn is_empty(&self) -> bool {
43 self.len() == 0
44 }
45}
46
47pub struct EagerStore {
52 entries: Vec<Arc<DictEntry>>,
54}
55
56impl EagerStore {
57 #[must_use]
61 pub fn new(entries: Vec<DictEntry>) -> Self {
62 Self {
63 entries: entries.into_iter().map(Arc::new).collect(),
64 }
65 }
66
67 #[must_use]
69 pub const fn from_arc_vec(entries: Vec<Arc<DictEntry>>) -> Self {
70 Self { entries }
71 }
72
73 #[cfg(test)]
75 #[must_use]
76 pub fn entries(&self) -> &[Arc<DictEntry>] {
77 &self.entries
78 }
79}
80
81impl EntryStore for EagerStore {
82 fn get(&self, index: u32) -> Result<Arc<DictEntry>> {
83 self.entries.get(index as usize).cloned().ok_or_else(|| {
84 DictError::Format(format!(
85 "entry index out of bounds: {} >= {}",
86 index,
87 self.entries.len()
88 ))
89 })
90 }
91
92 fn get_entries_at(&self, first_index: u32, surface: &str) -> Result<Vec<Arc<DictEntry>>> {
93 let start = first_index as usize;
94 let results: Vec<Arc<DictEntry>> = self
95 .entries
96 .get(start..)
97 .unwrap_or(&[])
98 .iter()
99 .take_while(|e| e.surface == surface)
100 .cloned()
101 .collect();
102 Ok(results)
103 }
104
105 fn len(&self) -> usize {
106 self.entries.len()
107 }
108}
109
// Generates a public store type that wraps a lazy entry backend.
//
// Arguments, in order:
//   struct_doc = <literal>  doc text attached to the generated struct
//   new_doc    = <literal>  doc text attached to the generated `new` constructor
//   <ident>                 name of the generated struct
//   <type>                  backend type stored inside the struct
//
// The generated type gets inherent cache helpers (`cached_count`,
// `set_cache_size`, `clear_cache`) and an `EntryStore` impl. Every method
// simply forwards to the method of the same name on the backend.
macro_rules! impl_lazy_store {
    (
        struct_doc = $type_doc:literal,
        new_doc = $ctor_doc:literal,
        $store:ident,
        $backend:ty
    ) => {
        #[doc = $type_doc]
        pub struct $store {
            backend: $backend,
        }

        impl $store {
            #[doc = $ctor_doc]
            #[must_use]
            pub const fn new(lazy_entries: $backend) -> Self {
                Self {
                    backend: lazy_entries,
                }
            }

            /// Forwards to the backend's `clear_cache`.
            pub fn clear_cache(&self) {
                self.backend.clear_cache();
            }

            /// Forwards `size` to the backend's `set_cache_size`.
            pub fn set_cache_size(&self, size: usize) {
                self.backend.set_cache_size(size);
            }

            /// Returns the value reported by the backend's `cached_count`.
            #[must_use]
            pub fn cached_count(&self) -> usize {
                self.backend.cached_count()
            }
        }

        impl EntryStore for $store {
            /// Forwards to the backend's `len`.
            fn len(&self) -> usize {
                self.backend.len()
            }

            /// Forwards to the backend's `get`.
            fn get(&self, index: u32) -> Result<Arc<DictEntry>> {
                self.backend.get(index)
            }

            /// Forwards to the backend's `get_entries_at`.
            fn get_entries_at(
                &self,
                first_index: u32,
                surface: &str,
            ) -> Result<Vec<Arc<DictEntry>>> {
                self.backend.get_entries_at(first_index, surface)
            }
        }
    };
}
180
181impl_lazy_store!(
182 struct_doc = "Lazy 로드 저장소.\n\n엔트리를 필요할 때만 디스크에서 읽어옵니다.\n메모리 사용량을 줄이고 싶을 때 사용합니다.",
183 new_doc = "새 Lazy 저장소 생성.",
184 LazyStore,
185 LazyEntries
186);
187
188impl_lazy_store!(
189 struct_doc = "Lazy 로드 저장소 (v3 포맷).\n\nMKE3 v3 포맷의 엔트리를 필요할 때만 디스크에서 읽어옵니다.",
190 new_doc = "새 v3 Lazy 저장소 생성.",
191 LazyStoreV3,
192 LazyEntriesV3
193);
194
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used)]

    use super::*;

    /// Fixture of three entries: two with surface "가", then one with surface "나".
    fn sample_entries() -> Vec<DictEntry> {
        let first = DictEntry::new("가", 1, 1, 100, "NNG");
        let second = DictEntry::new("가", 2, 2, 50, "JKS");
        let third = DictEntry::new("나", 3, 3, 200, "NP");
        vec![first, second, third]
    }

    /// Valid indices return the matching entry; an out-of-range index is an error.
    #[test]
    fn test_eager_store_get() {
        let eager = EagerStore::new(sample_entries());

        let first = eager.get(0).expect("should get entry 0");
        assert_eq!(first.surface, "가");
        assert_eq!(first.left_id, 1);

        let second = eager.get(1).expect("should get entry 1");
        assert_eq!(second.surface, "가");
        assert_eq!(second.left_id, 2);

        let out_of_range = eager.get(100);
        assert!(out_of_range.is_err());
    }

    /// `get_entries_at` returns the run of same-surface entries from the start index.
    #[test]
    fn test_eager_store_get_entries_at() {
        let eager = EagerStore::new(sample_entries());

        // Both "가" entries, in stored order.
        let ga = eager.get_entries_at(0, "가").expect("should get entries");
        assert_eq!(ga.len(), 2);
        assert_eq!(ga[0].feature, "NNG");
        assert_eq!(ga[1].feature, "JKS");

        // The single "나" entry at index 2.
        let na = eager.get_entries_at(2, "나").expect("should get entries");
        assert_eq!(na.len(), 1);
        assert_eq!(na[0].surface, "나");

        // A surface that does not match at the start index yields nothing.
        let da = eager.get_entries_at(0, "다").expect("should get entries");
        assert!(da.is_empty());
    }

    /// `len` and `is_empty` agree for both a populated and an empty store.
    #[test]
    fn test_eager_store_len() {
        let populated = EagerStore::new(sample_entries());
        assert_eq!(populated.len(), 3);
        assert!(!populated.is_empty());

        let blank = EagerStore::new(vec![]);
        assert_eq!(blank.len(), 0);
        assert!(blank.is_empty());
    }

    /// Entries saved with `LazyEntries` can be read back through `LazyStore`.
    #[test]
    fn test_lazy_store_roundtrip() {
        let fixture = sample_entries();
        let tmp = tempfile::tempdir().expect("create temp dir");
        let file = tmp.path().join("entries.bin");

        LazyEntries::save_entries(&fixture, &file).expect("save");

        let store = LazyStore::new(LazyEntries::from_file(&file).expect("load"));

        assert_eq!(store.len(), 3);

        let first = store.get(0).expect("get 0");
        assert_eq!(first.surface, "가");

        let ga = store.get_entries_at(0, "가").expect("get_entries_at");
        assert_eq!(ga.len(), 2);
    }

    /// The cached count goes 0 -> 1 after one `get`, and back to 0 after `clear_cache`.
    #[test]
    fn test_lazy_store_cache() {
        let fixture = sample_entries();
        let tmp = tempfile::tempdir().expect("create temp dir");
        let file = tmp.path().join("entries.bin");

        LazyEntries::save_entries(&fixture, &file).expect("save");
        let store = LazyStore::new(LazyEntries::from_file(&file).expect("load"));

        assert_eq!(store.cached_count(), 0);

        let _ = store.get(0).expect("get 0");
        assert_eq!(store.cached_count(), 1);

        store.clear_cache();
        assert_eq!(store.cached_count(), 0);
    }

    /// Entries saved in the v3 format can be read back through `LazyStoreV3`.
    #[test]
    fn test_lazy_store_v3_roundtrip() {
        let fixture = sample_entries();
        let tmp = tempfile::tempdir().expect("create temp dir");
        let file = tmp.path().join("entries_v3.bin");

        crate::lazy_entries_v3::save_entries_v3(&fixture, &file).expect("save");

        let store = LazyStoreV3::new(LazyEntriesV3::from_file(&file).expect("load"));

        assert_eq!(store.len(), 3);

        let first = store.get(0).expect("get 0");
        assert_eq!(first.surface, "가");

        let ga = store.get_entries_at(0, "가").expect("get_entries_at");
        assert_eq!(ga.len(), 2);
    }

    /// Same cache-count check as `test_lazy_store_cache`, for the v3 store.
    #[test]
    fn test_lazy_store_v3_cache() {
        let fixture = sample_entries();
        let tmp = tempfile::tempdir().expect("create temp dir");
        let file = tmp.path().join("entries_v3.bin");

        crate::lazy_entries_v3::save_entries_v3(&fixture, &file).expect("save");
        let store = LazyStoreV3::new(LazyEntriesV3::from_file(&file).expect("load"));

        assert_eq!(store.cached_count(), 0);

        let _ = store.get(0).expect("get 0");
        assert_eq!(store.cached_count(), 1);

        store.clear_cache();
        assert_eq!(store.cached_count(), 0);
    }
}