Skip to main content

mecab_ko_dict/
lazy_entries_v3.rs

1//! # Lazy Entry Loading — entries.bin v3 (MKE3)
2//!
3//! v3 upgrades `feature_len` from `u16` to `u32`, removing the 65535-byte
4//! feature-string limit present in v2.
5//!
6//! ## Format
7//!
8//! ```text
9//! [Header — 24 bytes]
10//!   magic:        [u8; 4]  = "MKE3"
11//!   version:      u32 (LE) = 3
12//!   count:        u32 (LE)
13//!   flags:        u16 (LE)  bit 0 = FEATURE_U32 (always set)
14//!   reserved:     u16 (LE) = 0
15//!   index_offset: u64 (LE)
16//!
17//! [Entry Records — variable length]
18//!   left_id:      u16 (LE)
19//!   right_id:     u16 (LE)
20//!   cost:         i16 (LE)
21//!   surface_len:  u16 (LE)
22//!   feature_len:  u32 (LE)   ← upgraded from u16
23//!   surface:      [u8; surface_len]
24//!   feature:      [u8; feature_len]
25//!
26//! [Index Table — count × 8 bytes]
27//!   offset_N: u64 (LE)
28//! ```
29
30use std::io::{Read, Seek, SeekFrom};
31use std::num::NonZeroUsize;
32use std::path::{Path, PathBuf};
33use std::sync::{Arc, RwLock};
34
35use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
36use memmap2::Mmap;
37
38use crate::dictionary::DictEntry;
39use crate::error::{DictError, Result};
40
/// Four-byte signature found at the very start of every MKE3 file.
pub const ENTRIES_V3_MAGIC: &[u8; 4] = b"MKE3";
/// Value of the header `version` field for this format.
pub const ENTRIES_V3_VERSION: u32 = 3;
/// Byte length of the MKE3 header:
/// magic (4) + version (4) + count (4) + flags (2) + reserved (2) + index_offset (8).
pub const HEADER_V3_SIZE: usize = 4 + 4 + 4 + 2 + 2 + 8;

/// Header flag (bit 0): the per-record `feature_len` field is a `u32`.
/// The v3 writer always sets it.
pub const FEATURE_U32: u16 = 0x0001;

/// Number of decoded entries a freshly opened store keeps in its LRU cache.
const DEFAULT_CACHE_SIZE: usize = 10_000;

/// `DEFAULT_CACHE_SIZE` as a `NonZeroUsize`.
///
/// Evaluated at compile time, so a zero default fails the build instead of
/// panicking at run time.
const DEFAULT_CACHE_SIZE_NZ: NonZeroUsize = match NonZeroUsize::new(DEFAULT_CACHE_SIZE) {
    Some(size) => size,
    None => panic!("DEFAULT_CACHE_SIZE must be > 0"),
};
60
/// On-disk layout of an `entries.bin` file, as identified by its magic bytes.
///
/// Marked `#[non_exhaustive]` so further format revisions can be added
/// without breaking downstream `match` expressions.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EntriesFormat {
    /// Magic `MKED`: the legacy v1 layout.
    V1,
    /// Magic `MKE2`: v2, whose `feature_len` field is a `u16`.
    V2,
    /// Magic `MKE3`: v3, whose `feature_len` field is a `u32`.
    V3,
}
72
73/// Read the first 4 bytes of a file and return the format.
74///
75/// # Errors
76///
77/// Returns `DictError::Io` if the file cannot be read, or
78/// `DictError::Format` if the magic bytes are unrecognised.
79pub fn detect_entries_format<P: AsRef<Path>>(path: P) -> Result<EntriesFormat> {
80    use std::io::Read as _;
81    let mut file = std::fs::File::open(path.as_ref()).map_err(DictError::Io)?;
82    let mut magic = [0u8; 4];
83    file.read_exact(&mut magic)
84        .map_err(|e| DictError::Format(format!("cannot read magic: {e}")))?;
85    match &magic {
86        b"MKE3" => Ok(EntriesFormat::V3),
87        b"MKE2" => Ok(EntriesFormat::V2),
88        b"MKED" => Ok(EntriesFormat::V1),
89        _ => Err(DictError::Format(format!(
90            "unknown magic bytes: {magic:?}"
91        ))),
92    }
93}
94
95/// Lazy-loading entry store backed by an MKE3 memory-mapped file.
96pub struct LazyEntriesV3 {
97    #[allow(dead_code)]
98    path: PathBuf,
99    mmap: Mmap,
100    count: u32,
101    index_offset: u64,
102    flags: u16,
103    cache: RwLock<lru::LruCache<u32, Arc<DictEntry>>>,
104}
105
106impl LazyEntriesV3 {
107    /// Open an MKE3 file and memory-map it.
108    ///
109    /// # Errors
110    ///
111    /// Returns `DictError::Format` for invalid headers and
112    /// `DictError::Io` for I/O failures.
113    #[allow(unsafe_code)]
114    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
115        let path = path.as_ref().to_path_buf();
116        let file = std::fs::File::open(&path).map_err(DictError::Io)?;
117        // SAFETY: The file is opened read-only; the mmap is immutable for
118        // the lifetime of this struct, and no writes occur through it.
119        let mmap = unsafe { Mmap::map(&file).map_err(DictError::Io)? };
120
121        if mmap.len() < HEADER_V3_SIZE {
122            return Err(DictError::Format("MKE3: file too small".into()));
123        }
124
125        let mut cur = std::io::Cursor::new(&mmap[..]);
126
127        let mut magic = [0u8; 4];
128        cur.read_exact(&mut magic)
129            .map_err(|e| DictError::Format(format!("MKE3: cannot read magic: {e}")))?;
130        if &magic != ENTRIES_V3_MAGIC {
131            return Err(DictError::Format(
132                "MKE3: invalid magic (expected MKE3)".into(),
133            ));
134        }
135
136        let version = cur
137            .read_u32::<LittleEndian>()
138            .map_err(|e| DictError::Format(format!("MKE3: cannot read version: {e}")))?;
139        if version != ENTRIES_V3_VERSION {
140            return Err(DictError::Format(format!(
141                "MKE3: unsupported version {version}"
142            )));
143        }
144
145        let count = cur
146            .read_u32::<LittleEndian>()
147            .map_err(|e| DictError::Format(format!("MKE3: cannot read count: {e}")))?;
148
149        let flags = cur
150            .read_u16::<LittleEndian>()
151            .map_err(|e| DictError::Format(format!("MKE3: cannot read flags: {e}")))?;
152
153        // reserved u16 — skip
154        cur.read_u16::<LittleEndian>()
155            .map_err(|e| DictError::Format(format!("MKE3: cannot read reserved: {e}")))?;
156
157        let index_offset = cur
158            .read_u64::<LittleEndian>()
159            .map_err(|e| DictError::Format(format!("MKE3: cannot read index_offset: {e}")))?;
160
161        let expected_index_end = index_offset + u64::from(count) * 8;
162        if expected_index_end > mmap.len() as u64 {
163            return Err(DictError::Format(format!(
164                "MKE3: index table extends beyond file (offset={index_offset}, count={count}, file_len={})",
165                mmap.len()
166            )));
167        }
168
169        Ok(Self {
170            path,
171            mmap,
172            count,
173            index_offset,
174            flags,
175            cache: RwLock::new(lru::LruCache::new(DEFAULT_CACHE_SIZE_NZ)),
176        })
177    }
178
179    /// Total number of entries in the file.
180    #[must_use]
181    pub const fn len(&self) -> usize {
182        self.count as usize
183    }
184
185    /// Returns `true` if there are no entries.
186    #[must_use]
187    pub const fn is_empty(&self) -> bool {
188        self.count == 0
189    }
190
191    /// Number of entries currently held in the LRU cache.
192    #[must_use]
193    pub fn cached_count(&self) -> usize {
194        self.cache.read().map(|c| c.len()).unwrap_or(0)
195    }
196
197    /// Resize the LRU cache (minimum 1).
198    pub fn set_cache_size(&self, size: usize) {
199        if let Ok(mut cache) = self.cache.write() {
200            cache.resize(NonZeroUsize::new(size).unwrap_or(NonZeroUsize::new(1).unwrap()));
201        }
202    }
203
204    /// Evict all cached entries.
205    pub fn clear_cache(&self) {
206        if let Ok(mut cache) = self.cache.write() {
207            cache.clear();
208        }
209    }
210
211    /// Flags field from the header.
212    #[must_use]
213    pub const fn flags(&self) -> u16 {
214        self.flags
215    }
216
217    /// Look up entry `index`, returning a shared `Arc`.
218    ///
219    /// Cache check uses `peek` (no LRU promotion) to avoid a write lock on
220    /// every hit; the write lock is taken only on a miss.
221    ///
222    /// # Errors
223    ///
224    /// Returns `DictError::Format` when `index` is out of bounds or the
225    /// on-disk record is corrupt.
226    pub fn get(&self, index: u32) -> Result<Arc<DictEntry>> {
227        {
228            let cache = self
229                .cache
230                .read()
231                .map_err(|_| DictError::Format("MKE3: cache lock poisoned".into()))?;
232            if let Some(entry) = cache.peek(&index) {
233                return Ok(Arc::clone(entry));
234            }
235        }
236
237        let entry = Arc::new(self.load_entry_from_mmap(index)?);
238        {
239            let mut cache = self
240                .cache
241                .write()
242                .map_err(|_| DictError::Format("MKE3: cache lock poisoned".into()))?;
243            if let Some(existing) = cache.get(&index) {
244                return Ok(Arc::clone(existing));
245            }
246            cache.put(index, Arc::clone(&entry));
247        }
248        Ok(entry)
249    }
250
251    /// Collect all consecutive entries starting at `first_index` that share `surface`.
252    ///
253    /// Iterates forward from `first_index` until either the end of the file or
254    /// an entry whose surface differs from `surface`.
255    ///
256    /// # Errors
257    ///
258    /// Returns `DictError::Format` if any individual entry cannot be loaded.
259    pub fn get_entries_at(&self, first_index: u32, surface: &str) -> Result<Vec<Arc<DictEntry>>> {
260        let mut results = Vec::new();
261        let mut index = first_index;
262        while index < self.count {
263            let entry = self.get(index)?;
264            if entry.surface == surface {
265                results.push(entry);
266                index += 1;
267            } else {
268                break;
269            }
270        }
271        Ok(results)
272    }
273
274    fn entry_offset(&self, index: u32) -> Result<u64> {
275        if index >= self.count {
276            return Err(DictError::Format(format!(
277                "MKE3: index {index} out of bounds (count={})",
278                self.count
279            )));
280        }
281        let table_pos = self.index_offset + u64::from(index) * 8;
282        let mmap_len = u64::try_from(self.mmap.len())
283            .map_err(|_| DictError::Format("MKE3: mmap length overflow".into()))?;
284        if table_pos + 8 > mmap_len {
285            return Err(DictError::Format(format!(
286                "MKE3: index table overflow at position {table_pos}"
287            )));
288        }
289        let pos = usize::try_from(table_pos)
290            .map_err(|_| DictError::Format("MKE3: table position overflow".into()))?;
291        let mut cur = std::io::Cursor::new(&self.mmap[pos..]);
292        cur.read_u64::<LittleEndian>()
293            .map_err(|e| DictError::Format(format!("MKE3: cannot read entry offset: {e}")))
294    }
295
296    fn load_entry_from_mmap(&self, index: u32) -> Result<DictEntry> {
297        let offset = self.entry_offset(index)?;
298        let offset_usize = usize::try_from(offset)
299            .map_err(|_| DictError::Format("MKE3: offset overflow".into()))?;
300        if offset_usize >= self.mmap.len() {
301            return Err(DictError::Format(format!(
302                "MKE3: entry {index} offset {offset} out of mmap bounds"
303            )));
304        }
305
306        let mut cur = std::io::Cursor::new(&self.mmap[offset_usize..]);
307
308        let left_id = cur
309            .read_u16::<LittleEndian>()
310            .map_err(|e| DictError::Format(format!("MKE3 entry {index} left_id: {e}")))?;
311        let right_id = cur
312            .read_u16::<LittleEndian>()
313            .map_err(|e| DictError::Format(format!("MKE3 entry {index} right_id: {e}")))?;
314        let cost = cur
315            .read_i16::<LittleEndian>()
316            .map_err(|e| DictError::Format(format!("MKE3 entry {index} cost: {e}")))?;
317        let surface_len = cur
318            .read_u16::<LittleEndian>()
319            .map_err(|e| DictError::Format(format!("MKE3 entry {index} surface_len: {e}")))?
320            as usize;
321        let feature_len = cur
322            .read_u32::<LittleEndian>()
323            .map_err(|e| DictError::Format(format!("MKE3 entry {index} feature_len: {e}")))?
324            as usize;
325
326        let record_header = 2 + 2 + 2 + 2 + 4;
327        let remaining = self.mmap.len().saturating_sub(offset_usize + record_header);
328        if surface_len + feature_len > remaining {
329            return Err(DictError::Format(format!(
330                "MKE3 entry {index}: surface_len({surface_len}) + feature_len({feature_len}) exceeds remaining bytes({remaining})"
331            )));
332        }
333
334        let mut surface_bytes = vec![0u8; surface_len];
335        cur.read_exact(&mut surface_bytes)
336            .map_err(|e| DictError::Format(format!("MKE3 entry {index} surface: {e}")))?;
337        let surface = String::from_utf8(surface_bytes)
338            .map_err(|e| DictError::Format(format!("MKE3 entry {index} surface utf8: {e}")))?;
339
340        let mut feature_bytes = vec![0u8; feature_len];
341        cur.read_exact(&mut feature_bytes)
342            .map_err(|e| DictError::Format(format!("MKE3 entry {index} feature: {e}")))?;
343        let feature = String::from_utf8(feature_bytes)
344            .map_err(|e| DictError::Format(format!("MKE3 entry {index} feature utf8: {e}")))?;
345
346        Ok(DictEntry {
347            surface,
348            left_id,
349            right_id,
350            cost,
351            feature,
352        })
353    }
354}
355
356/// Write `entries` as an MKE3 file at `path`.
357///
358/// # Errors
359///
360/// Returns `DictError::Io` on write failure, or `DictError::Format` when
361/// the entry count or surface length exceeds supported limits.
362pub fn save_entries_v3<P: AsRef<Path>>(entries: &[DictEntry], path: P) -> Result<()> {
363    use std::io::Write;
364
365    let path = path.as_ref();
366    let mut file = std::fs::File::create(path).map_err(DictError::Io)?;
367
368    let count = u32::try_from(entries.len())
369        .map_err(|_| DictError::Format("MKE3: too many entries".into()))?;
370
371    // Header — index_offset written as 0 placeholder, patched at the end.
372    file.write_all(ENTRIES_V3_MAGIC).map_err(DictError::Io)?;
373    file.write_u32::<LittleEndian>(ENTRIES_V3_VERSION)
374        .map_err(DictError::Io)?;
375    file.write_u32::<LittleEndian>(count)
376        .map_err(DictError::Io)?;
377    file.write_u16::<LittleEndian>(FEATURE_U32)
378        .map_err(DictError::Io)?;
379    file.write_u16::<LittleEndian>(0) // reserved
380        .map_err(DictError::Io)?;
381    file.write_u64::<LittleEndian>(0) // placeholder
382        .map_err(DictError::Io)?;
383
384    let mut offsets: Vec<u64> = Vec::with_capacity(entries.len());
385
386    for entry in entries {
387        let offset = file.stream_position().map_err(DictError::Io)?;
388        offsets.push(offset);
389
390        file.write_u16::<LittleEndian>(entry.left_id)
391            .map_err(DictError::Io)?;
392        file.write_u16::<LittleEndian>(entry.right_id)
393            .map_err(DictError::Io)?;
394        file.write_i16::<LittleEndian>(entry.cost)
395            .map_err(DictError::Io)?;
396
397        let surface_bytes = entry.surface.as_bytes();
398        let surface_len = u16::try_from(surface_bytes.len())
399            .map_err(|_| DictError::Format("MKE3: surface too long".into()))?;
400        file.write_u16::<LittleEndian>(surface_len)
401            .map_err(DictError::Io)?;
402
403        let feature_bytes = entry.feature.as_bytes();
404        let feature_len = u32::try_from(feature_bytes.len())
405            .map_err(|_| DictError::Format("MKE3: feature too long".into()))?;
406        file.write_u32::<LittleEndian>(feature_len)
407            .map_err(DictError::Io)?;
408
409        file.write_all(surface_bytes).map_err(DictError::Io)?;
410        file.write_all(feature_bytes).map_err(DictError::Io)?;
411    }
412
413    // Index table
414    let index_offset = file.stream_position().map_err(DictError::Io)?;
415    for offset in offsets {
416        file.write_u64::<LittleEndian>(offset)
417            .map_err(DictError::Io)?;
418    }
419
420    // Patch index_offset in header (starts at byte 16: 4+4+4+2+2 = 16)
421    file.seek(SeekFrom::Start(16)).map_err(DictError::Io)?;
422    file.write_u64::<LittleEndian>(index_offset)
423        .map_err(DictError::Io)?;
424
425    Ok(())
426}
427
428/// Migrate an entries.bin file from v2 (MKE2) format to v3 (MKE3).
429///
430/// Reads all entries from the v2 file, then writes them in v3 format.
431///
432/// # Errors
433///
434/// Returns an error if the source cannot be read or the destination
435/// cannot be written.
436pub fn migrate_v2_to_v3<P: AsRef<Path>, Q: AsRef<Path>>(
437    v2_path: P,
438    v3_path: Q,
439) -> Result<usize> {
440    use crate::lazy_entries::LazyEntries;
441
442    let v2 = LazyEntries::from_file(v2_path)?;
443    let count = v2.len();
444    let entries = v2.load_all()?;
445    save_entries_v3(&entries, v3_path)?;
446    Ok(count)
447}
448
#[cfg(test)]
mod tests {
    // Panicking on failure is the desired behaviour inside tests.
    #![allow(clippy::expect_used, clippy::unwrap_used)]

    use super::*;
    use tempfile::tempdir;

    /// Five entries with distinct surfaces, ids and costs (one negative).
    fn sample_entries() -> Vec<DictEntry> {
        vec![
            DictEntry::new("안녕", 1, 1, 100, "NNG,*,T,안녕,*,*,*,*"),
            DictEntry::new("하세요", 2, 2, 50, "VV,*,F,하세요,*,*,*,*"),
            DictEntry::new("감사", 3, 3, 80, "NNG,*,F,감사,*,*,*,*"),
            DictEntry::new("합니다", 4, 4, -10, "XSV,*,F,합니다,*,*,*,*"),
            DictEntry::new("가", 5, 5, 200, "JKS,*,F,가,*,*,*,*"),
        ]
    }

    /// Three entries in which the first two share the surface "가".
    fn shared_surface_entries() -> Vec<DictEntry> {
        vec![
            DictEntry::new("가", 1, 1, 100, "NNG"),
            DictEntry::new("가", 2, 2, 50, "JKS"),
            DictEntry::new("나", 3, 3, 200, "NP"),
        ]
    }

    /// Every field survives a save/load cycle and out-of-range lookups fail.
    #[test]
    fn test_v3_roundtrip() {
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("entries_v3.bin");
        let entries = sample_entries();

        save_entries_v3(&entries, &path).expect("save");
        let lazy = LazyEntriesV3::from_file(&path).expect("load");

        assert_eq!(lazy.len(), 5);
        assert!(!lazy.is_empty());
        assert_eq!(lazy.flags() & FEATURE_U32, FEATURE_U32);

        for (i, expected) in (0u32..).zip(&entries) {
            let got = lazy.get(i).expect("get");
            assert_eq!(got.surface, expected.surface, "surface[{i}]");
            assert_eq!(got.left_id, expected.left_id, "left_id[{i}]");
            assert_eq!(got.right_id, expected.right_id, "right_id[{i}]");
            assert_eq!(got.cost, expected.cost, "cost[{i}]");
            assert_eq!(got.feature, expected.feature, "feature[{i}]");
        }

        assert!(lazy.get(5).is_err());
    }

    /// A feature string longer than v2's 65535-byte cap round-trips intact.
    #[test]
    fn test_v3_large_feature() {
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("large_feature.bin");

        // v2's u16 cap is 65535; this exceeds it.
        let large_feature = "X".repeat(70_000);
        let entry = DictEntry::new("테스트", 10, 10, 0, &large_feature);
        save_entries_v3(&[entry], &path).expect("save large feature");

        let lazy = LazyEntriesV3::from_file(&path).expect("load");
        assert_eq!(lazy.len(), 1);

        let got = lazy.get(0).expect("get");
        assert_eq!(got.surface, "테스트");
        assert_eq!(got.feature.len(), 70_000);
        assert!(got.feature.chars().all(|c| c == 'X'));
    }

    /// Each known magic maps to its format; an unknown magic is an error.
    #[test]
    fn test_detect_format() {
        use std::io::Write;

        let dir = tempdir().expect("tempdir");

        let v3_path = dir.path().join("v3.bin");
        save_entries_v3(&sample_entries(), &v3_path).expect("save v3");
        assert_eq!(
            detect_entries_format(&v3_path).expect("detect v3"),
            EntriesFormat::V3
        );

        // Fake v2 and v1 files consisting of nothing but the magic bytes.
        let magic_only_cases: [(&str, &[u8; 4], &str, &str, EntriesFormat); 2] = [
            ("v2.bin", b"MKE2", "create v2 file", "detect v2", EntriesFormat::V2),
            ("v1.bin", b"MKED", "create v1 file", "detect v1", EntriesFormat::V1),
        ];
        for (file_name, magic, create_msg, detect_msg, expected) in magic_only_cases {
            let path = dir.path().join(file_name);
            {
                let mut f = std::fs::File::create(&path).expect(create_msg);
                f.write_all(magic).expect("write magic");
            }
            assert_eq!(detect_entries_format(&path).expect(detect_msg), expected);
        }

        // Unknown magic → error.
        let unk_path = dir.path().join("unk.bin");
        {
            let mut f = std::fs::File::create(&unk_path).expect("create unk file");
            f.write_all(b"????").expect("write magic");
        }
        assert!(detect_entries_format(&unk_path).is_err());
    }

    /// Runs of equal surfaces are collected; a mismatch at the start is empty.
    #[test]
    fn test_get_entries_at() {
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("entries_v3.bin");

        save_entries_v3(&shared_surface_entries(), &path).expect("save");
        let lazy = LazyEntriesV3::from_file(&path).expect("load");

        let run = lazy.get_entries_at(0, "가").expect("get_entries_at");
        assert_eq!(run.len(), 2);
        assert_eq!(run[0].feature, "NNG");
        assert_eq!(run[1].feature, "JKS");

        let single = lazy.get_entries_at(2, "나").expect("get_entries_at");
        assert_eq!(single.len(), 1);
        assert_eq!(single[0].surface, "나");

        let none = lazy.get_entries_at(0, "다").expect("get_entries_at");
        assert!(none.is_empty());
    }

    /// Cache population, clearing and eviction after resizing.
    #[test]
    fn test_v3_cache() {
        let dir = tempdir().expect("tempdir");
        let path = dir.path().join("cache_test.bin");

        save_entries_v3(&sample_entries(), &path).expect("save");
        let lazy = LazyEntriesV3::from_file(&path).expect("load");

        // Load an entry purely for its effect on the cache.
        let touch = |index: u32, msg: &str| {
            let _ = lazy.get(index).expect(msg);
        };

        assert_eq!(lazy.cached_count(), 0);

        touch(0, "get 0");
        assert_eq!(lazy.cached_count(), 1);

        touch(0, "get 0 again");
        assert_eq!(lazy.cached_count(), 1, "no duplicate on repeated get");

        touch(1, "get 1");
        assert_eq!(lazy.cached_count(), 2);

        lazy.clear_cache();
        assert_eq!(lazy.cached_count(), 0);

        // set_cache_size to 1 and verify LRU eviction.
        lazy.set_cache_size(1);
        touch(0, "get 0");
        touch(1, "get 1");
        assert_eq!(lazy.cached_count(), 1);
    }

    /// A v2 file converts to a v3 file with the same entries.
    #[test]
    fn test_migrate_v2_to_v3() {
        use crate::lazy_entries::LazyEntries;

        let dir = tempdir().expect("tempdir");
        let v2_path = dir.path().join("entries_v2.bin");
        let v3_path = dir.path().join("entries_v3.bin");

        LazyEntries::save_entries(&shared_surface_entries(), &v2_path).expect("save v2");

        let count = migrate_v2_to_v3(&v2_path, &v3_path).expect("migrate");
        assert_eq!(count, 3);

        assert_eq!(
            detect_entries_format(&v3_path).expect("detect"),
            EntriesFormat::V3
        );

        let v3 = LazyEntriesV3::from_file(&v3_path).expect("load v3");
        assert_eq!(v3.len(), 3);

        let first = v3.get(0).expect("get 0");
        assert_eq!(first.surface, "가");
        assert_eq!(first.left_id, 1);
        assert_eq!(first.feature, "NNG");

        let last = v3.get(2).expect("get 2");
        assert_eq!(last.surface, "나");
        assert_eq!(last.feature, "NP");
    }
}