mecab_ko_dict/matrix/
mmap.rs1use std::path::Path;
4
5use crate::error::{DictError, Result};
6
7use super::{dense::DenseMatrix, parse_matrix_header, Matrix, INVALID_CONNECTION_COST};
8
9pub struct MmapMatrix {
19 lsize: usize,
21 rsize: usize,
23 header_size: usize,
25 mmap: memmap2::Mmap,
27}
28
29impl MmapMatrix {
30 #[allow(unsafe_code)]
41 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
42 let file = std::fs::File::open(path.as_ref()).map_err(DictError::Io)?;
43
44 let mmap = unsafe { memmap2::Mmap::map(&file).map_err(DictError::Io)? };
47
48 let header = parse_matrix_header(&mmap)?;
50
51 let expected_size = header.header_size + header.lsize * header.rsize * 2;
52 if mmap.len() != expected_size {
53 return Err(DictError::Format(format!(
54 "Matrix file size mismatch: expected {} bytes, got {}",
55 expected_size,
56 mmap.len()
57 )));
58 }
59
60 Ok(Self {
61 lsize: header.lsize,
62 rsize: header.rsize,
63 header_size: header.header_size,
64 mmap,
65 })
66 }
67
68 pub fn from_compressed_file<P: AsRef<Path>>(path: P) -> Result<DenseMatrix> {
76 DenseMatrix::from_compressed_file(path)
78 }
79
80 #[inline]
81 const fn offset(&self, right_id: u16, left_id: u16) -> usize {
82 self.header_size + (right_id as usize + self.lsize * left_id as usize) * 2
83 }
84}
85
86impl Matrix for MmapMatrix {
87 #[inline(always)]
88 fn get(&self, right_id: u16, left_id: u16) -> i32 {
89 let offset = self.offset(right_id, left_id);
90 if offset + 2 <= self.mmap.len() {
91 let bytes = [self.mmap[offset], self.mmap[offset + 1]];
92 i32::from(i16::from_le_bytes(bytes))
93 } else {
94 INVALID_CONNECTION_COST
95 }
96 }
97
98 fn left_size(&self) -> usize {
99 self.lsize
100 }
101
102 fn right_size(&self) -> usize {
103 self.rsize
104 }
105}