mecab_ko_dict/trie/
owned.rs1use std::borrow::Cow;
4#[cfg(feature = "zstd")]
5use std::io::{Read, Write as IoWrite};
6use std::path::Path;
7
8use yada::{builder::DoubleArrayBuilder, DoubleArray};
9
10use crate::error::{DictError, Result};
11
12use super::backend::PrefixSearchResult;
13
/// Double-array trie backed by a serialized byte buffer.
///
/// The buffer is held as a [`Cow`], so the trie can either borrow bytes for
/// the lifetime `'a` or own them outright (giving a `Trie<'static>`).
pub struct Trie<'a> {
    /// The underlying `yada` double array over the (borrowed or owned) bytes.
    da: DoubleArray<Cow<'a, [u8]>>,
}
22
23impl<'a> Trie<'a> {
24 #[must_use]
26 pub fn new(bytes: &'a [u8]) -> Self {
27 Self {
28 da: DoubleArray::new(Cow::Borrowed(bytes)),
29 }
30 }
31
32 #[must_use]
34 pub fn from_vec(bytes: Vec<u8>) -> Trie<'static> {
35 Trie {
36 da: DoubleArray::new(Cow::Owned(bytes)),
37 }
38 }
39
40 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Trie<'static>> {
46 let bytes = std::fs::read(path.as_ref()).map_err(DictError::Io)?;
47 Ok(Self::from_vec(bytes))
48 }
49
50 #[cfg(feature = "zstd")]
56 pub fn from_compressed_file<P: AsRef<Path>>(path: P) -> Result<Trie<'static>> {
57 let file = std::fs::File::open(path.as_ref()).map_err(DictError::Io)?;
58 let mut decoder = zstd::Decoder::new(file).map_err(DictError::Io)?;
59 let mut bytes = Vec::new();
60 decoder.read_to_end(&mut bytes).map_err(DictError::Io)?;
61 Ok(Self::from_vec(bytes))
62 }
63
64 #[cfg(not(feature = "zstd"))]
70 pub fn from_compressed_file<P: AsRef<Path>>(_path: P) -> Result<Trie<'static>> {
71 Err(DictError::Format(
72 "zstd feature is not enabled. Use uncompressed files or enable the 'zstd' feature."
73 .to_string(),
74 ))
75 }
76
77 #[must_use]
87 pub fn exact_match(&self, key: &str) -> Option<u32> {
88 self.da.exact_match_search(key.as_bytes())
89 }
90
91 #[must_use]
93 pub fn exact_match_bytes(&self, key: &[u8]) -> Option<u32> {
94 self.da.exact_match_search(key)
95 }
96
97 pub fn common_prefix_search<'b>(
111 &'b self,
112 text: &'b str,
113 ) -> impl Iterator<Item = (u32, usize)> + 'b {
114 self.da.common_prefix_search(text.as_bytes())
115 }
116
117 pub fn common_prefix_search_bytes<'b>(
119 &'b self,
120 key: &'b [u8],
121 ) -> impl Iterator<Item = (u32, usize)> + 'b {
122 self.da.common_prefix_search(key)
123 }
124
125 #[must_use]
127 pub fn common_prefix_search_at(
128 &self,
129 text: &str,
130 start_byte: usize,
131 ) -> PrefixSearchResult {
132 if start_byte >= text.len() {
133 return PrefixSearchResult::new();
134 }
135
136 let suffix = &text[start_byte..];
137 self.da
138 .common_prefix_search(suffix.as_bytes())
139 .map(|(value, len)| (value, start_byte + len))
140 .collect()
141 }
142}
143
/// Stateless builder for serialized double-array tries.
///
/// All functionality is exposed as associated functions that build the trie
/// bytes from key/value entries or write those bytes to disk.
pub struct TrieBuilder;
148
149impl TrieBuilder {
150 pub fn build(entries: &[(&str, u32)]) -> Result<Vec<u8>> {
164 if entries.is_empty() {
165 return Err(DictError::Format(
166 "Cannot build Trie from empty entries".to_string(),
167 ));
168 }
169
170 let keyset: Vec<_> = entries.iter().map(|(k, v)| (k.as_bytes(), *v)).collect();
171
172 DoubleArrayBuilder::build(&keyset)
173 .ok_or_else(|| DictError::Format("Failed to build Double-Array Trie".to_string()))
174 }
175
176 pub fn build_bytes(entries: &[(&[u8], u32)]) -> Result<Vec<u8>> {
182 if entries.is_empty() {
183 return Err(DictError::Format(
184 "Cannot build Trie from empty entries".to_string(),
185 ));
186 }
187
188 DoubleArrayBuilder::build(entries)
189 .ok_or_else(|| DictError::Format("Failed to build Double-Array Trie".to_string()))
190 }
191
192 pub fn build_unsorted(entries: &mut [(&str, u32)]) -> Result<Vec<u8>> {
198 entries.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
199 Self::build(entries)
200 }
201
202 pub fn save_to_file<P: AsRef<Path>>(bytes: &[u8], path: P) -> Result<()> {
208 std::fs::write(path.as_ref(), bytes).map_err(DictError::Io)
209 }
210
211 #[cfg(feature = "zstd")]
217 pub fn save_to_compressed_file<P: AsRef<Path>>(
218 bytes: &[u8],
219 path: P,
220 level: i32,
221 ) -> Result<()> {
222 let file = std::fs::File::create(path.as_ref()).map_err(DictError::Io)?;
223 let mut encoder = zstd::Encoder::new(file, level).map_err(DictError::Io)?;
224 encoder.write_all(bytes).map_err(DictError::Io)?;
225 encoder.finish().map_err(DictError::Io)?;
226 Ok(())
227 }
228
229 #[cfg(not(feature = "zstd"))]
235 pub fn save_to_compressed_file<P: AsRef<Path>>(
236 _bytes: &[u8],
237 _path: P,
238 _level: i32,
239 ) -> Result<()> {
240 Err(DictError::Format(
241 "zstd feature is not enabled. Use uncompressed files or enable the 'zstd' feature."
242 .to_string(),
243 ))
244 }
245}