Skip to main content

mecab_ko_dict/matrix/
sparse.rs

1//! 희소 연접 비용 행렬 (Sparse Connection Cost Matrix)
2
3use super::{dense::DenseMatrix, Matrix};
4
5/// 희소 연접 비용 행렬 (Sparse Matrix)
6///
7/// 희소 행렬을 효율적으로 저장하는 구현입니다.
8/// 대부분의 값이 기본값인 경우 메모리를 절약합니다.
9#[derive(Debug, Clone)]
10pub struct SparseMatrix {
11    /// 좌문맥 크기
12    lsize: usize,
13    /// 우문맥 크기
14    rsize: usize,
15    /// 기본 비용 (희소 엔트리에 없는 경우)
16    default_cost: i16,
17    /// 희소 엔트리 (key: `right_id` + lsize * `left_id`, value: cost)
18    entries: std::collections::HashMap<usize, i16>,
19}
20
21impl SparseMatrix {
22    /// 새로운 희소 행렬 생성
23    #[must_use]
24    pub fn new(lsize: usize, rsize: usize, default_cost: i16) -> Self {
25        Self {
26            lsize,
27            rsize,
28            default_cost,
29            entries: std::collections::HashMap::new(),
30        }
31    }
32
33    /// 비용 설정
34    pub fn set(&mut self, right_id: u16, left_id: u16, cost: i16) {
35        let index = right_id as usize + self.lsize * left_id as usize;
36        if cost == self.default_cost {
37            self.entries.remove(&index);
38        } else {
39            self.entries.insert(index, cost);
40        }
41    }
42
43    /// `DenseMatrix에서` 변환 (기본값과 다른 엔트리만 저장)
44    #[must_use]
45    pub fn from_dense(dense: &DenseMatrix, default_cost: i16) -> Self {
46        let mut sparse = Self::new(dense.lsize, dense.rsize, default_cost);
47        for (index, &cost) in dense.costs.iter().enumerate() {
48            if cost != default_cost {
49                sparse.entries.insert(index, cost);
50            }
51        }
52        sparse
53    }
54
55    /// `DenseMatrix로` 변환
56    #[must_use]
57    pub fn to_dense(&self) -> DenseMatrix {
58        let mut costs = vec![self.default_cost; self.lsize * self.rsize];
59        for (&index, &cost) in &self.entries {
60            if index < costs.len() {
61                costs[index] = cost;
62            }
63        }
64        DenseMatrix {
65            lsize: self.lsize,
66            rsize: self.rsize,
67            costs,
68        }
69    }
70
71    /// 엔트리 수
72    #[must_use]
73    pub fn entry_count_stored(&self) -> usize {
74        self.entries.len()
75    }
76
77    /// 희소도 (0.0 ~ 1.0, 1.0 = 완전 희소)
78    #[must_use]
79    pub fn sparsity(&self) -> f64 {
80        let total = self.lsize * self.rsize;
81        if total == 0 {
82            return 0.0;
83        }
84        #[allow(clippy::cast_precision_loss)]
85        let entries_len = self.entries.len() as f64;
86        #[allow(clippy::cast_precision_loss)]
87        let total_f64 = total as f64;
88        1.0 - (entries_len / total_f64)
89    }
90
91    /// 메모리 사용량 (바이트, 대략적)
92    #[must_use]
93    pub fn memory_size(&self) -> usize {
94        std::mem::size_of::<Self>()
95            + self.entries.capacity() * (std::mem::size_of::<usize>() + std::mem::size_of::<i16>())
96    }
97}
98
99impl Matrix for SparseMatrix {
100    #[inline(always)]
101    fn get(&self, right_id: u16, left_id: u16) -> i32 {
102        let index = right_id as usize + self.lsize * left_id as usize;
103        self.entries
104            .get(&index)
105            .map_or_else(|| i32::from(self.default_cost), |&c| i32::from(c))
106    }
107
108    fn left_size(&self) -> usize {
109        self.lsize
110    }
111
112    fn right_size(&self) -> usize {
113        self.rsize
114    }
115}