slvm/
interner.rs

1use bridge_types::{Keyword, Symbol};
2use std::collections::HashMap;
3use std::hash::{Hash, Hasher};
4use std::mem;
5
6#[derive(Clone, Copy, Debug)]
7pub struct Interned {
8    pub id: u32,
9}
10
11impl From<Symbol> for Interned {
12    fn from(value: Symbol) -> Self {
13        Self {
14            id: u32::from(value),
15        }
16    }
17}
18
19impl From<Interned> for Symbol {
20    fn from(value: Interned) -> Self {
21        Symbol::from(value.id)
22    }
23}
24impl From<Keyword> for Interned {
25    fn from(value: Keyword) -> Self {
26        Self {
27            id: u32::from(value),
28        }
29    }
30}
31
32impl From<Interned> for Keyword {
33    fn from(value: Interned) -> Self {
34        Keyword::from(value.id)
35    }
36}
37
38impl PartialEq for Interned {
39    fn eq(&self, other: &Self) -> bool {
40        self.id == other.id
41    }
42}
43
44impl Eq for Interned {}
45
46impl Hash for Interned {
47    fn hash<H: Hasher>(&self, state: &mut H) {
48        state.write_u32(self.id);
49    }
50}
51
52// This interner is initially based on this: https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html
53// Inspiration also from: https://github.com/CAD97/simple-interner/blob/master/src/interner.rs
54// See https://www.reddit.com/r/rust/comments/fn1jxf/blog_post_fast_and_simple_rust_interner/
55// This is a simple string interner.  It hands out &'static str and it WILL leak memory
56// to keep them valid.  Intended to live for the programs lifetime.
57#[derive(Clone, Debug)]
58pub struct Interner {
59    map: HashMap<&'static str, Interned>,
60    vals: Vec<&'static str>,
61    // Leak buffers to keep the static lifetimes we hand out valid.
62    buf: mem::ManuallyDrop<String>,
63    capacity: usize,
64    used: usize,
65}
66
67impl Interner {
68    /// Create an interner with capacity cap (to the next power of two).
69    pub fn with_capacity(cap: usize) -> Interner {
70        let cap = cap.next_power_of_two();
71        Interner {
72            map: HashMap::default(),
73            vals: Vec::new(),
74            buf: mem::ManuallyDrop::new(String::with_capacity(cap)),
75            capacity: cap,
76            used: 0,
77        }
78    }
79
80    /// True if name is an interned symbol.
81    pub fn contains(&self, name: &str) -> bool {
82        self.map.contains_key(name)
83    }
84
85    fn intern_final(&mut self, name: &'static str) -> Interned {
86        let id = self.vals.len() as u32;
87        self.vals.push(name);
88        let interned = Interned { id };
89        self.map.insert(name, interned);
90        interned
91    }
92
93    /// If name is interned then return it, otherwise None.
94    pub fn get_if_interned(&self, name: &str) -> Option<Interned> {
95        self.map.get(name).copied()
96    }
97
98    /// Intern name in this interner.  Will return the existing symbol if it
99    /// exists or add it and and return it if not.  Use this if you already have
100    /// a static str reference to avoid wasting space on making another.
101    pub fn intern_static(&mut self, name: &'static str) -> Interned {
102        if let Some(&id) = self.map.get(name) {
103            return id;
104        }
105        self.intern_final(name)
106    }
107
108    /// Intern name in this interner.  Will return the existing symbol if it
109    /// exists or add it and return it if not.
110    pub fn intern(&mut self, name: &str) -> Interned {
111        if let Some(&id) = self.map.get(name) {
112            return id;
113        }
114        let name = {
115            let cap = self.buf.capacity();
116            if cap < self.buf.len() + name.len() {
117                let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
118                let new_buf = mem::ManuallyDrop::new(String::with_capacity(new_cap));
119                self.capacity += new_cap;
120                // Leak memory to keep the static lifetimes valid.
121                let _old_buf = mem::replace(&mut self.buf, new_buf);
122            }
123
124            let start = self.buf.len();
125            self.buf.push_str(name);
126            self.used += name.len();
127            unsafe { &*(&self.buf[start..] as *const str) }
128        };
129        self.intern_final(name)
130    }
131
132    pub fn get_string(&self, interned: Interned) -> Option<&'static str> {
133        if let Some(s) = self.vals.get(interned.id as usize) {
134            Some(s)
135        } else {
136            None
137        }
138    }
139
140    /// Return the amount of memory allocated by the interner.
141    pub fn capacity(&self) -> usize {
142        self.capacity
143    }
144
145    /// Return the amount of memory used to store symbols in the interner.
146    pub fn used(&self) -> usize {
147        self.used
148    }
149
150    /// Return the number of symbols in the interner.
151    pub fn len(&self) -> usize {
152        self.map.len()
153    }
154
155    /// Are there no symbols in this interner?
156    pub fn is_empty(&self) -> bool {
157        self.map.is_empty()
158    }
159}
160
161#[cfg(test)]
162mod tests {
163    use super::*;
164
165    #[test]
166    fn test_basic() {
167        let mut i = Interner::with_capacity(7);
168        assert!(i.capacity() == 8);
169        assert!(i.used() == 0);
170        assert!(i.is_empty());
171        let one = i.intern("one");
172        assert!(i.get_string(one).unwrap() == "one");
173        assert!(i.used() == 3);
174        assert!(i.len() == 1);
175        let fives = i.intern("fives");
176        assert!(i.get_string(fives).unwrap() == "fives");
177        assert!(i.capacity() == 8);
178        assert!(i.used() == 8);
179        assert!(i.len() == 2);
180        let v1 = i.intern("xxx");
181        assert!(i.get_string(one).unwrap() == "one");
182        assert!(i.get_string(fives).unwrap() == "fives");
183        assert!(i.get_string(v1).unwrap() == "xxx");
184        assert!(i.capacity() == 24);
185        assert!(i.used() == 11);
186        assert!(i.len() == 3);
187
188        let one2 = i.intern("one");
189        assert!(i.get_string(one).unwrap() == "one");
190        assert!(i.get_string(one2).unwrap() == "one");
191        assert!(one == one2);
192        assert!(std::ptr::eq(
193            i.get_string(one).unwrap(),
194            i.get_string(one2).unwrap()
195        ));
196        assert!(i.capacity() == 24);
197        assert!(i.used() == 11);
198        assert!(i.len() == 3);
199
200        let v2 = i.intern("1234567890");
201        assert!(i.get_string(one).unwrap() == "one");
202        assert!(i.get_string(fives).unwrap() == "fives");
203        assert!(i.get_string(v1).unwrap() == "xxx");
204        assert!(i.get_string(v2).unwrap() == "1234567890");
205        assert!(i.capacity() == 24);
206        assert!(i.used() == 21);
207        assert!(i.len() == 4);
208
209        let v3 = i.intern("1234");
210        assert!(i.get_string(one).unwrap() == "one");
211        assert!(i.get_string(fives).unwrap() == "fives");
212        assert!(i.get_string(v1).unwrap() == "xxx");
213        assert!(i.get_string(v2).unwrap() == "1234567890");
214        assert!(i.get_string(v3).unwrap() == "1234");
215        assert!(i.capacity() == 56);
216        assert!(i.used() == 25);
217        assert!(i.len() == 5);
218
219        let v2_2 = i.intern("1234567890");
220        assert!(i.get_string(v2).unwrap() == "1234567890");
221        assert!(i.get_string(v2_2).unwrap() == "1234567890");
222        assert!(v2 == v2_2);
223        assert!(std::ptr::eq(
224            i.get_string(v2).unwrap(),
225            i.get_string(v2_2).unwrap()
226        ));
227        assert!(i.capacity() == 56);
228        assert!(i.used() == 25);
229        assert!(i.len() == 5);
230    }
231}