Thanks for visiting codestin.com
Credit goes to docs.rs

Skip to main content

graphify_cache/
lib.rs

1//! SHA256-based semantic caching for graphify.
2//!
3//! Caches extraction results keyed by content hash so unchanged files are not
4//! re-processed.
5
6use std::fs;
7use std::path::Path;
8
9use serde::{Serialize, de::DeserializeOwned};
10use sha2::{Digest, Sha256};
11use thiserror::Error;
12use tracing::debug;
13
/// Directory in which cache entries are stored when no explicit directory is
/// given.
///
/// The path is relative, so it is resolved against the process's current
/// working directory whenever it is handed to a filesystem call.
const CACHE_DIR: &str = "graphify-out/cache";
16
17/// Errors from the cache layer.
18#[derive(Debug, Error)]
19pub enum CacheError {
20    #[error("IO error: {0}")]
21    Io(#[from] std::io::Error),
22
23    #[error("serialization error: {0}")]
24    Serde(#[from] serde_json::Error),
25}
26
27/// Compute the SHA256 hex digest of a file's content.
28///
29/// Returns `None` if the file cannot be read.
30pub fn file_hash(path: &Path) -> Option<String> {
31    let content = fs::read(path).ok()?;
32    Some(content_hash(&content))
33}
34
35/// Compute the SHA256 hex digest of arbitrary bytes.
36pub fn content_hash(data: &[u8]) -> String {
37    let hash = Sha256::digest(data);
38    format!("{hash:x}")
39}
40
41/// Build a cache filename from a file path relative to `root`.
42///
43/// The key is `{sha256}.json` where the hash is computed over the file content,
44/// so any change in content naturally invalidates the cache entry.
45fn cache_key(path: &Path, _root: &Path) -> String {
46    let hash = file_hash(path).unwrap_or_default();
47    format!("{hash}.json")
48}
49
50/// Load a cached extraction result for `path`, returning `None` on cache miss.
51///
52/// A cache miss occurs when:
53/// - The source file cannot be read (hash fails).
54/// - No cache entry exists for the current content hash.
55/// - The cached JSON cannot be deserialized into `T`.
56pub fn load_cached<T: DeserializeOwned>(path: &Path, root: &Path) -> Option<T> {
57    load_cached_from(path, root, Path::new(CACHE_DIR))
58}
59
60/// Like [`load_cached`] but with an explicit cache directory.
61pub fn load_cached_from<T: DeserializeOwned>(
62    path: &Path,
63    root: &Path,
64    cache_dir: &Path,
65) -> Option<T> {
66    let key = cache_key(path, root);
67    let cache_path = cache_dir.join(&key);
68    if !cache_path.exists() {
69        debug!(?cache_path, "cache miss");
70        return None;
71    }
72    let data = fs::read_to_string(&cache_path).ok()?;
73    serde_json::from_str(&data).ok()
74}
75
76/// Save an extraction result to cache.
77///
78/// Returns `true` on success, `false` on any I/O or serialization failure.
79pub fn save_cached<T: Serialize>(path: &Path, result: &T, root: &Path) -> bool {
80    save_cached_to(path, result, root, Path::new(CACHE_DIR))
81}
82
83/// Like [`save_cached`] but with an explicit cache directory.
84pub fn save_cached_to<T: Serialize>(
85    path: &Path,
86    result: &T,
87    root: &Path,
88    cache_dir: &Path,
89) -> bool {
90    let key = cache_key(path, root);
91    let cache_path = cache_dir.join(&key);
92
93    if let Some(parent) = cache_path.parent()
94        && fs::create_dir_all(parent).is_err()
95    {
96        return false;
97    }
98
99    let tmp = cache_path.with_extension("tmp");
100    match serde_json::to_string(result) {
101        Ok(json) => {
102            if fs::write(&tmp, &json).is_ok() {
103                debug!(?cache_path, "cache write");
104                let ok = fs::rename(&tmp, &cache_path).is_ok();
105                if !ok {
106                    let _ = fs::remove_file(&tmp);
107                }
108                ok
109            } else {
110                false
111            }
112        }
113        Err(_) => false,
114    }
115}
116
117/// Remove all cached files from the default cache directory.
118pub fn clear_cache() -> std::io::Result<()> {
119    clear_cache_dir(Path::new(CACHE_DIR))
120}
121
/// Remove the given cache directory and everything inside it.
///
/// A directory that does not exist is treated as already cleared, so the
/// call is idempotent.
///
/// # Errors
///
/// Returns the underlying I/O error if the directory exists but cannot be
/// removed.
pub fn clear_cache_dir(cache_dir: &Path) -> std::io::Result<()> {
    // Remove directly instead of checking `exists()` first: the check-then-act
    // form can fail with `NotFound` if the directory disappears in between.
    match fs::remove_dir_all(cache_dir) {
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
        outcome => outcome,
    }
}
129
130/// Invalidate the cache entry for a specific file.
131///
132/// Since caching is content-hash based, changing the file already causes a
133/// cache miss on the next read. This function pre-deletes entries matching
134/// the *current* content hash so stale data is cleaned up eagerly. It is a
135/// no-op when the file can't be read (already deleted, etc.).
136pub fn invalidate_cached(path: &Path, root: &Path, cache_dir: &Path) -> bool {
137    let key = cache_key(path, root);
138    let cache_path = cache_dir.join(&key);
139    if cache_path.exists() {
140        debug!(?cache_path, "cache invalidate");
141        fs::remove_file(&cache_path).is_ok()
142    } else {
143        true
144    }
145}
146
#[cfg(test)]
mod tests {
    use super::*;
    use serde::{Deserialize, Serialize};
    use std::fs;
    use tempfile::TempDir;

    /// Small serializable payload used as the cached value in these tests.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    struct DummyResult {
        entities: Vec<String>,
        score: f64,
    }

    /// Build the fixed payload shared by the tests below.
    fn make_dummy() -> DummyResult {
        let entities = ["Alice", "Bob"].into_iter().map(String::from).collect();
        DummyResult {
            entities,
            score: 0.95,
        }
    }

    /// Hashing the same file twice gives the same, known SHA256 digest.
    #[test]
    fn test_file_hash_consistent() {
        let tmp = TempDir::new().unwrap();
        let target = tmp.path().join("hello.txt");
        fs::write(&target, "hello world").unwrap();

        let first = file_hash(&target).unwrap();
        let second = file_hash(&target).unwrap();
        assert_eq!(first, second, "hash must be deterministic");

        let expected = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";
        assert_eq!(first, expected);
    }

    /// A path that does not exist has no hash.
    #[test]
    fn test_file_hash_nonexistent() {
        let missing = Path::new("/no/such/file");
        assert!(file_hash(missing).is_none());
    }

    /// A saved value is returned unchanged by a subsequent load.
    #[test]
    fn test_save_load_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        let cache_dir = root.join("cache");

        // Create a source file.
        let source = root.join("src.rs");
        fs::write(&source, "fn main() {}").unwrap();

        let expected = make_dummy();
        let saved = save_cached_to(&source, &expected, root, &cache_dir);
        assert!(saved);

        let loaded: Option<DummyResult> = load_cached_from(&source, root, &cache_dir);
        assert_eq!(loaded, Some(expected));
    }

    /// Loading a file that was never cached yields `None`.
    #[test]
    fn test_cache_miss_returns_none() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        let cache_dir = root.join("cache");

        let source = root.join("not_cached.rs");
        fs::write(&source, "let x = 1;").unwrap();

        let loaded: Option<DummyResult> = load_cached_from(&source, root, &cache_dir);
        assert!(loaded.is_none());
    }

    /// Rewriting the source with new content makes the old entry unreachable.
    #[test]
    fn test_content_change_invalidates_cache() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        let cache_dir = root.join("cache");

        let source = root.join("mutable.rs");
        fs::write(&source, "version 1").unwrap();

        let payload = make_dummy();
        assert!(save_cached_to(&source, &payload, root, &cache_dir));

        fs::write(&source, "version 2").unwrap();

        let loaded: Option<DummyResult> = load_cached_from(&source, root, &cache_dir);
        assert!(loaded.is_none(), "modified file must not match old cache");
    }

    /// Clearing removes the cache directory itself.
    #[test]
    fn test_clear_cache_removes_files() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        let cache_dir = root.join("cache");

        let source = root.join("f.txt");
        fs::write(&source, "data").unwrap();

        let payload = make_dummy();
        assert!(save_cached_to(&source, &payload, root, &cache_dir));
        assert!(cache_dir.exists());

        clear_cache_dir(&cache_dir).unwrap();
        assert!(!cache_dir.exists());
    }
}
251}