Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 2dfefdf

Browse files
committed
Initial commit as copy from crates-io-cli
0 parents  commit 2dfefdf

11 files changed

Lines changed: 1412 additions & 0 deletions

File tree

Cargo.toml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
[package]
2+
name = "criner"
3+
version = "0.1.0"
4+
authors = ["Sebastian Thiel <[email protected]>"]
5+
edition = "2018"
6+
description = "a platform for resumable mining of crates.io for knowledge and information"
7+
publish = false
8+
include = ["src/**/*", "Cargo.*"]
9+
license = "MIT"
10+
repository = "https://github.com/Byron/crates-io-cli-rs"
11+
readme = "README.md"
12+
13+
[lib]
14+
doctest = false
15+
16+
[dependencies]
17+
quick-error = "1.2.3"
18+
futures = { version = "0.3.4", features = ["thread-pool"]}
19+
futures-timer = "3.0.1"
20+
sled = { version = "0.31.0", default-features = false }
21+
crates-index-diff = "5.0.4"
22+
prodash = { version = "1.1", default-features = false, features = ["tui-renderer", "log-renderer"] }
23+
rmp-serde = "0.14.0"
24+
serde_derive = "1.0.104"
25+
serde = "1.0.104"
26+
humantime = "2.0.0"
27+
log = "0.4.8"
28+
async-std = { version = "1.5.0", default-features = false, features = ["unstable"] }
29+
reqwest = "0.10.1"
30+
http = { version = "0.2.0", default-features = false }
31+
tokio = { version = "0.2", default-features = false, features = ["fs", "rt-threaded"]}

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
## Tasks
3+
4+
* [x] Move data types to model module
5+
* [x] replace from traits with macro
6+
* [x] tree-access can be generalized - do it for each type we store
7+
* [x] integrate 'context' tree into base trait as much as feasible
8+
* [x] replace async-io with futures-rs for future-proofing
9+
* [x] integrate async progress
10+
* [ ] downloads with backpressure
11+
* [ ] _(investigate)_ resumable downloads
12+
* [ ] resilience: protect against thread panics - they prevent the program from shutting down
13+
14+
## Lessons learned
15+
16+
* futures::ThreadPools - panicking futures crash only one thread
17+
* long-running futures need error and potentially panic recovery. The `futures` crate has a panic catcher (`FutureExt::catch_unwind`) that could be useful.

src/engine/changes.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
use crate::{
2+
error::{Error, Result},
3+
persistence::TreeAccess,
4+
utils::*,
5+
Context,
6+
};
7+
use crates_index_diff::Index;
8+
use futures::task::Spawn;
9+
use std::{
10+
path::Path,
11+
time::{Duration, SystemTime},
12+
};
13+
14+
/// Refreshes the local crates.io index and records every changed crate version in the database.
///
/// Steps, in order:
/// 1. Open the index at `crates_io_path`, cloning it first if necessary
///    (`Index::from_path_or_cloned`).
/// 2. Fetch the changes since the last run (`Index::fetch_changes`).
/// 3. Insert each changed version, upsert its crate, and bump today's counters.
/// 4. Add the elapsed time since the start of this call to today's `fetch_crate_versions` duration.
///
/// `pool` and `deadline` are handed to the `enforce_blocking` / `enforce_future` helpers from
/// `utils`. NOTE(review): presumably these run the work on `pool` and fail once `deadline` has
/// passed - confirm against `utils`.
///
/// # Errors
/// Propagates any error from the helpers above, from the index operations, and from the database.
pub async fn process(
15+
crates_io_path: impl AsRef<Path>,
16+
pool: impl Spawn,
17+
Context {
18+
db,
19+
mut progress,
20+
deadline,
21+
}: Context,
22+
) -> Result<()> {
23+
// Taken up front so the recorded duration covers clone/fetch as well as the storing loop.
let start = SystemTime::now();
24+
let mut subprogress =
25+
progress.add_child("Potentially cloning crates index - this can take a while…");
26+
// The double `?` unwraps two nested results: presumably the outer one belongs to the helper
// (deadline/spawn) and the inner one to the index operation - TODO confirm.
let index = enforce_blocking(
27+
deadline,
28+
{
29+
// Convert to an owned path so the closure does not borrow `crates_io_path`.
let path = crates_io_path.as_ref().to_path_buf();
30+
|| Index::from_path_or_cloned(path)
31+
},
32+
&pool,
33+
)
34+
.await??;
35+
subprogress.set_name("Fetching crates index to see changes");
36+
let crate_versions = enforce_blocking(deadline, move || index.fetch_changes(), &pool).await??;
37+
38+
// NOTE(review): the completion message is reported on the parent `progress`, not on
// `subprogress`, which is dropped right after - confirm this is intended.
progress.done(format!("Fetched {} changed crates", crate_versions.len()));
39+
drop(subprogress);
40+
41+
let mut store_progress = progress.add_child("processing new crates");
42+
store_progress.init(Some(crate_versions.len() as u32), Some("crate versions"));
43+
44+
enforce_future(
45+
deadline,
46+
{
47+
let db = db.clone();
48+
async move {
49+
// Handles to the individual trees of the database used below.
let versions = db.crate_versions();
50+
let krate = db.crates();
51+
let context = db.context();
52+
// NOTE: this loop can also be a stream, but that makes computation slower due to overhead
53+
// Thus we just do this 'quickly' on the main thread, knowing that criner really needs its
54+
// own executor or resources.
55+
// We could chunk things, but that would only make the code harder to read. No gains here…
56+
// NOTE: Even chunks of 1000 were not faster, didn't even saturate a single core...
57+
for (versions_stored, version) in crate_versions.iter().enumerate() {
58+
// NOTE: For now, not transactional, but we *could*!
59+
{
60+
versions.insert(&version)?;
61+
context.update_today(|c| c.counts.crate_versions += 1)?;
62+
}
63+
// The crate counter is only bumped when `upsert` returns true - presumably meaning the
// crate was not stored before; verify against `persistence`.
if krate.upsert(&version)? {
64+
context.update_today(|c| c.counts.crates += 1)?;
65+
}
66+
67+
// `enumerate` is zero-based, progress counts completed items.
store_progress.set((versions_stored + 1) as u32);
68+
}
69+
context.update_today(|c| {
70+
// A clock that went backwards makes `duration_since` fail; count that as zero time.
c.durations.fetch_crate_versions += SystemTime::now()
71+
.duration_since(start)
72+
.unwrap_or_else(|_| Duration::default())
73+
})?;
74+
store_progress.done(format!(
75+
"Stored {} crate versions to database",
76+
crate_versions.len()
77+
));
78+
// Explicit turbofish pins the async block's error type so `?` above can infer it.
Ok::<_, Error>(())
79+
}
80+
},
81+
&pool,
82+
)
83+
.await??;
84+
Ok(())
85+
}

src/engine/mod.rs

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
use crate::{error::Result, model, persistence::Db, utils::*};
2+
use futures::{
3+
future::Either,
4+
future::FutureExt,
5+
stream::StreamExt,
6+
task::{Spawn, SpawnExt},
7+
};
8+
use log::{info, warn};
9+
use prodash::tui::{Event, Line};
10+
use std::{
11+
io::Write,
12+
path::Path,
13+
path::PathBuf,
14+
time::{Duration, SystemTime},
15+
};
16+
17+
mod changes;
18+
mod tasks;
19+
mod worker;
20+
21+
/// State bundle handed to an engine job; `run` builds one for each `changes::process` call.
pub struct Context {
22+
// Database handle the job reads from and writes to.
db: Db,
23+
// Progress item the job reports into and adds children to.
progress: prodash::tree::Item,
24+
// Optional point in time by which work should stop; `None` means no deadline.
deadline: Option<SystemTime>,
25+
}
26+
27+
/// Runs the statistics and mining engine.
28+
/// May run for a long time unless a deadline is specified.
29+
/// Even though timeouts can be achieved from outside of the future, knowing the deadline may be used
30+
/// by the engine to manage its time even more efficiently.
///
/// What it starts:
/// * `num_workers` download workers (`worker::download`), spawned onto the `tokio` runtime handle.
/// * a repeating `tasks::process` job with a 5 second interval, spawned onto `pool`.
/// * a repeating `changes::process` job with a 60 second interval, awaited on the calling task;
///   its result is the result of this function.
///
/// `progress` is the root of the progress tree; every job adds its own children to it.
/// `downloads_dir` is passed through to each download worker unchanged.
///
/// # Errors
/// Fails if `check(deadline)` fails, if spawning onto `pool` fails, or with whatever the awaited
/// `repeat_every_s` call returns.
31+
pub async fn run(
32+
db: Db,
33+
crates_io_path: PathBuf,
34+
deadline: Option<SystemTime>,
35+
progress: prodash::Tree,
36+
num_workers: u32,
37+
downloads_dir: Option<PathBuf>,
38+
pool: impl Spawn + Clone,
39+
tokio: tokio::runtime::Handle,
40+
) -> Result<()> {
41+
// Bail out before starting anything; presumably errors if the deadline has already passed -
// confirm against `utils::check`.
check(deadline)?;
42+
43+
let mut downloaders = progress.add_child("Downloads");
44+
// Bounded channel of capacity 1: `tx` goes to the processing job, `rx` is shared by all
// download workers.
let (tx, rx) = async_std::sync::channel(1);
45+
for idx in 0..num_workers {
46+
// Can only use the pool if the downloader uses a futures-compatible runtime
47+
// Tokio is its very own thing, and futures requiring it need to run there.
48+
tokio.spawn(
49+
worker::download(
50+
db.clone(),
51+
// Workers are labelled starting at 1 for display.
downloaders.add_child(format!("DL {} - idle", idx + 1)),
52+
rx.clone(),
53+
downloads_dir.clone(),
54+
)
55+
// The worker's output is discarded and the returned join handle is dropped.
.map(|_| ()),
56+
);
57+
}
58+
59+
// Background job: NOTE(review) `repeat_every_s` presumably re-invokes the last closure every
// `interval_s` seconds until `deadline` - confirm against `utils`.
let interval_s = 5;
60+
pool.spawn(
61+
repeat_every_s(
62+
interval_s,
63+
{
64+
// Clone first so the original `progress` stays usable for the jobs below.
let p = progress.clone();
65+
move || p.add_child("Processing Timer")
66+
},
67+
deadline,
68+
{
69+
let progress = progress.clone();
70+
let db = db.clone();
71+
move || {
72+
tasks::process(
73+
db.clone(),
74+
progress.add_child("Process Crate Versions"),
75+
tx.clone(),
76+
)
77+
}
78+
},
79+
)
80+
// The outcome of the background job is discarded.
.map(|_| ()),
81+
)?;
82+
83+
// Foreground job: refresh the crates.io index once a minute. This is the only future awaited
// here, so `run` completes when it does.
let interval_s = 60;
84+
repeat_every_s(
85+
interval_s,
86+
{
87+
let p = progress.clone();
88+
move || p.add_child("Fetch Timer")
89+
},
90+
deadline,
91+
move || {
92+
changes::process(
93+
crates_io_path.clone(),
94+
pool.clone(),
95+
Context {
96+
db: db.clone(),
97+
progress: progress.add_child("crates.io refresh"),
98+
deadline,
99+
},
100+
)
101+
},
102+
)
103+
.await
104+
}
105+
106+
/// For convenience, run the engine and block until done.
///
/// Builds a tokio runtime and a two-thread futures `ThreadPool`, opens the database at `db`,
/// creates `downloads_dir` if one is given, and spawns [`run`] onto the pool.
///
/// * With `gui` set, the terminal UI is rendered on the calling thread and raced against the
///   engine: if the engine finishes first the UI is aborted; if the UI finishes first the engine
///   handle is detached with `forget()`.
/// * Without `gui`, the calling thread simply blocks on the engine.
///
/// The elapsed wall-clock time is logged at info level before returning.
///
/// # Errors
/// Only set-up and teardown failures are returned: building the runtime or pool, opening the
/// database, creating the downloads directory, spawning the engine, setting up the renderer, or
/// flushing stdout. An error produced by the engine itself is logged with `warn!` and NOT
/// propagated, so this function still returns `Ok(())` in that case.
107+
pub fn run_blocking(
108+
db: impl AsRef<Path>,
109+
crates_io_path: impl AsRef<Path>,
110+
deadline: Option<SystemTime>,
111+
num_workers: u32,
112+
downloads_dir: Option<PathBuf>,
113+
root: prodash::Tree,
114+
gui: Option<prodash::tui::TuiOptions>,
115+
) -> Result<()> {
116+
// A tokio runtime is required for reqwest; its handle is passed to `run` for the download workers.
117+
let tokio_rt = tokio::runtime::Builder::new()
118+
.enable_all()
119+
.core_threads(1)
120+
.max_threads(2) // needs to be two or nothing happens
121+
.threaded_scheduler()
122+
.build()?;
123+
let start_of_computation = SystemTime::now();
124+
// NOTE: pool should be big enough to hold all possible blocking tasks running in parallel, +1 for
125+
// additional non-blocking tasks.
126+
// The main thread is expected to poll non-blocking tasks.
127+
// I admit I don't fully understand why multi-pool setups aren't making progress… . So just one pool for now.
128+
let pool_size = 1 + 1;
129+
let task_pool = futures::executor::ThreadPool::builder()
130+
.pool_size(pool_size)
131+
.create()?;
132+
let db = Db::open(db)?;
133+
if let Some(path) = downloads_dir.as_ref() {
134+
std::fs::create_dir_all(path)?;
135+
}
136+
137+
// dropping the work handle will stop (non-blocking) futures
138+
let work_handle = task_pool.spawn_with_handle(run(
139+
db.clone(),
140+
crates_io_path.as_ref().into(),
141+
deadline,
142+
root.clone(),
143+
num_workers,
144+
downloads_dir,
145+
task_pool.clone(),
146+
tokio_rt.handle().clone(),
147+
))?;
148+
149+
match gui {
150+
Some(gui_options) => {
151+
// Wrap the renderer so it can be cancelled once the engine is done.
let (gui, abort_handle) = futures::future::abortable(prodash::tui::render_with_input(
152+
root,
153+
gui_options,
154+
context_stream(&db, start_of_computation),
155+
)?);
156+
157+
// Race engine against UI on this thread; `select` hands back the unfinished future.
let either = futures::executor::block_on(futures::future::select(
158+
work_handle,
159+
gui.boxed_local(),
160+
));
161+
match either {
162+
// Engine finished first: stop the UI and wait for it to wind down.
Either::Left((work_result, gui)) => {
163+
abort_handle.abort();
164+
// The aborted UI's result carries no information we need.
futures::executor::block_on(gui).ok();
165+
if let Err(e) = work_result {
166+
warn!("{}", e);
167+
}
168+
}
169+
// UI finished first: detach the engine handle rather than awaiting it.
Either::Right((_, work_handle)) => work_handle.forget(),
170+
}
171+
172+
// Make sure the terminal can reset when the gui is done.
173+
std::io::stdout().flush()?;
174+
}
175+
None => {
176+
let work_result = futures::executor::block_on(work_handle);
177+
// Engine failures are reported, not returned.
if let Err(e) = work_result {
178+
warn!("{}", e);
179+
}
180+
}
181+
};
182+
183+
// at this point, we forget all currently running computation, and since it's in the local thread, it's all
184+
// destroyed/dropped properly.
185+
info!("{}", wallclock(start_of_computation));
186+
Ok(())
187+
}
188+
189+
fn wallclock(since: SystemTime) -> String {
190+
format!(
191+
"Wallclock elapsed: {}",
192+
humantime::format_duration(SystemTime::now().duration_since(since).unwrap_or_default())
193+
)
194+
}
195+
196+
fn context_stream(db: &Db, start_of_computation: SystemTime) -> impl futures::Stream<Item = Event> {
197+
prodash::tui::ticker(Duration::from_secs(1)).map({
198+
let db = db.clone();
199+
move |_| {
200+
db.context()
201+
.iter()
202+
.next_back()
203+
.and_then(Result::ok)
204+
.map(|(_, c): (_, model::Context)| {
205+
let lines = vec![
206+
Line::Text(wallclock(start_of_computation)),
207+
Line::Title("Durations".into()),
208+
Line::Text(format!(
209+
"fetch-crate-versions: {:?}",
210+
c.durations.fetch_crate_versions
211+
)),
212+
Line::Title("Counts".into()),
213+
Line::Text(format!("crate-versions: {}", c.counts.crate_versions)),
214+
Line::Text(format!(" crates: {}", c.counts.crates)),
215+
];
216+
Event::SetInformation(lines)
217+
})
218+
.unwrap_or(Event::Tick)
219+
}
220+
})
221+
}

0 commit comments

Comments
 (0)