Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0d7aee0

Browse files
committed
perf: hot path optimizations (analyzer, renderer & ring buffer)
1 parent 86b80dd commit 0d7aee0

10 files changed

Lines changed: 96 additions & 49 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/analyzer/bands.rs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::dsp::{a_weighting, ema_tc};
1+
use crate::dsp::{a_weighting, ema_precomputed, ema_tc};
22

33
use super::SpectrumAnalyzer;
44

@@ -18,6 +18,8 @@ fn accumulate_band_db(
1818
dt_s: f32,
1919
filters_len: usize,
2020
) {
21+
let alpha_eq = (-dt_s / 6.0).exp();
22+
2123
for (i, tri) in sa.filters.iter().enumerate().take(filters_len) {
2224
let mut acc = 0.0f32;
2325
for &(idx, wgt) in &tri.taps {
@@ -28,7 +30,8 @@ fn accumulate_band_db(
2830
let amp_weighted = acc.sqrt() * a_weighting(tri.center_hz);
2931

3032
if let Some(eq) = sa.eq_ref.get_mut(i) {
31-
*eq = ema_tc(*eq, amp_weighted, 6.0, dt_s).max(1e-9);
33+
*eq = ema_precomputed(*eq, amp_weighted, alpha_eq)
34+
.max(1e-9);
3235
let rel = amp_weighted / *eq;
3336
if let Some(target) = sa.bars_target.get_mut(i) {
3437
*target = 20.0 * rel.max(1e-12).log10();
@@ -54,17 +57,24 @@ fn update_db_range(sa: &mut SpectrumAnalyzer, len: usize, dt_s: f32) {
5457
return;
5558
}
5659

57-
if let Some(slice) = sa.sort_scratch.get_mut(..len) {
58-
slice.sort_by(f32::total_cmp);
59-
}
60-
6160
let len_f = len as f32;
6261
let idx_low = ((len_f - 1.0) * 0.10).round().max(0.0) as usize;
6362
let idx_high = ((len_f - 1.0) * 0.90).round().max(0.0) as usize;
6463

65-
if let (Some(&q10), Some(&q90)) =
66-
(sa.sort_scratch.get(idx_low), sa.sort_scratch.get(idx_high))
67-
{
64+
let Some(slice) = sa.sort_scratch.get_mut(..len) else {
65+
return;
66+
};
67+
68+
slice.select_nth_unstable_by(idx_low, f32::total_cmp);
69+
let q10 = slice.get(idx_low).copied();
70+
71+
#[allow(clippy::arithmetic_side_effects)]
72+
let split = idx_high.saturating_sub(idx_low);
73+
let (_, remaining) = slice.split_at_mut(idx_low);
74+
remaining.select_nth_unstable_by(split, f32::total_cmp);
75+
let q90 = remaining.get(split).copied();
76+
77+
if let (Some(q10), Some(q90)) = (q10, q90) {
6878
sa.db_low = ema_tc(sa.db_low, q10, 0.30, dt_s);
6979
sa.db_high = ema_tc(sa.db_high, q90, 0.50, dt_s);
7080
}

src/analyzer/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ pub struct SpectrumAnalyzer {
1818
pub(crate) sort_scratch: Vec<f32>,
1919
pub bars_target: Vec<f32>,
2020
pub(crate) flowed_scratch: Vec<f32>,
21+
pub render_fulls: Vec<usize>,
22+
pub render_fracs: Vec<f32>,
2123
}
2224

2325
impl SpectrumAnalyzer {
@@ -34,6 +36,8 @@ impl SpectrumAnalyzer {
3436
sort_scratch: Vec::new(),
3537
bars_target: Vec::new(),
3638
flowed_scratch: Vec::new(),
39+
render_fulls: Vec::new(),
40+
render_fracs: Vec::new(),
3741
}
3842
}
3943

@@ -46,6 +50,8 @@ impl SpectrumAnalyzer {
4650
self.bars_target = vec![0.0; num_bars];
4751
self.sort_scratch = vec![0.0; num_bars];
4852
self.flowed_scratch = vec![0.0; num_bars];
53+
self.render_fulls = vec![0; num_bars];
54+
self.render_fracs = vec![0.0; num_bars];
4955
}
5056
}
5157

src/analyzer/spring.rs

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,14 @@ fn decay_to_silence(sa: &mut SpectrumAnalyzer, n: usize, dt_s: f32) {
2424
let tau_silence = 0.22f32;
2525
let a = (-dt_s / tau_silence).exp();
2626

27-
for i in 0..n {
28-
if let Some(y) = sa.bars_y.get_mut(i) {
29-
*y *= a;
30-
if *y < 0.001 {
31-
*y = 0.0;
32-
}
33-
}
34-
if let Some(v) = sa.bars_v.get_mut(i) {
35-
*v = 0.0;
27+
for (y, v) in
28+
sa.bars_y.iter_mut().zip(sa.bars_v.iter_mut()).take(n)
29+
{
30+
*y *= a;
31+
if *y < 0.001 {
32+
*y = 0.0;
3633
}
34+
*v = 0.0;
3735
}
3836
}
3937

@@ -72,15 +70,14 @@ fn integrate_spring(
7270
) {
7371
let c = 2.0 * params.spr_k.sqrt() * params.spr_zeta;
7472

75-
for i in 0..n {
76-
if let (Some(y), Some(v), Some(scratch)) = (
77-
sa.bars_y.get_mut(i),
78-
sa.bars_v.get_mut(i),
79-
sa.flowed_scratch.get(i),
80-
) {
81-
let a = params.spr_k.mul_add(scratch - *y, -(c * *v));
82-
*v = a.mul_add(dt_s, *v);
83-
*y = (*v).mul_add(dt_s, *y).clamp(0.0, 1.0);
84-
}
73+
for (y, (v, scratch)) in sa
74+
.bars_y
75+
.iter_mut()
76+
.zip(sa.bars_v.iter_mut().zip(sa.flowed_scratch.iter()))
77+
.take(n)
78+
{
79+
let a = params.spr_k.mul_add(*scratch - *y, -(c * *v));
80+
*v = a.mul_add(dt_s, *v);
81+
*y = (*v).mul_add(dt_s, *y).clamp(0.0, 1.0);
8582
}
8683
}

src/app/fft.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ pub fn compute_spectrum(ctx: &mut FftContext<'_>) {
2323
}
2424

2525
#[allow(clippy::cast_precision_loss)]
26-
let norm = (ctx.fft_size as f32) * (ctx.fft_size as f32);
26+
let norm_inv =
27+
1.0 / ((ctx.fft_size as f32) * (ctx.fft_size as f32));
2728
#[allow(clippy::indexing_slicing)]
2829
for i in 0..ctx.half {
2930
let re = ctx.fft_out[i].re;
3031
let im = ctx.fft_out[i].im;
31-
ctx.spec_pow[i] = re.mul_add(re, im * im) / norm;
32+
ctx.spec_pow[i] = re.mul_add(re, im * im) * norm_inv;
3233
}
3334
}

src/app/run.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ use super::{
3232

3333
#[allow(clippy::arithmetic_side_effects)]
3434
fn ring_cap(fft_size: usize) -> usize {
35-
((48_000usize / 10).max(fft_size * 3)).max(fft_size * 6)
35+
((48_000usize / 10).max(fft_size * 3))
36+
.max(fft_size * 6)
37+
.next_power_of_two()
3638
}
3739

3840
struct AppResources {
@@ -217,6 +219,8 @@ fn tick<W: Write>(
217219
fs.w,
218220
fs.h,
219221
&fs.lay,
222+
&mut fs.analyzer.render_fulls,
223+
&mut fs.analyzer.render_fracs,
220224
)?;
221225
out.write_all(&fs.frame)?;
222226
out.flush()?;

src/buffer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ impl SharedBuf {
2222
}
2323
let cap = self.data.len();
2424
if cap > 0 {
25-
self.write_idx = (self.write_idx + 1) % cap;
25+
let mask = cap - 1;
26+
self.write_idx = (self.write_idx + 1) & mask;
2627
if self.write_idx == 0 {
2728
self.filled = true;
2829
}

src/dsp/ema.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@ pub fn ema_tc(prev: f32, x: f32, tau_s: f32, dt_s: f32) -> f32 {
44
let a = (-dt_s / tau_s).exp();
55
a.mul_add(prev, (1.0 - a) * x)
66
}
7+
8+
#[inline]
9+
#[must_use]
10+
pub fn ema_precomputed(prev: f32, x: f32, alpha: f32) -> f32 {
11+
alpha.mul_add(prev, (1.0 - alpha) * x)
12+
}

src/dsp/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ mod mel;
33
mod weighting;
44
mod window;
55

6-
pub use ema::ema_tc;
6+
pub use ema::{ema_precomputed, ema_tc};
77
pub use mel::{hz_to_mel, mel_to_hz};
88
pub use weighting::a_weighting;
99
pub use window::{hann, prepare_fft_input_inplace};

src/render/draw.rs

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,32 @@ use std::io::Write;
22

33
use super::{Layout, BAR_W, GAP_W};
44

5-
const VBLOCKS: [char; 9] =
6-
[' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
5+
// Pre-encoded UTF-8 byte sequences for each block character.
6+
// Space is ASCII (1 byte); the 8 block elements are all 3-byte U+2581 through U+2588.
7+
// Entries that are 1-byte sequences store the byte in index 0; the write path
8+
// selects the correct slice length via VBLOCKS_LEN.
9+
const VBLOCKS_ENCODED: [[u8; 3]; 9] = [
10+
[b' ', 0, 0], // ' ' U+0020 1 byte
11+
[0xE2, 0x96, 0x81], // '▁' U+2581
12+
[0xE2, 0x96, 0x82], // '▂' U+2582
13+
[0xE2, 0x96, 0x83], // '▃' U+2583
14+
[0xE2, 0x96, 0x84], // '▄' U+2584
15+
[0xE2, 0x96, 0x85], // '▅' U+2585
16+
[0xE2, 0x96, 0x86], // '▆' U+2586
17+
[0xE2, 0x96, 0x87], // '▇' U+2587
18+
[0xE2, 0x96, 0x88], // '█' U+2588
19+
];
20+
21+
const VBLOCKS_LEN: [usize; 9] = [1, 3, 3, 3, 3, 3, 3, 3, 3];
22+
23+
// Index of the full-block character (U+2588) in the table.
24+
const FULL_BLOCK: usize = 8;
725

826
#[inline]
927
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
10-
fn v_partial(frac: f32) -> char {
28+
fn v_partial_idx(frac: f32) -> usize {
1129
let f = frac.clamp(0.0, 0.9999);
12-
let idx = f.mul_add(8.0, 0.5).floor() as usize;
13-
*VBLOCKS.get(idx.min(8)).unwrap_or(&' ')
30+
f.mul_add(8.0, 0.5).floor() as usize
1431
}
1532

1633
#[inline]
@@ -46,6 +63,8 @@ pub fn draw_blocks_vertical<W: Write>(
4663
w: u16,
4764
h: u16,
4865
lay: &Layout,
66+
fulls: &mut [usize],
67+
fracs: &mut [f32],
4968
) -> std::io::Result<()> {
5069
let rows = h.saturating_sub(lay.top_pad) as usize;
5170
let cols = w
@@ -58,10 +77,10 @@ pub fn draw_blocks_vertical<W: Write>(
5877
let per = BAR_W + GAP_W;
5978
let n = bars
6079
.len()
61-
.min(cols.checked_div(per).map_or(1, |v| v.max(1)));
80+
.min(cols.checked_div(per).map_or(1, |v| v.max(1)))
81+
.min(fulls.len())
82+
.min(fracs.len());
6283

63-
let mut fulls = vec![0usize; n];
64-
let mut fracs = vec![0f32; n];
6584
for i in 0..n {
6685
let height =
6786
bars.get(i).copied().unwrap_or(0.0).clamp(0.0, 1.0)
@@ -81,20 +100,23 @@ pub fn draw_blocks_vertical<W: Write>(
81100

82101
for i in 0..n {
83102
let f_val = fulls.get(i).copied().unwrap_or(0);
84-
let ch = if row < f_val {
85-
'█'
103+
let idx = if row < f_val {
104+
FULL_BLOCK
86105
} else if row == f_val
87106
&& fracs.get(i).copied().unwrap_or(0.0) > 0.0
88107
{
89-
v_partial(fracs.get(i).copied().unwrap_or(0.0))
108+
v_partial_idx(fracs.get(i).copied().unwrap_or(0.0))
90109
} else {
91-
' '
110+
0
92111
};
93112

113+
let enc = VBLOCKS_ENCODED
114+
.get(idx)
115+
.unwrap_or(&VBLOCKS_ENCODED[0]);
116+
let len = VBLOCKS_LEN.get(idx).copied().unwrap_or(1);
117+
let bytes = enc.get(..len).unwrap_or(enc.as_slice());
94118
for _ in 0..BAR_W {
95-
out.write_all(
96-
ch.encode_utf8(&mut [0; 4]).as_bytes(),
97-
)?;
119+
out.write_all(bytes)?;
98120
}
99121
write_spaces(out, GAP_W)?;
100122
}

0 commit comments

Comments
 (0)