139 lines
4.5 KiB
Rust
139 lines
4.5 KiB
Rust
use crossbeam::atomic::AtomicCell;
|
|
use df::tract::{mut_slice_as_arrayviewmut, slice_as_arrayview};
|
|
use df::tract::{DfParams, DfTract, RuntimeParams};
|
|
use dioxus::prelude::{asset, manganis, Asset};
|
|
use dioxus_asset_resolver::read_asset_bytes;
|
|
use std::cell::RefCell;
|
|
use std::sync::Arc;
|
|
use tracing::{error, info};
|
|
|
|
use crate::imp;
|
|
|
|
// Bundled DeepFilterNet3 low-latency ONNX model archive; resolved and read at
// runtime via `read_asset_bytes`.
static DF_MODEL: Asset = asset!("/assets/DeepFilterNet3_ll_onnx.tar.gz");
|
|
|
|
enum DenoisingModelState {
|
|
Nothing,
|
|
Downloading(Arc<AtomicCell<Option<DfParams>>>),
|
|
Availible(Box<DfTract>),
|
|
}
|
|
|
|
fn with_denoising_model<O>(
|
|
spawn: &imp::SpawnHandle,
|
|
func: impl FnOnce(&mut DfTract) -> O,
|
|
) -> Option<O> {
|
|
// Using a thread local is super gross, but DfTract is not Send (so it can never leave the current
|
|
// thread) while AudioProcessing itself might change threads whenever.
|
|
thread_local! {
|
|
static STATE: RefCell<DenoisingModelState> = const { RefCell::new(DenoisingModelState::Nothing) };
|
|
}
|
|
|
|
STATE.with_borrow_mut(|state| match state {
|
|
DenoisingModelState::Nothing => {
|
|
let cell = Arc::new(AtomicCell::new(None));
|
|
let cell_task = cell.clone();
|
|
*state = DenoisingModelState::Downloading(cell);
|
|
spawn.spawn(async move {
|
|
let model_bytes = match read_asset_bytes(&DF_MODEL).await {
|
|
Ok(b) => b,
|
|
Err(e) => {
|
|
error!("could not read denoising model from \"{DF_MODEL}\": {e:?}");
|
|
return;
|
|
}
|
|
};
|
|
let params = match DfParams::from_bytes(&model_bytes) {
|
|
Ok(p) => p,
|
|
Err(e) => {
|
|
error!("could not load denoising model parameters: {e:?}");
|
|
return;
|
|
}
|
|
};
|
|
cell_task.store(Some(params));
|
|
});
|
|
None
|
|
}
|
|
DenoisingModelState::Downloading(cell) => {
|
|
if let Some(params) = cell.take() {
|
|
let mut tract = match DfTract::new(params, &RuntimeParams::default_with_ch(1)) {
|
|
Ok(t) => Box::new(t),
|
|
Err(e) => {
|
|
error!("could not create denoising engine: {e:?}");
|
|
return None;
|
|
}
|
|
};
|
|
info!("instantiated denoising engine");
|
|
let out = func(&mut tract);
|
|
*state = DenoisingModelState::Availible(tract);
|
|
Some(out)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
DenoisingModelState::Availible(tract) => Some(func(tract)),
|
|
})
|
|
}
|
|
|
|
/// Per-stream audio processor: downmixes interleaved input to mono and
/// optionally runs it through the lazily-loaded denoising engine.
pub struct AudioProcessor {
    // Whether to attempt denoising; the model itself is loaded lazily on the
    // first `process` call (see `with_denoising_model`).
    denoise: bool,
    // Handle captured at construction time, used to spawn the async
    // model-download task from whatever thread `process` runs on.
    spawn: imp::SpawnHandle,
    // Mono samples not yet consumed by the denoiser: the engine processes
    // fixed-size hops, so a partial hop is carried over between calls.
    buffer: Vec<f32>,
}
|
|
|
|
impl AudioProcessor {
|
|
pub fn new_plain() -> Self {
|
|
AudioProcessor {
|
|
denoise: false,
|
|
spawn: imp::SpawnHandle::current(),
|
|
buffer: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn new_denoising() -> Self {
|
|
AudioProcessor {
|
|
denoise: true,
|
|
spawn: imp::SpawnHandle::current(),
|
|
buffer: Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl AudioProcessor {
    /// Downmixes `audio` (interleaved, `channels` channels; only channel 0 is
    /// kept) to mono and appends the result to `output`.
    ///
    /// When denoising is enabled and the engine is ready, whole hops of
    /// buffered mono samples are denoised into `output` instead, and any
    /// partial hop is kept in `self.buffer` for the next call. While the model
    /// is still downloading — or denoising is disabled — the raw downmix is
    /// appended unmodified.
    pub fn process(&mut self, audio: &[f32], channels: usize, output: &mut Vec<f32>) {
        // Falls back to the raw downmix unless the closure below runs.
        let mut include_raw = true;
        if self.denoise {
            with_denoising_model(&self.spawn, |df| {
                // Engine is available: the closure takes over producing output.
                include_raw = false;

                // Downmix by taking channel 0 of each interleaved frame.
                self.buffer.extend(audio.iter().step_by(channels).copied());
                // NOTE(review): this reserves for the interleaved length, which
                // over-reserves by a factor of `channels` for multi-channel input.
                output.reserve(audio.len());

                let hop = df.hop_size;
                let mut i = 0;
                // Denoise as many complete hops as are currently buffered;
                // `i` tracks how many samples have been consumed.
                while self.buffer[i..].len() >= hop {
                    let audio = &self.buffer[i..][..hop];
                    i += audio.len();

                    // Append `hop` zeroed samples, then denoise into that
                    // freshly appended tail in place.
                    let j = output.len();
                    output.extend(std::iter::repeat_n(0f32, audio.len()));
                    let output = &mut output[j..];

                    // Views are reshaped to (1 channel, hop) frames.
                    // NOTE(review): if `DfTract::process` returns a `Result`,
                    // its error is silently discarded here — confirm and log.
                    df.process(
                        slice_as_arrayview(audio, &[audio.len()])
                            .into_shape((1, audio.len()))
                            .unwrap(),
                        mut_slice_as_arrayviewmut(output, &[output.len()])
                            .into_shape((1, output.len()))
                            .unwrap(),
                    );
                }
                // Drop the consumed prefix; the partial-hop remainder shifts to
                // the front of the buffer for the next call.
                self.buffer.splice(..i, []);
            });
        }

        // Engine not ready (or denoising off): emit the raw mono downmix.
        if include_raw {
            output.extend(audio.iter().step_by(channels).copied());
        }
    }
}
|
|
|
|
/// Shared, lock-free slot for handing an [`AudioProcessor`] between threads;
/// presumably one side `store`s the processor and the other `take`s it —
/// verify against the users of this alias.
pub type AudioProcessorSender = Arc<AtomicCell<Option<AudioProcessor>>>;
|