diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index 70d765f..d6377db 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -1,8 +1,8 @@ #[cfg(windows)] use std::{ - collections::{HashMap, VecDeque}, + collections::VecDeque, str::FromStr, - sync::Mutex, + sync::{Arc, Mutex}, }; use lazy_static::lazy_static; @@ -17,7 +17,9 @@ use windows::{ }, }; -use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS}; +use crate::{ + Backend, BackendId, Callbacks, Error, Features, Gender, UtteranceId, Voice, CALLBACKS, +}; impl From for Error { fn from(e: windows::core::Error) -> Self { @@ -28,14 +30,16 @@ impl From for Error { #[derive(Clone)] pub struct WinRt { id: BackendId, - synth: SpeechSynthesizer, + synth: Arc, player: MediaPlayer, + utterances: Arc>>, rate: f32, pitch: f32, volume: f32, voice: VoiceInformation, } +#[derive(Debug)] struct Utterance { id: UtteranceId, text: String, @@ -45,99 +49,85 @@ struct Utterance { voice: VoiceInformation, } +impl Utterance { + fn speak( + &self, + synth: &SpeechSynthesizer, + player: &MediaPlayer, + callbacks: &mut Callbacks, + ) -> Result<(), windows::core::Error> { + synth.Options()?.SetSpeakingRate(self.rate.into())?; + synth.Options()?.SetAudioPitch(self.pitch.into())?; + synth.Options()?.SetAudioVolume(self.volume.into())?; + synth.SetVoice(&self.voice)?; + + let stream = synth + .SynthesizeTextToStreamAsync(&self.text.clone().into())? + .get()?; + let content_type = stream.ContentType()?; + let source = MediaSource::CreateFromStream(&stream, &content_type)?; + + player.SetSource(&source)?; + player.Play()?; + + if let Some(callback) = callbacks.utterance_begin.as_mut() { + callback(self.id); + } + + Ok(()) + } +} + lazy_static! { static ref NEXT_BACKEND_ID: Mutex = Mutex::new(0); static ref NEXT_UTTERANCE_ID: Mutex = Mutex::new(0); - static ref BACKEND_TO_SPEECH_SYNTHESIZER: Mutex> = { - let v: HashMap = HashMap::new(); - Mutex::new(v) - }; - static ref BACKEND_TO_MEDIA_PLAYER: Mutex> = { - let v: HashMap = HashMap::new(); - Mutex::new(v) - }; - static ref UTTERANCES: Mutex>> = { - let utterances: HashMap> = HashMap::new(); - Mutex::new(utterances) - }; } impl WinRt { pub fn new() -> std::result::Result { info!("Initializing WinRT backend"); - let synth = SpeechSynthesizer::new()?; + let player = MediaPlayer::new()?; player.SetRealTimePlayback(true)?; player.SetAudioCategory(MediaPlayerAudioCategory::Speech)?; - let mut backend_id = NEXT_BACKEND_ID.lock().unwrap(); - let bid = BackendId::WinRt(*backend_id); - *backend_id += 1; - drop(backend_id); - { - let mut utterances = UTTERANCES.lock().unwrap(); - utterances.insert(bid, VecDeque::new()); - } - let mut backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); - backend_to_media_player.insert(bid, player.clone()); - drop(backend_to_media_player); - let mut backend_to_speech_synthesizer = BACKEND_TO_SPEECH_SYNTHESIZER.lock().unwrap(); - backend_to_speech_synthesizer.insert(bid, synth.clone()); - drop(backend_to_speech_synthesizer); - let bid_clone = bid; - player.MediaEnded(&TypedEventHandler::new( - move |sender: &Option, _args| { - if let Some(sender) = sender { - let backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); - let id = backend_to_media_player.iter().find(|v| v.1 == sender); - if let Some((id, _)) = id { - let mut utterances = UTTERANCES.lock().unwrap(); - if let Some(utterances) = utterances.get_mut(id) { - if let Some(utterance) = utterances.pop_front() { - let mut callbacks = CALLBACKS.lock().unwrap(); - let callbacks = callbacks.get_mut(id).unwrap(); - if let Some(callback) = callbacks.utterance_end.as_mut() { - callback(utterance.id); - } - if let Some(utterance) = utterances.front() { - let backend_to_speech_synthesizer = - BACKEND_TO_SPEECH_SYNTHESIZER.lock().unwrap(); - let id = backend_to_speech_synthesizer - .iter() - .find(|v| *v.0 == bid_clone); - if let Some((_, tts)) = id { - tts.Options()?.SetSpeakingRate(utterance.rate.into())?; - tts.Options()?.SetAudioPitch(utterance.pitch.into())?; - tts.Options()?.SetAudioVolume(utterance.volume.into())?; - tts.SetVoice(&utterance.voice)?; - let text = &utterance.text; - let stream = - tts.SynthesizeTextToStreamAsync(&text.into())?.get()?; - let content_type = stream.ContentType()?; - let source = - MediaSource::CreateFromStream(&stream, &content_type)?; - sender.SetSource(&source)?; - sender.Play()?; - if let Some(callback) = callbacks.utterance_begin.as_mut() { - callback(utterance.id); - } - } - } - } - } - } - } - Ok(()) - }, - ))?; - Ok(Self { + + let bid = { + let mut backend_id = NEXT_BACKEND_ID.lock().unwrap(); + let bid = BackendId::WinRt(*backend_id); + *backend_id += 1; + + bid + }; + + let tts = Self { id: bid, - synth, + synth: Arc::new(SpeechSynthesizer::new()?), player, + utterances: Arc::new(Mutex::new(VecDeque::new())), rate: 1., pitch: 1., volume: 1., voice: SpeechSynthesizer::DefaultVoice()?, - }) + }; + + let synth_clone = tts.synth.clone(); + let utterances_clone = tts.utterances.clone(); + tts.player.MediaEnded(&TypedEventHandler::new( + move |player: &Option, _args| { + utterances_clone.lock().unwrap().pop_front(); // Utterance that just ended + + if let Some(utterance) = utterances_clone.lock().unwrap().front() { + utterance.speak( + &synth_clone, + player.as_ref().unwrap(), + CALLBACKS.lock().unwrap().get_mut(&bid).unwrap(), + )?; + } + Ok(()) + }, + ))?; + + Ok(tts) } } @@ -167,47 +157,32 @@ impl Backend for WinRt { if interrupt && self.is_speaking()? { self.stop()?; } + let utterance_id = { let mut uid = NEXT_UTTERANCE_ID.lock().unwrap(); let utterance_id = UtteranceId::WinRt(*uid); *uid += 1; utterance_id }; - let mut no_utterances = false; - { - let mut utterances = UTTERANCES.lock().unwrap(); - if let Some(utterances) = utterances.get_mut(&self.id) { - no_utterances = utterances.is_empty(); - let utterance = Utterance { - id: utterance_id, - text: text.into(), - rate: self.rate, - pitch: self.pitch, - volume: self.volume, - voice: self.voice.clone(), - }; - utterances.push_back(utterance); - } - } - if no_utterances { - self.synth.Options()?.SetSpeakingRate(self.rate.into())?; - self.synth.Options()?.SetAudioPitch(self.pitch.into())?; - self.synth.Options()?.SetAudioVolume(self.volume.into())?; - self.synth.SetVoice(&self.voice)?; - let stream = self - .synth - .SynthesizeTextToStreamAsync(&text.into())? - .get()?; - let content_type = stream.ContentType()?; - let source = MediaSource::CreateFromStream(&stream, &content_type)?; - self.player.SetSource(&source)?; - self.player.Play()?; - let mut callbacks = CALLBACKS.lock().unwrap(); - let callbacks = callbacks.get_mut(&self.id).unwrap(); - if let Some(callback) = callbacks.utterance_begin.as_mut() { - callback(utterance_id); - } + + let utterance = Utterance { + id: utterance_id, + text: text.to_string(), + rate: self.rate, + pitch: self.pitch, + volume: self.volume, + voice: self.voice.clone(), + }; + + if !self.is_speaking()? { + utterance.speak( + &self.synth, + &self.player, + CALLBACKS.lock().unwrap().get_mut(&self.id).unwrap(), + )?; } + + self.utterances.lock().unwrap().push_back(utterance); Ok(Some(utterance_id)) } @@ -216,19 +191,16 @@ impl Backend for WinRt { if !self.is_speaking()? { return Ok(()); } - let mut utterances = UTTERANCES.lock().unwrap(); - if let Some(utterances) = utterances.get(&self.id) { - let mut callbacks = CALLBACKS.lock().unwrap(); - let callbacks = callbacks.get_mut(&self.id).unwrap(); - if let Some(callback) = callbacks.utterance_stop.as_mut() { - for utterance in utterances { - callback(utterance.id); - } + let mut utterances = self.utterances.lock().unwrap(); + let mut callbacks = CALLBACKS.lock().unwrap(); + let callbacks = callbacks.get_mut(&self.id).unwrap(); + if let Some(callback) = callbacks.utterance_stop.as_mut() { + let utterances = utterances.iter(); + for utterance in utterances { + callback(utterance.id); } } - if let Some(utterances) = utterances.get_mut(&self.id) { - utterances.clear(); - } + utterances.clear(); self.player.Pause()?; Ok(()) } @@ -246,8 +218,7 @@ impl Backend for WinRt { } fn get_rate(&self) -> std::result::Result { - let rate = self.synth.Options()?.SpeakingRate()?; - Ok(rate as f32) + Ok(self.rate) } fn set_rate(&mut self, rate: f32) -> std::result::Result<(), Error> { @@ -268,8 +239,7 @@ impl Backend for WinRt { } fn get_pitch(&self) -> std::result::Result { - let pitch = self.synth.Options()?.AudioPitch()?; - Ok(pitch as f32) + Ok(self.pitch) } fn set_pitch(&mut self, pitch: f32) -> std::result::Result<(), Error> { @@ -290,8 +260,7 @@ impl Backend for WinRt { } fn get_volume(&self) -> std::result::Result { - let volume = self.synth.Options()?.AudioVolume()?; - Ok(volume as f32) + Ok(self.volume) } fn set_volume(&mut self, volume: f32) -> std::result::Result<(), Error> { @@ -300,21 +269,17 @@ impl Backend for WinRt { } fn is_speaking(&self) -> std::result::Result { - let utterances = UTTERANCES.lock().unwrap(); - let utterances = utterances.get(&self.id).unwrap(); - Ok(!utterances.is_empty()) + Ok(!self.utterances.lock().unwrap().is_empty()) } fn voice(&self) -> Result, Error> { - let voice = self.synth.Voice()?; - let voice = voice.try_into()?; - Ok(Some(voice)) + Ok(Some((&self.voice).try_into()?)) } fn voices(&self) -> Result, Error> { let mut rv: Vec = vec![]; for voice in SpeechSynthesizer::AllVoices()? { - rv.push(voice.try_into()?); + rv.push((&voice).try_into()?); } Ok(rv) } @@ -331,19 +296,7 @@ impl Backend for WinRt { } } -impl Drop for WinRt { - fn drop(&mut self) { - let id = self.id; - let mut backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); - backend_to_media_player.remove(&id); - let mut backend_to_speech_synthesizer = BACKEND_TO_SPEECH_SYNTHESIZER.lock().unwrap(); - backend_to_speech_synthesizer.remove(&id); - let mut utterances = UTTERANCES.lock().unwrap(); - utterances.remove(&id); - } -} - -impl TryInto for VoiceInformation { +impl TryInto for &VoiceInformation { type Error = Error; fn try_into(self) -> Result {