diff --git a/src/backends/android.rs b/src/backends/android.rs
index 3f6d422..bc54ad4 100644
--- a/src/backends/android.rs
+++ b/src/backends/android.rs
@@ -250,6 +250,7 @@ impl Backend for Android {
             utterance_callbacks: true,
             voice: false,
             get_voice: false,
+            synthesize: false,
         }
     }
 
@@ -284,6 +285,11 @@ impl Backend for Android {
         }
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         let vm = Self::vm()?;
         let env = vm.get_env()?;
diff --git a/src/backends/appkit.rs b/src/backends/appkit.rs
index cf1375a..ddc5c22 100644
--- a/src/backends/appkit.rs
+++ b/src/backends/appkit.rs
@@ -123,6 +123,11 @@ impl Backend for AppKit {
         Ok(None)
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         unsafe {
diff --git a/src/backends/av_foundation.rs b/src/backends/av_foundation.rs
index a64d9b4..69379c4 100644
--- a/src/backends/av_foundation.rs
+++ b/src/backends/av_foundation.rs
@@ -169,6 +169,7 @@ impl Backend for AvFoundation {
             voice: true,
             get_voice: false,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -205,6 +206,11 @@ impl Backend for AvFoundation {
         Ok(Some(UtteranceId::AvFoundation(utterance)))
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         unsafe {
diff --git a/src/backends/speech_dispatcher.rs b/src/backends/speech_dispatcher.rs
index b3034a1..b91bd3b 100644
--- a/src/backends/speech_dispatcher.rs
+++ b/src/backends/speech_dispatcher.rs
@@ -85,6 +85,7 @@ impl Backend for SpeechDispatcher {
             voice: true,
             get_voice: false,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -108,6 +109,11 @@ impl Backend for SpeechDispatcher {
         }
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         self.0.cancel()?;
diff --git a/src/backends/tolk.rs b/src/backends/tolk.rs
index 056d75e..408c1f5 100644
--- a/src/backends/tolk.rs
+++ b/src/backends/tolk.rs
@@ -39,6 +39,11 @@ impl Backend for Tolk {
         Ok(None)
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         self.0.silence();
diff --git a/src/backends/web.rs b/src/backends/web.rs
index 578a213..e1c140f 100644
--- a/src/backends/web.rs
+++ b/src/backends/web.rs
@@ -59,6 +59,7 @@ impl Backend for Web {
             voice: true,
             get_voice: true,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -121,6 +122,11 @@ impl Backend for Web {
         }
     }
 
+    /// Synthesis to bytes is not supported by this backend.
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         if let Some(window) = web_sys::window() {
diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs
index 70d765f..7c62597 100644
--- a/src/backends/winrt.rs
+++ b/src/backends/winrt.rs
@@ -13,8 +13,11 @@ use windows::{
     Media::{
         Core::MediaSource,
         Playback::{MediaPlayer, MediaPlayerAudioCategory},
-        SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation},
+        SpeechSynthesis::{
+            SpeechSynthesisStream, SpeechSynthesizer, VoiceGender, VoiceInformation,
+        },
     },
+    Storage::Streams::DataReader,
 };
 
 use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS};
@@ -139,6 +142,22 @@ impl WinRt {
             voice: SpeechSynthesizer::DefaultVoice()?,
         })
     }
+
+    /// Applies the configured rate, pitch, volume and voice to the synthesizer, then blocks
+    /// until `text` has been synthesized and returns the resulting stream.
+    fn create_synthesis_stream(&mut self, text: &str) -> Result<SpeechSynthesisStream, Error> {
+        self.synth.Options()?.SetSpeakingRate(self.rate.into())?;
+        self.synth.Options()?.SetAudioPitch(self.pitch.into())?;
+        self.synth.Options()?.SetAudioVolume(self.volume.into())?;
+
+        self.synth.SetVoice(&self.voice)?;
+        let stream = self
+            .synth
+            .SynthesizeTextToStreamAsync(&text.into())?
+            .get()?;
+
+        Ok(stream)
+    }
 }
 
 impl Backend for WinRt {
@@ -156,6 +175,7 @@ impl Backend for WinRt {
             voice: true,
             get_voice: true,
             utterance_callbacks: true,
+            synthesize: true,
         }
     }
 
@@ -190,18 +210,12 @@ impl Backend for WinRt {
             }
         }
         if no_utterances {
-            self.synth.Options()?.SetSpeakingRate(self.rate.into())?;
-            self.synth.Options()?.SetAudioPitch(self.pitch.into())?;
-            self.synth.Options()?.SetAudioVolume(self.volume.into())?;
-            self.synth.SetVoice(&self.voice)?;
-            let stream = self
-                .synth
-                .SynthesizeTextToStreamAsync(&text.into())?
-                .get()?;
-            let content_type = stream.ContentType()?;
-            let source = MediaSource::CreateFromStream(&stream, &content_type)?;
-            self.player.SetSource(&source)?;
+            let stream = self.create_synthesis_stream(text)?;
+
+            let media_source = MediaSource::CreateFromStream(&stream, &stream.ContentType()?)?;
+            self.player.SetSource(&media_source)?;
             self.player.Play()?;
+
             let mut callbacks = CALLBACKS.lock().unwrap();
             let callbacks = callbacks.get_mut(&self.id).unwrap();
             if let Some(callback) = callbacks.utterance_begin.as_mut() {
@@ -211,6 +225,22 @@ impl Backend for WinRt {
         Ok(Some(utterance_id))
     }
 
+    /// Synthesizes `text` with the current rate, pitch, volume and voice and returns the bytes
+    /// of the resulting stream instead of playing them.
+    fn synthesize(&mut self, text: &str) -> Result<Vec<u8>, Error> {
+        let stream = self.create_synthesis_stream(text)?;
+
+        // `LoadAsync` takes a `u32` count, so saturate rather than let an `as` cast wrap.
+        let requested = stream.Size()?.min(u64::from(u32::MAX)) as u32;
+        let data_reader = DataReader::CreateDataReader(&stream.GetInputStreamAt(0)?)?;
+        // Wait for the load to finish; `ReadBytes` can only consume data already loaded.
+        let loaded = data_reader.LoadAsync(requested)?.get()?;
+        let mut bytes = vec![0; loaded as usize];
+        data_reader.ReadBytes(&mut bytes)?;
+
+        Ok(bytes)
+    }
+
     fn stop(&mut self) -> std::result::Result<(), Error> {
         trace!("stop()");
         if !self.is_speaking()? {
diff --git a/src/lib.rs b/src/lib.rs
index 8a8df57..9a6a55f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -162,6 +162,7 @@ unsafe impl Sync for UtteranceId {}
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Features {
     pub is_speaking: bool,
+    pub synthesize: bool,
     pub pitch: bool,
     pub rate: bool,
     pub stop: bool,
@@ -217,6 +218,7 @@ pub trait Backend: Clone {
     fn id(&self) -> Option<BackendId>;
     fn supported_features(&self) -> Features;
     fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error>;
+    fn synthesize(&mut self, text: &str) -> Result<Vec<u8>, Error>;
     fn stop(&mut self) -> Result<(), Error>;
     fn min_rate(&self) -> f32;
     fn max_rate(&self) -> f32;
@@ -373,6 +375,18 @@ impl Tts {
             .speak(text.into().as_str(), interrupt)
     }
 
+    /// Synthesizes the given text and returns the backend's output as bytes.
+    ///
+    /// Returns `Error::UnsupportedFeature` if the backend does not report `synthesize` support.
+    pub fn synthesize<S: Into<String>>(&mut self, text: S) -> Result<Vec<u8>, Error> {
+        let Features { synthesize, .. } = self.supported_features();
+        if synthesize {
+            self.0.write().unwrap().synthesize(text.into().as_str())
+        } else {
+            Err(Error::UnsupportedFeature)
+        }
+    }
+
     /// Stops current speech.
     pub fn stop(&mut self) -> Result<&Self, Error> {
         let Features { stop, .. } = self.supported_features();