diff --git a/src/backends/android.rs b/src/backends/android.rs
index 2d54d80..c06dae5 100644
--- a/src/backends/android.rs
+++ b/src/backends/android.rs
@@ -250,6 +250,7 @@ impl Backend for Android {
             utterance_callbacks: true,
             voice: false,
             get_voice: false,
+            synthesize: false,
         }
     }
 
@@ -284,6 +285,11 @@ impl Backend for Android {
         }
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         let vm = Self::vm()?;
         let mut env = vm.get_env()?;
diff --git a/src/backends/appkit.rs b/src/backends/appkit.rs
index f8a02b7..7157767 100644
--- a/src/backends/appkit.rs
+++ b/src/backends/appkit.rs
@@ -125,6 +125,11 @@ impl Backend for AppKit {
         Ok(None)
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         unsafe {
diff --git a/src/backends/av_foundation.rs b/src/backends/av_foundation.rs
index 72e2aa5..4e383ab 100644
--- a/src/backends/av_foundation.rs
+++ b/src/backends/av_foundation.rs
@@ -170,6 +170,7 @@ impl Backend for AvFoundation {
             voice: true,
             get_voice: false,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -206,6 +207,11 @@ impl Backend for AvFoundation {
         Ok(Some(UtteranceId::AvFoundation(utterance)))
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         unsafe {
diff --git a/src/backends/speech_dispatcher.rs b/src/backends/speech_dispatcher.rs
index 81aff57..9fd13d9 100644
--- a/src/backends/speech_dispatcher.rs
+++ b/src/backends/speech_dispatcher.rs
@@ -85,6 +85,7 @@ impl Backend for SpeechDispatcher {
             voice: true,
             get_voice: false,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -108,6 +109,11 @@ impl Backend for SpeechDispatcher {
         }
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         self.0.cancel()?;
diff --git a/src/backends/tolk.rs b/src/backends/tolk.rs
index 056d75e..408c1f5 100644
--- a/src/backends/tolk.rs
+++ b/src/backends/tolk.rs
@@ -39,6 +39,11 @@ impl Backend for Tolk {
         Ok(None)
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         self.0.silence();
diff --git a/src/backends/web.rs b/src/backends/web.rs
index 095cbf9..27501f3 100644
--- a/src/backends/web.rs
+++ b/src/backends/web.rs
@@ -59,6 +59,7 @@ impl Backend for Web {
             voice: true,
             get_voice: true,
             utterance_callbacks: true,
+            synthesize: false,
         }
     }
 
@@ -121,6 +122,11 @@ impl Backend for Web {
         }
     }
 
+    fn synthesize(&mut self, _text: &str) -> Result<Vec<u8>, Error> {
+        // Not implemented for this backend; report it instead of panicking.
+        Err(Error::UnsupportedFeature)
+    }
+
     fn stop(&mut self) -> Result<(), Error> {
         trace!("stop()");
         if let Some(window) = web_sys::window() {
diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs
index 009243c..f4c3653 100644
--- a/src/backends/winrt.rs
+++ b/src/backends/winrt.rs
@@ -12,8 +12,11 @@ use windows::{
     Media::{
         Core::MediaSource,
         Playback::{MediaPlayer, MediaPlayerAudioCategory},
-        SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation},
+        SpeechSynthesis::{
+            SpeechSynthesisStream, SpeechSynthesizer, VoiceGender, VoiceInformation,
+        },
     },
+    Storage::Streams::DataReader,
 };
 
 use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS};
@@ -138,6 +141,20 @@ impl WinRt {
             voice: SpeechSynthesizer::DefaultVoice()?,
         })
     }
+
+    fn create_synthesis_stream(&mut self, text: &str) -> Result<SpeechSynthesisStream, Error> {
+        self.synth.Options()?.SetSpeakingRate(self.rate.into())?;
+        self.synth.Options()?.SetAudioPitch(self.pitch.into())?;
+        self.synth.Options()?.SetAudioVolume(self.volume.into())?;
+
+        self.synth.SetVoice(&self.voice)?;
+        let stream = self
+            .synth
+            .SynthesizeTextToStreamAsync(&text.into())?
+            .get()?;
+
+        Ok(stream)
+    }
 }
 
 impl Backend for WinRt {
@@ -155,6 +172,7 @@ impl Backend for WinRt {
             voice: true,
             get_voice: true,
             utterance_callbacks: true,
+            synthesize: true,
         }
     }
 
@@ -189,18 +207,12 @@ impl Backend for WinRt {
             }
         }
         if no_utterances {
-            self.synth.Options()?.SetSpeakingRate(self.rate.into())?;
-            self.synth.Options()?.SetAudioPitch(self.pitch.into())?;
-            self.synth.Options()?.SetAudioVolume(self.volume.into())?;
-            self.synth.SetVoice(&self.voice)?;
-            let stream = self
-                .synth
-                .SynthesizeTextToStreamAsync(&text.into())?
-                .get()?;
-            let content_type = stream.ContentType()?;
-            let source = MediaSource::CreateFromStream(&stream, &content_type)?;
-            self.player.SetSource(&source)?;
+            let stream = self.create_synthesis_stream(text)?;
+
+            let media_source = MediaSource::CreateFromStream(&stream, &stream.ContentType()?)?;
+            self.player.SetSource(&media_source)?;
             self.player.Play()?;
+
             let mut callbacks = CALLBACKS.lock().unwrap();
             let callbacks = callbacks.get_mut(&self.id).unwrap();
             if let Some(callback) = callbacks.utterance_begin.as_mut() {
@@ -210,6 +222,22 @@ impl Backend for WinRt {
         Ok(Some(utterance_id))
     }
 
+    /// Synthesizes `text` with the current rate, pitch, volume and voice and
+    /// returns the bytes of the resulting synthesis stream.
+    fn synthesize(&mut self, text: &str) -> Result<Vec<u8>, Error> {
+        let stream = self.create_synthesis_stream(text)?;
+
+        let size = stream.Size()?;
+        let data_reader = DataReader::CreateDataReader(&stream.GetInputStreamAt(0)?)?;
+        let mut bytes = vec![0; size as usize];
+        // `LoadAsync` only starts the load; block on it so the reader's buffer
+        // is filled before `ReadBytes` drains it.
+        data_reader.LoadAsync(size as u32)?.get()?;
+        data_reader.ReadBytes(&mut bytes)?;
+
+        Ok(bytes)
+    }
+
     fn stop(&mut self) -> std::result::Result<(), Error> {
         trace!("stop()");
         if !self.is_speaking()? {
diff --git a/src/lib.rs b/src/lib.rs
index c754750..34395d3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -163,6 +163,7 @@ unsafe impl Sync for UtteranceId {}
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Features {
     pub is_speaking: bool,
+    pub synthesize: bool,
     pub pitch: bool,
     pub rate: bool,
     pub stop: bool,
@@ -218,6 +219,7 @@ pub trait Backend: Clone {
     fn id(&self) -> Option<BackendId>;
     fn supported_features(&self) -> Features;
     fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error>;
+    fn synthesize(&mut self, text: &str) -> Result<Vec<u8>, Error>;
     fn stop(&mut self) -> Result<(), Error>;
     fn min_rate(&self) -> f32;
     fn max_rate(&self) -> f32;
@@ -375,6 +377,21 @@ impl Tts {
             .speak(text.into().as_str(), interrupt)
     }
 
+    /// Synthesizes `text` and returns the backend's output as raw bytes.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::UnsupportedFeature`] if the backend does not report
+    /// `synthesize` in its supported features.
+    pub fn synthesize<S: Into<String>>(&mut self, text: S) -> Result<Vec<u8>, Error> {
+        let Features { synthesize, .. } = self.supported_features();
+        if synthesize {
+            self.0.write().unwrap().synthesize(text.into().as_str())
+        } else {
+            Err(Error::UnsupportedFeature)
+        }
+    }
+
     /// Stops current speech.
     pub fn stop(&mut self) -> Result<&Self, Error> {
         let Features { stop, .. } = self.supported_features();