From bf522b42a72e9de863706d44e8468d487f955746 Mon Sep 17 00:00:00 2001 From: Bear-03 <64696287+Bear-03@users.noreply.github.com> Date: Fri, 22 Jul 2022 17:29:48 +0200 Subject: [PATCH 1/5] Prepare Backend and Features for synthesis --- src/backends/android.rs | 5 +++++ src/backends/appkit.rs | 4 ++++ src/backends/av_foundation.rs | 5 +++++ src/backends/speech_dispatcher.rs | 5 +++++ src/backends/tolk.rs | 4 ++++ src/backends/web.rs | 5 +++++ src/backends/winrt.rs | 5 +++++ src/lib.rs | 2 ++ 8 files changed, 35 insertions(+) diff --git a/src/backends/android.rs b/src/backends/android.rs index 3f6d422..bc54ad4 100644 --- a/src/backends/android.rs +++ b/src/backends/android.rs @@ -250,6 +250,7 @@ impl Backend for Android { utterance_callbacks: true, voice: false, get_voice: false, + synthesize: false, } } @@ -284,6 +285,10 @@ impl Backend for Android { } } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { let vm = Self::vm()?; let env = vm.get_env()?; diff --git a/src/backends/appkit.rs b/src/backends/appkit.rs index cf1375a..ddc5c22 100644 --- a/src/backends/appkit.rs +++ b/src/backends/appkit.rs @@ -123,6 +123,10 @@ impl Backend for AppKit { Ok(None) } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { trace!("stop()"); unsafe { diff --git a/src/backends/av_foundation.rs b/src/backends/av_foundation.rs index a64d9b4..69379c4 100644 --- a/src/backends/av_foundation.rs +++ b/src/backends/av_foundation.rs @@ -169,6 +169,7 @@ impl Backend for AvFoundation { voice: true, get_voice: false, utterance_callbacks: true, + synthesize: false, } } @@ -205,6 +206,10 @@ impl Backend for AvFoundation { Ok(Some(UtteranceId::AvFoundation(utterance))) } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { trace!("stop()"); unsafe { diff --git a/src/backends/speech_dispatcher.rs b/src/backends/speech_dispatcher.rs index b3034a1..b91bd3b 100644 --- a/src/backends/speech_dispatcher.rs +++ b/src/backends/speech_dispatcher.rs @@ -85,6 +85,7 @@ impl Backend for SpeechDispatcher { voice: true, get_voice: false, utterance_callbacks: true, + synthesize: false, } } @@ -108,6 +109,10 @@ impl Backend for SpeechDispatcher { } } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { trace!("stop()"); self.0.cancel()?; diff --git a/src/backends/tolk.rs b/src/backends/tolk.rs index 056d75e..408c1f5 100644 --- a/src/backends/tolk.rs +++ b/src/backends/tolk.rs @@ -39,6 +39,10 @@ impl Backend for Tolk { Ok(None) } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { trace!("stop()"); self.0.silence(); diff --git a/src/backends/web.rs b/src/backends/web.rs index 578a213..e1c140f 100644 --- a/src/backends/web.rs +++ b/src/backends/web.rs @@ -59,6 +59,7 @@ impl Backend for Web { voice: true, get_voice: true, utterance_callbacks: true, + synthesize: false, } } @@ -121,6 +122,10 @@ impl Backend for Web { } } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!(); + } + fn stop(&mut self) -> Result<(), Error> { trace!("stop()"); if let Some(window) = web_sys::window() { diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index ceb5f00..85fb1ae 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -156,6 +156,7 @@ impl Backend for WinRt { voice: true, get_voice: true, utterance_callbacks: true, + synthesize: false, } } @@ -207,6 +208,10 @@ impl Backend for WinRt { Ok(Some(utterance_id)) } + fn synthesize(&mut self, text: &str) -> Result, Error> { + unimplemented!() + } + fn stop(&mut self) -> std::result::Result<(), Error> { trace!("stop()"); if !self.is_speaking()? { diff --git a/src/lib.rs b/src/lib.rs index 76c7b94..98b723e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -162,6 +162,7 @@ unsafe impl Sync for UtteranceId {} #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Features { pub is_speaking: bool, + pub synthesize: bool, pub pitch: bool, pub rate: bool, pub stop: bool, @@ -217,6 +218,7 @@ pub trait Backend: Clone { fn id(&self) -> Option; fn supported_features(&self) -> Features; fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error>; + fn synthesize(&mut self, text: &str) -> Result, Error>; fn stop(&mut self) -> Result<(), Error>; fn min_rate(&self) -> f32; fn max_rate(&self) -> f32; From b85ffc80d4012393dd34a827bf190a9a26b0eb0f Mon Sep 17 00:00:00 2001 From: Bear-03 <64696287+Bear-03@users.noreply.github.com> Date: Fri, 22 Jul 2022 17:31:14 +0200 Subject: [PATCH 2/5] Add synthesis support to WinRT --- src/backends/winrt.rs | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index 85fb1ae..6a0437f 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -9,12 +9,14 @@ use lazy_static::lazy_static; use log::{info, trace}; use unic_langid::LanguageIdentifier; use windows::{ + core::HSTRING, Foundation::TypedEventHandler, Media::{ Core::MediaSource, Playback::{MediaPlayer, MediaPlayerAudioCategory}, SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation}, }, + Storage::Streams::{DataReader, DataWriter, InMemoryRandomAccessStream}, }; use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS}; @@ -139,6 +141,22 @@ impl WinRt { voice: SpeechSynthesizer::DefaultVoice()?, }) } + + fn synthesize_with_content_type(&mut self, text: &str) -> Result<(Vec, HSTRING), Error> { + self.synth.Options()?.SetSpeakingRate(self.rate.into())?; + self.synth.Options()?.SetAudioPitch(self.pitch.into())?; + self.synth.Options()?.SetAudioVolume(self.volume.into())?; + + let synth_stream = self.synth.SynthesizeTextToStreamAsync(text)?.get()?; + + let size = synth_stream.Size()?; + let data_reader = DataReader::CreateDataReader(synth_stream.GetInputStreamAt(0)?)?; + let mut bytes = vec![0; size as usize]; + data_reader.LoadAsync(size as u32)?.get()?; + data_reader.ReadBytes(&mut bytes)?; + + Ok((bytes, synth_stream.ContentType()?)) + } } impl Backend for WinRt { @@ -156,7 +174,7 @@ impl Backend for WinRt { voice: true, get_voice: true, utterance_callbacks: true, - synthesize: false, + synthesize: true, } } @@ -191,14 +209,18 @@ impl Backend for WinRt { } } if no_utterances { - self.synth.Options()?.SetSpeakingRate(self.rate.into())?; - self.synth.Options()?.SetAudioPitch(self.pitch.into())?; - self.synth.Options()?.SetAudioVolume(self.volume.into())?; - let stream = self.synth.SynthesizeTextToStreamAsync(text)?.get()?; - let content_type = stream.ContentType()?; + let (bytes, content_type) = self.synthesize_with_content_type(text)?; + + let stream = InMemoryRandomAccessStream::new()?; + let data_writer = DataWriter::CreateDataWriter(&stream)?; + data_writer.WriteBytes(&bytes)?; + data_writer.StoreAsync()?; + data_writer.FlushAsync()?; + let source = MediaSource::CreateFromStream(stream, content_type)?; - self.player.SetSource(source)?; + self.player.SetSource(&source)?; self.player.Play()?; + let mut callbacks = CALLBACKS.lock().unwrap(); let callbacks = callbacks.get_mut(&self.id).unwrap(); if let Some(callback) = callbacks.utterance_begin.as_mut() { @@ -209,7 +231,8 @@ impl Backend for WinRt { } fn synthesize(&mut self, text: &str) -> Result, Error> { - unimplemented!() + let (bytes, _) = self.synthesize_with_content_type(text)?; + Ok(bytes) } fn stop(&mut self) -> std::result::Result<(), Error> { From 87cf05f78edfcfd264dd6cdfd314e65aeeba0301 Mon Sep 17 00:00:00 2001 From: Bear-03 <64696287+Bear-03@users.noreply.github.com> Date: Fri, 22 Jul 2022 18:03:01 +0200 Subject: [PATCH 3/5] Fix errors on stable toolchain --- src/backends/winrt.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index 901cda6..6fbf4ff 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -148,10 +148,13 @@ impl WinRt { self.synth.Options()?.SetAudioVolume(self.volume.into())?; self.synth.SetVoice(&self.voice)?; - let synth_stream = self.synth.SynthesizeTextToStreamAsync(text)?.get()?; + let synth_stream = self + .synth + .SynthesizeTextToStreamAsync(&text.into())? + .get()?; let size = synth_stream.Size()?; - let data_reader = DataReader::CreateDataReader(synth_stream.GetInputStreamAt(0)?)?; + let data_reader = DataReader::CreateDataReader(&synth_stream.GetInputStreamAt(0)?)?; let mut bytes = vec![0; size as usize]; data_reader.LoadAsync(size as u32)?.get()?; data_reader.ReadBytes(&mut bytes)?; @@ -218,7 +221,7 @@ impl Backend for WinRt { data_writer.StoreAsync()?; data_writer.FlushAsync()?; - let source = MediaSource::CreateFromStream(stream, content_type)?; + let source = MediaSource::CreateFromStream(&stream, &content_type)?; self.player.SetSource(&source)?; self.player.Play()?; From 91a0f03f1af86a97e6410116eaad144b947e76c5 Mon Sep 17 00:00:00 2001 From: Bear-03 <64696287+Bear-03@users.noreply.github.com> Date: Sat, 23 Jul 2022 14:03:45 +0200 Subject: [PATCH 4/5] Remove InMemoryRandomAccessStream in WinRt::speak --- src/backends/winrt.rs | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index 6fbf4ff..7c62597 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -9,14 +9,15 @@ use lazy_static::lazy_static; use log::{info, trace}; use unic_langid::LanguageIdentifier; use windows::{ - core::HSTRING, Foundation::TypedEventHandler, Media::{ Core::MediaSource, Playback::{MediaPlayer, MediaPlayerAudioCategory}, - SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation}, + SpeechSynthesis::{ + SpeechSynthesisStream, SpeechSynthesizer, VoiceGender, VoiceInformation, + }, }, - Storage::Streams::{DataReader, DataWriter, InMemoryRandomAccessStream}, + Storage::Streams::DataReader, }; use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS}; @@ -142,24 +143,18 @@ impl WinRt { }) } - fn synthesize_with_content_type(&mut self, text: &str) -> Result<(Vec, HSTRING), Error> { + fn create_synthesis_stream(&mut self, text: &str) -> Result { self.synth.Options()?.SetSpeakingRate(self.rate.into())?; self.synth.Options()?.SetAudioPitch(self.pitch.into())?; self.synth.Options()?.SetAudioVolume(self.volume.into())?; self.synth.SetVoice(&self.voice)?; - let synth_stream = self + let stream = self .synth .SynthesizeTextToStreamAsync(&text.into())? .get()?; - let size = synth_stream.Size()?; - let data_reader = DataReader::CreateDataReader(&synth_stream.GetInputStreamAt(0)?)?; - let mut bytes = vec![0; size as usize]; - data_reader.LoadAsync(size as u32)?.get()?; - data_reader.ReadBytes(&mut bytes)?; - - Ok((bytes, synth_stream.ContentType()?)) + Ok(stream) } } @@ -213,16 +208,10 @@ impl Backend for WinRt { } } if no_utterances { - let (bytes, content_type) = self.synthesize_with_content_type(text)?; + let stream = self.create_synthesis_stream(text)?; - let stream = InMemoryRandomAccessStream::new()?; - let data_writer = DataWriter::CreateDataWriter(&stream)?; - data_writer.WriteBytes(&bytes)?; - data_writer.StoreAsync()?; - data_writer.FlushAsync()?; - - let source = MediaSource::CreateFromStream(&stream, &content_type)?; - self.player.SetSource(&source)?; + let media_source = MediaSource::CreateFromStream(&stream, &stream.ContentType()?)?; + self.player.SetSource(&media_source)?; self.player.Play()?; let mut callbacks = CALLBACKS.lock().unwrap(); @@ -235,7 +224,14 @@ impl Backend for WinRt { } fn synthesize(&mut self, text: &str) -> Result, Error> { - let (bytes, _) = self.synthesize_with_content_type(text)?; + let stream = self.create_synthesis_stream(text)?; + + let size = stream.Size()?; + let data_reader = DataReader::CreateDataReader(&stream.GetInputStreamAt(0)?)?; + let mut bytes = vec![0; size as usize]; + data_reader.LoadAsync(size as u32)?; + data_reader.ReadBytes(&mut bytes)?; + Ok(bytes) } From 4f4ab5325255f73c54306328da82be51455fc6c8 Mon Sep 17 00:00:00 2001 From: Bear-03 <64696287+Bear-03@users.noreply.github.com> Date: Sat, 23 Jul 2022 14:04:15 +0200 Subject: [PATCH 5/5] Add Tts::synthesize method --- src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 96fdc6a..9a6a55f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -375,6 +375,15 @@ impl Tts { .speak(text.into().as_str(), interrupt) } + pub fn synthesize>(&mut self, text: S) -> Result, Error> { + let Features { synthesize, .. } = self.supported_features(); + if synthesize { + self.0.write().unwrap().synthesize(text.into().as_str()) + } else { + Err(Error::UnsupportedFeature) + } + } + /// Stops current speech. pub fn stop(&mut self) -> Result<&Self, Error> { let Features { stop, .. } = self.supported_features();