From b1f60811bf9369914ebbe057a93bcf7ea942d25c Mon Sep 17 00:00:00 2001 From: Nolan Darilek Date: Wed, 30 Mar 2022 20:13:27 -0500 Subject: [PATCH] Add voice support to WinRT backend. --- Cargo.toml | 2 +- examples/hello_world.rs | 25 ++++++------ src/backends/speech_dispatcher.rs | 2 +- src/backends/winrt.rs | 65 +++++++++++++++++++++++++------ src/lib.rs | 10 ++++- 5 files changed, 78 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 326476a..f47ab74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ env_logger = "0.9" [target.'cfg(windows)'.dependencies] tolk = { version = "0.5", optional = true } -windows = { version = "0.34", features = ["alloc", "Foundation", "Media_Core", "Media_Playback", "Media_SpeechSynthesis", "Storage_Streams"] } +windows = { version = "0.34", features = ["alloc", "Foundation", "Foundation_Collections", "Media_Core", "Media_Playback", "Media_SpeechSynthesis", "Storage_Streams"] } [target.'cfg(target_os = "linux")'.dependencies] speech-dispatcher = { version = "0.13", default-features = false } diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 3885ede..dc70110 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -71,19 +71,22 @@ fn main() -> Result<(), Error> { tts.speak("This is normal volume.", false)?; tts.set_volume(original_volume)?; } - let Features { voices, .. } = tts.supported_features(); - if voices { - let original_voice = tts.voice()?; - let voices_list = tts.list_voices(); + let Features { voice, .. } = tts.supported_features(); + if voice { + let voices = tts.voices()?; println!("Available voices:\n==="); - for v in voices_list.iter() { - println!("{}", v); - tts.set_voice(v)?; - println!("voice set"); - println!("{}", tts.voice()?); - tts.speak(v, false)?; + for v in &voices { + println!("{:?}", v); + } + let Features { get_voice, .. } = tts.supported_features(); + if get_voice { + let original_voice = tts.voice()?; + for v in &voices { + tts.set_voice(v)?; + tts.speak(format!("This is {}.", v.name), false)?; + } + tts.set_voice(&original_voice)?; } - tts.set_voice(original_voice)?; } tts.speak("Goodbye.", false)?; let mut _input = String::new(); diff --git a/src/backends/speech_dispatcher.rs b/src/backends/speech_dispatcher.rs index 23e1e32..1da28d9 100644 --- a/src/backends/speech_dispatcher.rs +++ b/src/backends/speech_dispatcher.rs @@ -198,7 +198,7 @@ impl Backend for SpeechDispatcher { Ok(rv) } - fn voice(&self) -> Result { + fn voice(&self) -> Result { unimplemented!() } diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index dbacbc6..22b992e 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -1,19 +1,23 @@ #[cfg(windows)] -use std::collections::{HashMap, VecDeque}; -use std::sync::Mutex; +use std::{ + collections::{HashMap, VecDeque}, + str::FromStr, + sync::Mutex, +}; use lazy_static::lazy_static; use log::{info, trace}; +use unic_langid::LanguageIdentifier; use windows::{ Foundation::TypedEventHandler, Media::{ Core::MediaSource, Playback::{MediaPlayer, MediaPlayerAudioCategory}, - SpeechSynthesis::SpeechSynthesizer, + SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation}, }, }; -use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; +use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS}; impl From for Error { fn from(e: windows::core::Error) -> Self { @@ -29,6 +33,7 @@ pub struct WinRt { rate: f32, pitch: f32, volume: f32, + voice: VoiceInformation, } struct Utterance { @@ -37,6 +42,7 @@ struct Utterance { rate: f32, pitch: f32, volume: f32, + voice: VoiceInformation, } lazy_static! { @@ -102,6 +108,7 @@ impl WinRt { tts.Options()?.SetSpeakingRate(utterance.rate.into())?; tts.Options()?.SetAudioPitch(utterance.pitch.into())?; tts.Options()?.SetAudioVolume(utterance.volume.into())?; + tts.SetVoice(utterance.voice.clone())?; let stream = tts .SynthesizeTextToStreamAsync(utterance.text.as_str())? .get()?; @@ -129,6 +136,7 @@ impl WinRt { rate: 1., pitch: 1., volume: 1., + voice: SpeechSynthesizer::DefaultVoice()?, }) } } @@ -145,7 +153,8 @@ impl Backend for WinRt { pitch: true, volume: true, is_speaking: true, - voices: true, + voice: true, + get_voice: true, utterance_callbacks: true, } } @@ -175,6 +184,7 @@ impl Backend for WinRt { rate: self.rate, pitch: self.pitch, volume: self.volume, + voice: self.voice.clone(), }; utterances.push_back(utterance); } @@ -291,16 +301,28 @@ impl Backend for WinRt { Ok(!utterances.is_empty()) } - fn voice(&self) -> Result { - unimplemented!() + fn voice(&self) -> Result { + let voice = self.synth.Voice()?; + voice.try_into() } - fn list_voices(&self) -> Vec { - unimplemented!() + fn voices(&self) -> Result, Error> { + let mut rv: Vec = vec![]; + for voice in SpeechSynthesizer::AllVoices()? { + rv.push(voice.try_into()?); + } + Ok(rv) } - fn set_voice(&mut self, voice: &str) -> Result<(), Error> { - unimplemented!() + fn set_voice(&mut self, voice: &Voice) -> Result<(), Error> { + for v in SpeechSynthesizer::AllVoices()? { + let vid: String = v.Id()?.try_into()?; + if vid == voice.id { + self.voice = v.clone(); + return Ok(()); + } + } + Err(Error::OperationFailed) } } @@ -315,3 +337,24 @@ impl Drop for WinRt { utterances.remove(&id); } } + +impl TryInto for VoiceInformation { + type Error = Error; + + fn try_into(self) -> Result { + let gender = self.Gender()?; + let gender = if gender == VoiceGender::Male { + Gender::Male + } else { + Gender::Female + }; + let language: String = self.Language()?.try_into()?; + let language = LanguageIdentifier::from_str(&language).unwrap(); + Ok(Voice { + id: self.Id()?.try_into()?, + name: self.DisplayName()?.try_into()?, + gender, + language, + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index ee86440..dfb9a8d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; #[cfg(target_os = "macos")] use std::ffi::CStr; use std::fmt; +use std::string::FromUtf16Error; use std::sync::{Arc, Mutex}; use std::{boxed::Box, sync::RwLock}; @@ -200,6 +201,9 @@ pub enum Error { #[cfg(windows)] #[error("WinRT error")] WinRt(windows::core::Error), + #[cfg(windows)] + #[error("UTF string conversion failed")] + UtfStringConversionFailed(#[from] FromUtf16Error), #[error("Unsupported feature")] UnsupportedFeature, #[error("Out of range")] @@ -232,7 +236,7 @@ pub trait Backend: Clone { fn set_volume(&mut self, volume: f32) -> Result<(), Error>; fn is_speaking(&self) -> Result; fn voices(&self) -> Result, Error>; - fn voice(&self) -> Result; + fn voice(&self) -> Result; fn set_voice(&mut self, voice: &Voice) -> Result<(), Error>; } @@ -577,7 +581,7 @@ impl Tts { /** * Return the current speaking voice. */ - pub fn voice(&self) -> Result { + pub fn voice(&self) -> Result { let Features { get_voice, .. } = self.supported_features(); if get_voice { self.0.read().unwrap().voice() @@ -697,12 +701,14 @@ impl Drop for Tts { } } +#[derive(Debug)] pub enum Gender { Unspecified, Male, Female, } +#[derive(Debug)] pub struct Voice { pub id: String, pub name: String,