Add voice support to WinRT backend.

This commit is contained in:
Nolan Darilek 2022-03-30 20:13:27 -05:00
parent 51cd84a6cd
commit b1f60811bf
5 changed files with 78 additions and 26 deletions

View File

@ -27,7 +27,7 @@ env_logger = "0.9"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]
tolk = { version = "0.5", optional = true } tolk = { version = "0.5", optional = true }
windows = { version = "0.34", features = ["alloc", "Foundation", "Media_Core", "Media_Playback", "Media_SpeechSynthesis", "Storage_Streams"] } windows = { version = "0.34", features = ["alloc", "Foundation", "Foundation_Collections", "Media_Core", "Media_Playback", "Media_SpeechSynthesis", "Storage_Streams"] }
[target.'cfg(target_os = "linux")'.dependencies] [target.'cfg(target_os = "linux")'.dependencies]
speech-dispatcher = { version = "0.13", default-features = false } speech-dispatcher = { version = "0.13", default-features = false }

View File

@ -71,19 +71,22 @@ fn main() -> Result<(), Error> {
tts.speak("This is normal volume.", false)?; tts.speak("This is normal volume.", false)?;
tts.set_volume(original_volume)?; tts.set_volume(original_volume)?;
} }
let Features { voices, .. } = tts.supported_features(); let Features { voice, .. } = tts.supported_features();
if voices { if voice {
let original_voice = tts.voice()?; let voices = tts.voices()?;
let voices_list = tts.list_voices();
println!("Available voices:\n==="); println!("Available voices:\n===");
for v in voices_list.iter() { for v in &voices {
println!("{}", v); println!("{:?}", v);
tts.set_voice(v)?; }
println!("voice set"); let Features { get_voice, .. } = tts.supported_features();
println!("{}", tts.voice()?); if get_voice {
tts.speak(v, false)?; let original_voice = tts.voice()?;
for v in &voices {
tts.set_voice(v)?;
tts.speak(format!("This is {}.", v.name), false)?;
}
tts.set_voice(&original_voice)?;
} }
tts.set_voice(original_voice)?;
} }
tts.speak("Goodbye.", false)?; tts.speak("Goodbye.", false)?;
let mut _input = String::new(); let mut _input = String::new();

View File

@ -198,7 +198,7 @@ impl Backend for SpeechDispatcher {
Ok(rv) Ok(rv)
} }
fn voice(&self) -> Result<String, Error> { fn voice(&self) -> Result<Voice, Error> {
unimplemented!() unimplemented!()
} }

View File

@ -1,19 +1,23 @@
#[cfg(windows)] #[cfg(windows)]
use std::collections::{HashMap, VecDeque}; use std::{
use std::sync::Mutex; collections::{HashMap, VecDeque},
str::FromStr,
sync::Mutex,
};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::{info, trace}; use log::{info, trace};
use unic_langid::LanguageIdentifier;
use windows::{ use windows::{
Foundation::TypedEventHandler, Foundation::TypedEventHandler,
Media::{ Media::{
Core::MediaSource, Core::MediaSource,
Playback::{MediaPlayer, MediaPlayerAudioCategory}, Playback::{MediaPlayer, MediaPlayerAudioCategory},
SpeechSynthesis::SpeechSynthesizer, SpeechSynthesis::{SpeechSynthesizer, VoiceGender, VoiceInformation},
}, },
}; };
use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; use crate::{Backend, BackendId, Error, Features, Gender, UtteranceId, Voice, CALLBACKS};
impl From<windows::core::Error> for Error { impl From<windows::core::Error> for Error {
fn from(e: windows::core::Error) -> Self { fn from(e: windows::core::Error) -> Self {
@ -29,6 +33,7 @@ pub struct WinRt {
rate: f32, rate: f32,
pitch: f32, pitch: f32,
volume: f32, volume: f32,
voice: VoiceInformation,
} }
struct Utterance { struct Utterance {
@ -37,6 +42,7 @@ struct Utterance {
rate: f32, rate: f32,
pitch: f32, pitch: f32,
volume: f32, volume: f32,
voice: VoiceInformation,
} }
lazy_static! { lazy_static! {
@ -102,6 +108,7 @@ impl WinRt {
tts.Options()?.SetSpeakingRate(utterance.rate.into())?; tts.Options()?.SetSpeakingRate(utterance.rate.into())?;
tts.Options()?.SetAudioPitch(utterance.pitch.into())?; tts.Options()?.SetAudioPitch(utterance.pitch.into())?;
tts.Options()?.SetAudioVolume(utterance.volume.into())?; tts.Options()?.SetAudioVolume(utterance.volume.into())?;
tts.SetVoice(utterance.voice.clone())?;
let stream = tts let stream = tts
.SynthesizeTextToStreamAsync(utterance.text.as_str())? .SynthesizeTextToStreamAsync(utterance.text.as_str())?
.get()?; .get()?;
@ -129,6 +136,7 @@ impl WinRt {
rate: 1., rate: 1.,
pitch: 1., pitch: 1.,
volume: 1., volume: 1.,
voice: SpeechSynthesizer::DefaultVoice()?,
}) })
} }
} }
@ -145,7 +153,8 @@ impl Backend for WinRt {
pitch: true, pitch: true,
volume: true, volume: true,
is_speaking: true, is_speaking: true,
voices: true, voice: true,
get_voice: true,
utterance_callbacks: true, utterance_callbacks: true,
} }
} }
@ -175,6 +184,7 @@ impl Backend for WinRt {
rate: self.rate, rate: self.rate,
pitch: self.pitch, pitch: self.pitch,
volume: self.volume, volume: self.volume,
voice: self.voice.clone(),
}; };
utterances.push_back(utterance); utterances.push_back(utterance);
} }
@ -291,16 +301,28 @@ impl Backend for WinRt {
Ok(!utterances.is_empty()) Ok(!utterances.is_empty())
} }
fn voice(&self) -> Result<String, Error> { fn voice(&self) -> Result<Voice, Error> {
unimplemented!() let voice = self.synth.Voice()?;
voice.try_into()
} }
fn list_voices(&self) -> Vec<String> { fn voices(&self) -> Result<Vec<Voice>, Error> {
unimplemented!() let mut rv: Vec<Voice> = vec![];
for voice in SpeechSynthesizer::AllVoices()? {
rv.push(voice.try_into()?);
}
Ok(rv)
} }
fn set_voice(&mut self, voice: &str) -> Result<(), Error> { fn set_voice(&mut self, voice: &Voice) -> Result<(), Error> {
unimplemented!() for v in SpeechSynthesizer::AllVoices()? {
let vid: String = v.Id()?.try_into()?;
if vid == voice.id {
self.voice = v.clone();
return Ok(());
}
}
Err(Error::OperationFailed)
} }
} }
@ -315,3 +337,24 @@ impl Drop for WinRt {
utterances.remove(&id); utterances.remove(&id);
} }
} }
impl TryInto<Voice> for VoiceInformation {
type Error = Error;
fn try_into(self) -> Result<Voice, Self::Error> {
let gender = self.Gender()?;
let gender = if gender == VoiceGender::Male {
Gender::Male
} else {
Gender::Female
};
let language: String = self.Language()?.try_into()?;
let language = LanguageIdentifier::from_str(&language).unwrap();
Ok(Voice {
id: self.Id()?.try_into()?,
name: self.DisplayName()?.try_into()?,
gender,
language,
})
}
}

View File

@ -16,6 +16,7 @@ use std::collections::HashMap;
#[cfg(target_os = "macos")] #[cfg(target_os = "macos")]
use std::ffi::CStr; use std::ffi::CStr;
use std::fmt; use std::fmt;
use std::string::FromUtf16Error;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::{boxed::Box, sync::RwLock}; use std::{boxed::Box, sync::RwLock};
@ -200,6 +201,9 @@ pub enum Error {
#[cfg(windows)] #[cfg(windows)]
#[error("WinRT error")] #[error("WinRT error")]
WinRt(windows::core::Error), WinRt(windows::core::Error),
#[cfg(windows)]
#[error("UTF string conversion failed")]
UtfStringConversionFailed(#[from] FromUtf16Error),
#[error("Unsupported feature")] #[error("Unsupported feature")]
UnsupportedFeature, UnsupportedFeature,
#[error("Out of range")] #[error("Out of range")]
@ -232,7 +236,7 @@ pub trait Backend: Clone {
fn set_volume(&mut self, volume: f32) -> Result<(), Error>; fn set_volume(&mut self, volume: f32) -> Result<(), Error>;
fn is_speaking(&self) -> Result<bool, Error>; fn is_speaking(&self) -> Result<bool, Error>;
fn voices(&self) -> Result<Vec<Voice>, Error>; fn voices(&self) -> Result<Vec<Voice>, Error>;
fn voice(&self) -> Result<String, Error>; fn voice(&self) -> Result<Voice, Error>;
fn set_voice(&mut self, voice: &Voice) -> Result<(), Error>; fn set_voice(&mut self, voice: &Voice) -> Result<(), Error>;
} }
@ -577,7 +581,7 @@ impl Tts {
/** /**
* Return the current speaking voice. * Return the current speaking voice.
*/ */
pub fn voice(&self) -> Result<String, Error> { pub fn voice(&self) -> Result<Voice, Error> {
let Features { get_voice, .. } = self.supported_features(); let Features { get_voice, .. } = self.supported_features();
if get_voice { if get_voice {
self.0.read().unwrap().voice() self.0.read().unwrap().voice()
@ -697,12 +701,14 @@ impl Drop for Tts {
} }
} }
/// Gender of a speech synthesis voice.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Gender {
    Unspecified,
    Male,
    Female,
}
#[derive(Debug)]
pub struct Voice { pub struct Voice {
pub id: String, pub id: String,
pub name: String, pub name: String,