Merge pull request #2 from francois-caddet/feature/voices

Add voices feature
2024-11-17 13:09:37 +00:00 · 2022-03-30 10:17:32 -05:00 · 2022-03-30 10:17:32 -05:00 · 6057d9c968
commit 6057d9c968
parent acecb1f362 88f4598ec6
12 changed files with 260 additions and 2 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -8,6 +8,15 @@ license = "MIT"
 exclude = ["*.cfg", "*.yml"]
 edition = "2021"

+[package.metadata.patch.speech-dispatcher]
+version = "0.7.0"
+#patches = [
+#    "speech-dispatcher.patch"
+#]
+
+#[patch.crates-io]
+#speech-dispatcher = { path = './target/patch/speech-dispatcher-0.7.0'}
+
 [lib]
 crate-type = ["lib", "cdylib", "staticlib"]

@ -20,6 +29,7 @@ dyn-clonable = "0.9"
 lazy_static = "1"
 log = "0.4"
 thiserror = "1"
+unic-langid = "0.9.0"
 serde = { version = "1.0", optional = true, features = ["derive"] }

 [dev-dependencies]
@ -34,6 +44,7 @@ speech-dispatcher = { version = "0.13", default-features = false }

 [target.'cfg(any(target_os = "macos", target_os = "ios"))'.dependencies]
 cocoa-foundation = "0.1"
+core-foundation = "0.9"
 libc = "0.2"
 objc = { version = "0.2", features = ["exception"] }

@ -43,4 +54,4 @@ web-sys = { version = "0.3", features = ["EventTarget", "SpeechSynthesis", "Spee

 [target.'cfg(target_os="android")'.dependencies]
 jni = "0.19"
-ndk-glue = "0.6"
+ndk-glue = "0.6"
--- a/examples/hello_world.rs
+++ b/examples/hello_world.rs
@ -71,6 +71,20 @@ fn main() -> Result<(), Error> {
        tts.speak("This is normal volume.", false)?;
        tts.set_volume(original_volume)?;
    }
+    let Features { voices, .. } = tts.supported_features();
+    if voices {
+        let original_voice = tts.voice()?;
+        let voices_list = tts.list_voices();
+        println!("Available voices:\n===");
+        for v in voices_list.iter() {
+            println!("{}",v);
+            tts.set_voice(v)?;
+            println!("voice set");
+            println!("{}", tts.voice()?);
+            tts.speak(v,false)?;
+        }
+        tts.set_voice(original_voice)?;
+    }
    tts.speak("Goodbye.", false)?;
    let mut _input = String::new();
    // The below is only needed to make the example run on MacOS because there is no NSRunLoop in this context.
--- a/speech-dispatcher.patch
+++ b/speech-dispatcher.patch
@ -0,0 +1,22 @@
+diff --git src/lib.rs src/lib.rs
+index 26ba271..180513e 100644
+--- src/lib.rs
+++ src/lib.rs
+@@ -127,7 +127,7 @@ unsafe extern "C" fn cb(msg_id: u64, client_id: u64, state: u32) {
+     }
+ }
+ 
+-unsafe extern "C" fn cb_im(msg_id: u64, client_id: u64, state: u32, index_mark: *mut i8) {
+unsafe extern "C" fn cb_im(msg_id: u64, client_id: u64, state: u32, index_mark: *mut u8) {
+     let index_mark = CStr::from_ptr(index_mark);
+     let index_mark = index_mark.to_string_lossy().to_string();
+     let state = match state {
+@@ -325,7 +325,7 @@ impl Connection {
+         i32_to_bool(v)
+     }
+ 
+-    pub fn wchar(&self, priority: Priority, wchar: i32) -> bool {
+    pub fn wchar(&self, priority: Priority, wchar: u32) -> bool {
+         let v = unsafe { spd_wchar(self.0, priority as u32, wchar) };
+         i32_to_bool(v)
+     }
--- a/src/backends/appkit.rs
+++ b/src/backends/appkit.rs
@ -200,6 +200,18 @@ impl Backend for AppKit {
        let is_speaking: i8 = unsafe { msg_send![self.0, isSpeaking] };
        Ok(is_speaking != NO as i8)
    }
+
+    /// Returns the identifier of the active voice.
+    ///
+    /// Voice selection is not implemented for this backend yet, so this
+    /// reports `Error::UnsupportedFeature` instead of panicking.
+    fn voice(&self) -> Result<String,Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
+    /// Returns the identifiers of the selectable voices.
+    ///
+    /// Always empty until voice enumeration is implemented for this backend.
+    fn list_voices(&self) -> Vec<String> {
+        Vec::new()
+    }
+
+    /// Selects the voice used for subsequent speech.
+    ///
+    /// Voice selection is not implemented for this backend yet, so the
+    /// request is rejected with `Error::UnsupportedFeature`.
+    fn set_voice(&mut self, _voice: &str) -> Result<(),Error> {
+        Err(Error::UnsupportedFeature)
+    }
 }

 impl Drop for AppKit {
--- a/src/backends/av_foundation.rs
+++ b/src/backends/av_foundation.rs
@ -10,6 +10,10 @@ use objc::runtime::{Object, Sel};
 use objc::{class, declare::ClassDecl, msg_send, sel, sel_impl};

 use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS};
+use crate::voices::Backend as VoiceBackend;
+
+mod voices;
+use voices::*;

 #[derive(Clone, Debug)]
 pub(crate) struct AvFoundation {
@ -19,6 +23,7 @@ pub(crate) struct AvFoundation {
    rate: f32,
    volume: f32,
    pitch: f32,
+    voice: AVSpeechSynthesisVoice,
 }

 lazy_static! {
@ -142,6 +147,7 @@ impl AvFoundation {
                rate: 0.5,
                volume: 1.,
                pitch: 1.,
+                voice: AVSpeechSynthesisVoice::new(),
            }
        };
        *backend_id += 1;
@ -161,6 +167,7 @@ impl Backend for AvFoundation {
            pitch: true,
            volume: true,
            is_speaking: true,
+            voices: true,
            utterance_callbacks: true,
        }
    }
@ -185,6 +192,7 @@ impl Backend for AvFoundation {
            let _: () = msg_send![utterance, setVolume: self.volume];
            trace!("Setting pitch to {}", self.pitch);
            let _: () = msg_send![utterance, setPitchMultiplier: self.pitch];
+            let _: () = msg_send![utterance, setVoice: self.voice];
            trace!("Enqueuing");
            let _: () = msg_send![self.synth, speakUtterance: utterance];
            trace!("Done queuing");
@ -271,6 +279,19 @@ impl Backend for AvFoundation {
        let is_speaking: i8 = unsafe { msg_send![self.synth, isSpeaking] };
        Ok(is_speaking != NO as i8)
    }
+
+    /// Returns the identifier of the voice that is attached to new utterances.
+    fn voice(&self) -> Result<String,Error> {
+        Ok(self.voice.id())
+    }
+
+    /// Returns the identifier of every voice yielded by `AVSpeechSynthesisVoice::list`.
+    fn list_voices(&self) -> Vec<String> {
+        AVSpeechSynthesisVoice::list()
+            .into_iter()
+            .map(|candidate| candidate.id())
+            .collect()
+    }
+
+    /// Selects the voice whose identifier equals `voice`.
+    ///
+    /// # Errors
+    /// Returns `Error::NoneError` when no listed voice has that identifier;
+    /// the current voice is left untouched in that case.
+    fn set_voice(&mut self, voice: &str) -> Result<(),Error> {
+        // The argument used to be ignored and a fresh default voice was always
+        // installed; look the requested identifier up instead.
+        // NOTE(review): the stored handle is not retained here — this assumes
+        // the objects returned by `speechVoices` outlive the backend; confirm.
+        let selected = AVSpeechSynthesisVoice::list()
+            .into_iter()
+            .find(|candidate| candidate.id() == voice)
+            .ok_or(Error::NoneError)?;
+        self.voice = selected;
+        Ok(())
+    }
 }

 impl Drop for AvFoundation {
--- a/src/backends/av_foundation/voices.rs
+++ b/src/backends/av_foundation/voices.rs
@ -0,0 +1,67 @@
+
+use objc::runtime::*;
+use objc::*;
+use core_foundation::array::CFArray;
+use cocoa_foundation::foundation::NSString;
+use cocoa_foundation::base::{nil,id};
+use core_foundation::string::CFString;
+
+use crate::backends::AvFoundation;
+use crate::voices;
+use crate::voices::Gender;
+
+/// Copyable handle wrapping a raw pointer to an Objective-C
+/// `AVSpeechSynthesisVoice` object.
+#[derive(Copy,Clone, Debug)]
+pub(crate) struct AVSpeechSynthesisVoice(*const Object);
+
+impl AVSpeechSynthesisVoice {
+    /// Wraps the object obtained by sending `new` to the
+    /// `AVSpeechSynthesisVoice` class.
+    pub fn new() -> Self {
+        let raw: *const Object = unsafe { msg_send![class!(AVSpeechSynthesisVoice), new] };
+        Self(raw)
+    }
+}
+
+impl voices::Backend for AVSpeechSynthesisVoice {
+    type Backend = AvFoundation;
+
+    /// Not implemented yet; calling this panics.
+    fn from_id(_id: String) -> Self {
+        unimplemented!()
+    }
+
+    /// Not implemented yet; calling this panics.
+    fn from_language(_lang: voices::LanguageIdentifier) -> Self {
+        unimplemented!()
+    }
+
+    /// Returns one handle per element of `+[AVSpeechSynthesisVoice speechVoices]`.
+    ///
+    /// Yields an empty list when the class returns nil.
+    fn list() -> Vec<Self> {
+        let raw: core_foundation::array::CFArrayRef =
+            unsafe { msg_send![class!(AVSpeechSynthesisVoice), speechVoices] };
+        if raw.is_null() {
+            return Vec::new();
+        }
+        // SAFETY: `raw` is non-null and was just returned by `speechVoices`.
+        // The array is not owned by us, so it is wrapped under the get rule
+        // (retain now, release on drop) instead of being released without a
+        // matching retain.
+        let voices: CFArray =
+            unsafe { <CFArray as core_foundation::base::TCFType>::wrap_under_get_rule(raw) };
+        voices
+            .iter()
+            .map(|item| AVSpeechSynthesisVoice(*item as *const Object))
+            .collect()
+    }
+
+    /// Returns the value of the voice's `name` property.
+    fn name(self) -> String {
+        // SAFETY: the message returns either nil or a borrowed string object.
+        unsafe { borrowed_cfstring_to_string(msg_send![self.0, name]) }
+    }
+
+    /// Maps the voice's `gender` property onto `Gender`: 1 is `Male`,
+    /// 2 is `Female`, anything else is `Other`.
+    fn gender(self) -> Gender {
+        let gender: i64 = unsafe{ msg_send![self.0, gender] };
+        match gender {
+            1 => Gender::Male,
+            2 => Gender::Female,
+            _ => Gender::Other,
+        }
+    }
+
+    /// Returns the value of the voice's `identifier` property.
+    fn id(self) -> String {
+        // SAFETY: the message returns either nil or a borrowed string object.
+        unsafe { borrowed_cfstring_to_string(msg_send![self.0, identifier]) }
+    }
+
+    /// Parses the voice's `language` property into a `LanguageIdentifier`.
+    ///
+    /// Falls back to the default (undetermined) identifier instead of
+    /// panicking when the tag cannot be parsed.
+    fn language(self) -> voices::LanguageIdentifier {
+        // SAFETY: the message returns either nil or a borrowed string object.
+        let tag = unsafe { borrowed_cfstring_to_string(msg_send![self.0, language]) };
+        tag.parse().unwrap_or_default()
+    }
+}
+
+/// Copies a borrowed `CFStringRef` into an owned Rust `String`.
+///
+/// The reference is wrapped under the get rule, so the retain count of the
+/// underlying object is unchanged once this function returns. A null
+/// reference yields an empty string.
+///
+/// # Safety
+/// `raw` must be null or point to a live `CFString`/`NSString` object.
+unsafe fn borrowed_cfstring_to_string(raw: core_foundation::string::CFStringRef) -> String {
+    if raw.is_null() {
+        return String::new();
+    }
+    let text = <CFString as core_foundation::base::TCFType>::wrap_under_get_rule(raw);
+    text.to_string()
+}
--- a/src/backends/speech_dispatcher.rs
+++ b/src/backends/speech_dispatcher.rs
@ -81,6 +81,7 @@ impl Backend for SpeechDispatcher {
            pitch: true,
            volume: true,
            is_speaking: true,
+            voices: false,
            utterance_callbacks: true,
        }
    }
@ -179,6 +180,18 @@ impl Backend for SpeechDispatcher {
        let is_speaking = speaking.get(&self.0.client_id()).unwrap();
        Ok(*is_speaking)
    }
+
+    /// Returns the identifier of the active voice.
+    ///
+    /// Voice selection is not implemented for this backend yet, so this
+    /// reports `Error::UnsupportedFeature` instead of panicking.
+    fn voice(&self) -> Result<String,Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
+    /// Returns the identifiers of the selectable voices.
+    ///
+    /// Always empty until voice enumeration is implemented for this backend.
+    fn list_voices(&self) -> Vec<String> {
+        Vec::new()
+    }
+
+    /// Selects the voice used for subsequent speech.
+    ///
+    /// Voice selection is not implemented for this backend yet, so the
+    /// request is rejected with `Error::UnsupportedFeature`.
+    fn set_voice(&mut self, _voice: &str) -> Result<(),Error> {
+        Err(Error::UnsupportedFeature)
+    }
 }

 impl Drop for SpeechDispatcher {
--- a/src/backends/tolk.rs
+++ b/src/backends/tolk.rs
@ -108,4 +108,16 @@ impl Backend for Tolk {
    fn is_speaking(&self) -> Result<bool, Error> {
        unimplemented!()
    }
+
+    /// Returns the identifier of the active voice.
+    ///
+    /// Voice selection is not implemented for this backend yet, so this
+    /// reports `Error::UnsupportedFeature` instead of panicking.
+    fn voice(&self) -> Result<String,Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
+    /// Returns the identifiers of the selectable voices.
+    ///
+    /// Always empty until voice enumeration is implemented for this backend.
+    fn list_voices(&self) -> Vec<String> {
+        Vec::new()
+    }
+
+    /// Selects the voice used for subsequent speech.
+    ///
+    /// Voice selection is not implemented for this backend yet, so the
+    /// request is rejected with `Error::UnsupportedFeature`.
+    fn set_voice(&mut self, _voice: &str) -> Result<(),Error> {
+        Err(Error::UnsupportedFeature)
+    }
 }
--- a/src/backends/web.rs
+++ b/src/backends/web.rs
@ -53,6 +53,7 @@ impl Backend for Web {
            pitch: true,
            volume: true,
            is_speaking: true,
+            voices: true,
            utterance_callbacks: true,
        }
    }
@ -196,6 +197,18 @@ impl Backend for Web {
            Err(Error::NoneError)
        }
    }
+
+    /// Returns the identifier of the active voice.
+    ///
+    /// Voice selection is not implemented for this backend yet, so this
+    /// reports `Error::UnsupportedFeature` instead of panicking.
+    fn voice(&self) -> Result<String,Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
+    /// Returns the identifiers of the selectable voices.
+    ///
+    /// Always empty until voice enumeration is implemented for this backend.
+    fn list_voices(&self) -> Vec<String> {
+        Vec::new()
+    }
+
+    /// Selects the voice used for subsequent speech.
+    ///
+    /// Voice selection is not implemented for this backend yet, so the
+    /// request is rejected with `Error::UnsupportedFeature`.
+    fn set_voice(&mut self, _voice: &str) -> Result<(),Error> {
+        Err(Error::UnsupportedFeature)
+    }
 }

 impl Drop for Web {
--- a/src/backends/winrt.rs
+++ b/src/backends/winrt.rs
@ -145,6 +145,7 @@ impl Backend for WinRt {
            pitch: true,
            volume: true,
            is_speaking: true,
+            voices: true,
            utterance_callbacks: true,
        }
    }
@ -289,6 +290,18 @@ impl Backend for WinRt {
        let utterances = utterances.get(&self.id).unwrap();
        Ok(!utterances.is_empty())
    }
+
+    /// Returns the identifier of the active voice.
+    ///
+    /// Voice selection is not implemented for this backend yet, so this
+    /// reports `Error::UnsupportedFeature` instead of panicking.
+    fn voice(&self) -> Result<String,Error> {
+        Err(Error::UnsupportedFeature)
+    }
+
+    /// Returns the identifiers of the selectable voices.
+    ///
+    /// Always empty until voice enumeration is implemented for this backend.
+    fn list_voices(&self) -> Vec<String> {
+        Vec::new()
+    }
+
+    /// Selects the voice used for subsequent speech.
+    ///
+    /// Voice selection is not implemented for this backend yet, so the
+    /// request is rejected with `Error::UnsupportedFeature`.
+    fn set_voice(&mut self, _voice: &str) -> Result<(),Error> {
+        Err(Error::UnsupportedFeature)
+    }
 }

 impl Drop for WinRt {
--- a/src/lib.rs
+++ b/src/lib.rs
@ -34,6 +34,7 @@ use thiserror::Error;
 use tolk::Tolk;

 mod backends;
+mod voices;

 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@ -165,6 +166,7 @@ pub struct Features {
    pub rate: bool,
    pub stop: bool,
    pub utterance_callbacks: bool,
+    pub voices: bool,
    pub volume: bool,
 }

@ -207,7 +209,7 @@ pub enum Error {
 }

 #[clonable]
-trait Backend: Clone {
+pub trait Backend: Clone {
    fn id(&self) -> Option<BackendId>;
    fn supported_features(&self) -> Features;
    fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error>;
@ -228,6 +230,9 @@ trait Backend: Clone {
    fn get_volume(&self) -> Result<f32, Error>;
    fn set_volume(&mut self, volume: f32) -> Result<(), Error>;
    fn is_speaking(&self) -> Result<bool, Error>;
+    fn voice(&self) -> Result<String, Error>;
+    fn list_voices(&self) -> Vec<String>;
+    fn set_voice(&mut self, voice: &str) -> Result<(),Error>;
 }

 #[derive(Default)]
@ -556,6 +561,40 @@ impl Tts {
        }
    }

+    /**
+     * Returns the list of available voices.
+     *
+     * The list is empty when the backend's supported features do not include
+     * `voices`; the backend is not queried in that case, so an unimplemented
+     * backend method is never reached.
+     */
+    pub fn list_voices(&self) -> Vec<String> {
+        let Features { voices, .. } = self.supported_features();
+        if voices {
+            self.0.read().unwrap().list_voices()
+        } else {
+            Vec::new()
+        }
+    }
+
+    /**
+     * Reports the voice the backend currently speaks with.
+     *
+     * Fails with `Error::UnsupportedFeature` when the backend's supported
+     * features do not include `voices`.
+     */
+    pub fn voice(&self) -> Result<String,Error> {
+        if !self.supported_features().voices {
+            return Err(Error::UnsupportedFeature);
+        }
+        self.0.read().unwrap().voice()
+    }
+
+    /**
+     * Switches the speaking voice to `voice`.
+     *
+     * Fails with `Error::UnsupportedFeature` when the backend's supported
+     * features do not include `voices`; otherwise forwards the backend's result.
+     */
+    pub fn set_voice<S: Into<String>>(&mut self, voice: S) -> Result<(),Error> {
+        if !self.supported_features().voices {
+            return Err(Error::UnsupportedFeature);
+        }
+        let mut backend = self.0.write().unwrap();
+        let requested: String = voice.into();
+        backend.set_voice(&requested)
+    }
+
    /**
     * Called when this speech synthesizer begins speaking an utterance.
     */
--- a/src/voices.rs
+++ b/src/voices.rs
@ -0,0 +1,21 @@
+
+pub use unic_langid::LanguageIdentifier;
+
+/// Gender category reported for a synthesis voice.
+///
+/// The common traits are derived so values can be logged, copied, compared
+/// and used as map keys.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub enum Gender {
+    /// Neither `Male` nor `Female`.
+    Other,
+    /// A male voice.
+    Male,
+    /// A female voice.
+    Female,
+}
+
+/// Interface a backend-specific voice type implements.
+///
+/// All accessors take `self` by value, so a voice is consumed by each call
+/// unless the implementing type is `Copy`.
+pub trait Backend: Sized {
+    /// The TTS backend type this voice type is associated with; it must
+    /// implement `crate::Backend`.
+    type Backend: crate::Backend;
+    /// Builds a voice from an identifier string (presumably one previously
+    /// returned by `id` — confirm against implementors).
+    fn from_id(id: String) -> Self;
+    /// Builds a voice from a language identifier.
+    fn from_language(lang: LanguageIdentifier) -> Self;
+    /// Returns a list of voices of this type.
+    fn list() -> Vec<Self>;
+    /// Returns the voice's name as a string.
+    fn name(self) -> String;
+    /// Returns the voice's `Gender`.
+    fn gender(self) -> Gender;
+    /// Returns the voice's identifier as a string.
+    fn id(self) -> String;
+    /// Returns the voice's language identifier.
+    fn language(self) -> LanguageIdentifier;
+}
+
+/// Boxed wrapper around a backend-specific voice value.
+///
+/// `Backend` already requires `Sized`, so no separate `Sized` bound is spelled out.
+pub struct Voice<T: Backend>(Box<T>);