diff --git a/Cargo.toml b/Cargo.toml index fc20fe3..549745f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tts" -version = "0.6.3" +version = "0.8.0" authors = ["Nolan Darilek "] repository = "https://github.com/ndarilek/tts-rs" description = "High-level Text-To-Speech (TTS) interface" @@ -9,7 +9,7 @@ exclude = ["*.cfg", "*.yml"] edition = "2018" [lib] -crate-type = ["lib", "staticlib"] +crate-type = ["lib", "cdylib", "staticlib"] [dependencies] lazy_static = "1" @@ -20,12 +20,12 @@ thiserror = "1" env_logger = "0.7" [target.'cfg(windows)'.dependencies] -tolk = "0.2" +tolk = ">= 0.2.1" winrt = "0.7" tts_winrt_bindings = { version = "0.1", path="winrt_bindings" } [target.'cfg(target_os = "linux")'.dependencies] -speech-dispatcher = "0.6" +speech-dispatcher = "0.7" [target.'cfg(any(target_os = "macos", target_os = "ios"))'.dependencies] cocoa-foundation = "0.1" @@ -35,4 +35,4 @@ objc = "0.2" [target.wasm32-unknown-unknown.dependencies] wasm-bindgen = "0.2" -web-sys = { version = "0.3", features = ["SpeechSynthesis", "SpeechSynthesisUtterance", "Window", ] } +web-sys = { version = "0.3", features = ["EventTarget", "SpeechSynthesis", "SpeechSynthesisEvent", "SpeechSynthesisUtterance", "Window", ] } diff --git a/examples/hello_world.rs b/examples/hello_world.rs index a34f5f8..255cae2 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -12,6 +12,18 @@ use tts::*; fn main() -> Result<(), Error> { env_logger::init(); let mut tts = TTS::default()?; + let Features { + utterance_callbacks, + .. + } = tts.supported_features(); + if utterance_callbacks { + tts.on_utterance_begin(Some(Box::new(|utterance| { + println!("Started speaking {:?}", utterance) + })))?; + tts.on_utterance_end(Some(Box::new(|utterance| { + println!("Finished speaking {:?}", utterance) + })))?; + } tts.speak("Hello, world.", false)?; let Features { rate, .. } = tts.supported_features(); if rate { @@ -63,6 +75,8 @@ fn main() -> Result<(), Error> { }*/ tts.speak("Goodbye.", false)?; let mut _input = String::new(); + // The below is only needed to make the example run on MacOS because there is no NSRunLoop in this context. + // It shouldn't be needed in an app or game that almost certainly has one already. #[cfg(target_os = "macos")] { let run_loop: id = unsafe { NSRunLoop::currentRunLoop() }; diff --git a/src/backends/appkit.rs b/src/backends/appkit.rs index dd8caa3..671a922 100644 --- a/src/backends/appkit.rs +++ b/src/backends/appkit.rs @@ -7,12 +7,12 @@ use objc::declare::ClassDecl; use objc::runtime::*; use objc::*; -use crate::{Backend, Error, Features}; +use crate::{Backend, BackendId, Error, Features, UtteranceId}; -pub struct AppKit(*mut Object, *mut Object); +pub(crate) struct AppKit(*mut Object, *mut Object); impl AppKit { - pub fn new() -> Self { + pub(crate) fn new() -> Self { info!("Initializing AppKit backend"); unsafe { let obj: *mut Object = msg_send![class!(NSSpeechSynthesizer), new]; @@ -91,6 +91,10 @@ impl AppKit { } impl Backend for AppKit { + fn id(&self) -> Option { + None + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -101,7 +105,7 @@ impl Backend for AppKit { } } - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> { + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error> { trace!("speak({}, {})", text, interrupt); if interrupt { self.stop()?; @@ -110,7 +114,7 @@ impl Backend for AppKit { let str = NSString::alloc(nil).init_str(text); let _: () = msg_send![self.1, enqueueAndSpeak: str]; } - Ok(()) + Ok(None) } fn stop(&mut self) -> Result<(), Error> { diff --git a/src/backends/av_foundation.rs b/src/backends/av_foundation.rs index b5bfd32..c1f1c7f 100644 --- a/src/backends/av_foundation.rs +++ b/src/backends/av_foundation.rs @@ -1,17 +1,22 @@ #[cfg(any(target_os = "macos", target_os = "ios"))] #[link(name = "AVFoundation", kind = "framework")] +use std::sync::Mutex; + use cocoa_foundation::base::{id, nil}; use cocoa_foundation::foundation::NSString; +use lazy_static::lazy_static; use log::{info, trace}; -use objc::runtime::*; -use objc::*; +use objc::runtime::{Object, Sel}; +use objc::{class, declare::ClassDecl, msg_send, sel, sel_impl}; -use crate::{Backend, Error, Features}; +use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; mod voices; use voices::AVSpeechSynthesisVoice; -pub struct AvFoundation { +pub(crate) struct AvFoundation { + id: BackendId, + delegate: *mut Object, synth: *mut Object, rate: f32, volume: f32, @@ -19,23 +24,95 @@ pub struct AvFoundation { voice: AVSpeechSynthesisVoice, } +lazy_static! { + static ref NEXT_BACKEND_ID: Mutex = Mutex::new(0); +} + impl AvFoundation { - pub fn new() -> Self { + pub(crate) fn new() -> Self { info!("Initializing AVFoundation backend"); + let mut decl = ClassDecl::new("MyNSSpeechSynthesizerDelegate", class!(NSObject)).unwrap(); + decl.add_ivar::("backend_id"); + + extern "C" fn speech_synthesizer_did_start_speech_utterance( + this: &Object, + _: Sel, + _synth: *const Object, + utterance: id, + ) { + unsafe { + let backend_id: u64 = *this.get_ivar("backend_id"); + let backend_id = BackendId::AvFoundation(backend_id); + let mut callbacks = CALLBACKS.lock().unwrap(); + let callbacks = callbacks.get_mut(&backend_id).unwrap(); + if let Some(callback) = callbacks.utterance_begin.as_mut() { + let utterance_id = UtteranceId::AvFoundation(utterance); + callback(utterance_id); + } + } + } + + extern "C" fn speech_synthesizer_did_finish_speech_utterance( + this: &Object, + _: Sel, + _synth: *const Object, + utterance: id, + ) { + unsafe { + let backend_id: u64 = *this.get_ivar("backend_id"); + let backend_id = BackendId::AvFoundation(backend_id); + let mut callbacks = CALLBACKS.lock().unwrap(); + let callbacks = callbacks.get_mut(&backend_id).unwrap(); + if let Some(callback) = callbacks.utterance_end.as_mut() { + let utterance_id = UtteranceId::AvFoundation(utterance); + callback(utterance_id); + } + } + } + unsafe { + decl.add_method( + sel!(speechSynthesizer:didStartSpeechUtterance:), + speech_synthesizer_did_start_speech_utterance + as extern "C" fn(&Object, Sel, *const Object, id) -> (), + ); + decl.add_method( + sel!(speechSynthesizer:didFinishSpeechUtterance:), + speech_synthesizer_did_finish_speech_utterance + as extern "C" fn(&Object, Sel, *const Object, id) -> (), + ); + } + + let delegate_class = decl.register(); + let delegate_obj: *mut Object = unsafe { msg_send![delegate_class, new] }; + let mut backend_id = NEXT_BACKEND_ID.lock().unwrap(); + let rv = unsafe { let synth: *mut Object = msg_send![class!(AVSpeechSynthesizer), new]; + delegate_obj + .as_mut() + .unwrap() + .set_ivar("backend_id", *backend_id); + let _: () = msg_send![synth, setDelegate: delegate_obj]; AvFoundation { + id: BackendId::AvFoundation(*backend_id), + delegate: delegate_obj, synth: synth, rate: 0.5, volume: 1., pitch: 1., voice: AVSpeechSynthesisVoice::default(), } - } + }; + *backend_id += 1; + rv } } impl Backend for AvFoundation { + fn id(&self) -> Option { + Some(self.id) + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -43,18 +120,23 @@ impl Backend for AvFoundation { pitch: true, volume: true, is_speaking: true, +<<<<<<< HEAD voices: true, +======= + utterance_callbacks: true, +>>>>>>> develop } } - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> { + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error> { trace!("speak({}, {})", text, interrupt); if interrupt { self.stop()?; } + let utterance: id; unsafe { let str = NSString::alloc(nil).init_str(text); - let utterance: id = msg_send![class!(AVSpeechUtterance), alloc]; + utterance = msg_send![class!(AVSpeechUtterance), alloc]; let _: () = msg_send![utterance, initWithString: str]; let _: () = msg_send![utterance, setRate: self.rate]; let _: () = msg_send![utterance, setVolume: self.volume]; @@ -62,7 +144,7 @@ impl Backend for AvFoundation { let _: () = msg_send![utterance, setVoice: self.voice]; let _: () = msg_send![self.synth, speakUtterance: utterance]; } - Ok(()) + Ok(Some(UtteranceId::AvFoundation(utterance))) } fn stop(&mut self) -> Result<(), Error> { @@ -159,6 +241,7 @@ impl Backend for AvFoundation { impl Drop for AvFoundation { fn drop(&mut self) { unsafe { + let _: Object = msg_send![self.delegate, release]; let _: Object = msg_send![self.synth, release]; } } diff --git a/src/backends/mod.rs b/src/backends/mod.rs index c999faf..6274692 100644 --- a/src/backends/mod.rs +++ b/src/backends/mod.rs @@ -17,16 +17,16 @@ mod appkit; mod av_foundation; #[cfg(target_os = "linux")] -pub use self::speech_dispatcher::*; +pub(crate) use self::speech_dispatcher::*; #[cfg(windows)] -pub use self::tolk::*; +pub(crate) use self::tolk::*; #[cfg(target_arch = "wasm32")] pub use self::web::*; #[cfg(target_os = "macos")] -pub use self::appkit::*; +pub(crate) use self::appkit::*; #[cfg(any(target_os = "macos", target_os = "ios"))] -pub use self::av_foundation::*; +pub(crate) use self::av_foundation::*; diff --git a/src/backends/speech_dispatcher.rs b/src/backends/speech_dispatcher.rs index 89ab24c..afbbba3 100644 --- a/src/backends/speech_dispatcher.rs +++ b/src/backends/speech_dispatcher.rs @@ -1,14 +1,15 @@ #[cfg(target_os = "linux")] use std::collections::HashMap; +use std::convert::TryInto; use std::sync::Mutex; use lazy_static::*; use log::{info, trace}; use speech_dispatcher::*; -use crate::{Backend, Error, Features}; +use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; -pub struct SpeechDispatcher(Connection); +pub(crate) struct SpeechDispatcher(Connection); lazy_static! { static ref SPEAKING: Mutex> = { @@ -18,37 +19,55 @@ lazy_static! { } impl SpeechDispatcher { - pub fn new() -> Self { + pub(crate) fn new() -> Self { info!("Initializing SpeechDispatcher backend"); let connection = speech_dispatcher::Connection::open("tts", "tts", "tts", Mode::Threaded); let sd = SpeechDispatcher(connection); let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(sd.0.client_id(), false); - sd.0.on_begin(Some(|_msg_id, client_id| { + sd.0.on_begin(Some(Box::new(|msg_id, client_id| { let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(client_id, true); - })); - sd.0.on_end(Some(|_msg_id, client_id| { + let mut callbacks = CALLBACKS.lock().unwrap(); + let backend_id = BackendId::SpeechDispatcher(client_id); + let cb = callbacks.get_mut(&backend_id).unwrap(); + let utterance_id = UtteranceId::SpeechDispatcher(msg_id); + if let Some(f) = cb.utterance_begin.as_mut() { + f(utterance_id); + } + }))); + sd.0.on_end(Some(Box::new(|msg_id, client_id| { let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(client_id, false); - })); - sd.0.on_cancel(Some(|_msg_id, client_id| { + let mut callbacks = CALLBACKS.lock().unwrap(); + let backend_id = BackendId::SpeechDispatcher(client_id); + let cb = callbacks.get_mut(&backend_id).unwrap(); + let utterance_id = UtteranceId::SpeechDispatcher(msg_id); + if let Some(f) = cb.utterance_end.as_mut() { + f(utterance_id); + } + }))); + sd.0.on_cancel(Some(Box::new(|_msg_id, client_id| { let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(client_id, false); - })); - sd.0.on_pause(Some(|_msg_id, client_id| { + }))); + sd.0.on_pause(Some(Box::new(|_msg_id, client_id| { let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(client_id, false); - })); - sd.0.on_resume(Some(|_msg_id, client_id| { + }))); + sd.0.on_resume(Some(Box::new(|_msg_id, client_id| { let mut speaking = SPEAKING.lock().unwrap(); speaking.insert(client_id, true); - })); + }))); sd } } impl Backend for SpeechDispatcher { + fn id(&self) -> Option { + Some(BackendId::SpeechDispatcher(self.0.client_id())) + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -56,10 +75,11 @@ impl Backend for SpeechDispatcher { pitch: true, volume: true, is_speaking: true, + utterance_callbacks: true, } } - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> { + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error> { trace!("speak({}, {})", text, interrupt); if interrupt { self.stop()?; @@ -68,11 +88,15 @@ impl Backend for SpeechDispatcher { if single_char { self.0.set_punctuation(Punctuation::All); } - self.0.say(Priority::Important, text); + let id = self.0.say(Priority::Important, text); if single_char { self.0.set_punctuation(Punctuation::None); } - Ok(()) + if let Some(id) = id { + Ok(Some(UtteranceId::SpeechDispatcher(id.try_into().unwrap()))) + } else { + Err(Error::NoneError) + } } fn stop(&mut self) -> Result<(), Error> { diff --git a/src/backends/tolk.rs b/src/backends/tolk.rs index a1e8301..b22720f 100644 --- a/src/backends/tolk.rs +++ b/src/backends/tolk.rs @@ -2,12 +2,12 @@ use log::{info, trace}; use tolk::Tolk as TolkPtr; -use crate::{Backend, Error, Features}; +use crate::{Backend, BackendId, Error, Features, UtteranceId}; -pub struct Tolk(TolkPtr); +pub(crate) struct Tolk(TolkPtr); impl Tolk { - pub fn new() -> Option { + pub(crate) fn new() -> Option { info!("Initializing Tolk backend"); let tolk = TolkPtr::new(); if tolk.detect_screen_reader().is_some() { @@ -19,6 +19,10 @@ impl Tolk { } impl Backend for Tolk { + fn id(&self) -> Option { + None + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -26,28 +30,10 @@ impl Backend for Tolk { } } - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> { + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error> { trace!("speak({}, {})", text, interrupt); - const BUFFER_LENGTH: usize = 300; - if text.len() <= BUFFER_LENGTH { - self.0.speak(text, interrupt); - } else { - if interrupt { - self.stop()?; - } - let tokens = text.split_whitespace(); - let mut buffer = String::new(); - for token in tokens { - if buffer.len() + token.len() > BUFFER_LENGTH { - self.0.speak(buffer, false); - buffer = String::new(); - } else { - buffer.push_str(token); - buffer.push(' '); - } - } - } - Ok(()) + self.0.speak(text, interrupt); + Ok(None) } fn stop(&mut self) -> Result<(), Error> { diff --git a/src/backends/web.rs b/src/backends/web.rs index a8be3c2..a43b2c8 100644 --- a/src/backends/web.rs +++ b/src/backends/web.rs @@ -1,27 +1,45 @@ #[cfg(target_arch = "wasm32")] -use log::{info, trace}; -use web_sys::SpeechSynthesisUtterance; +use std::sync::Mutex; -use crate::{Backend, Error, Features}; +use lazy_static::lazy_static; +use log::{info, trace}; +use wasm_bindgen::prelude::*; +use wasm_bindgen::JsCast; +use web_sys::{SpeechSynthesisEvent, SpeechSynthesisUtterance}; + +use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; pub struct Web { + id: BackendId, rate: f32, pitch: f32, volume: f32, } +lazy_static! { + static ref NEXT_BACKEND_ID: Mutex = Mutex::new(0); +} + impl Web { pub fn new() -> Result { info!("Initializing Web backend"); - Ok(Web { + let mut backend_id = NEXT_BACKEND_ID.lock().unwrap(); + let rv = Web { + id: BackendId::Web(*backend_id), rate: 1., pitch: 1., volume: 1., - }) + }; + *backend_id += 1; + Ok(rv) } } impl Backend for Web { + fn id(&self) -> Option { + Some(self.id) + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -29,23 +47,46 @@ impl Backend for Web { pitch: true, volume: true, is_speaking: true, + utterance_callbacks: true, } } - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> { + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error> { trace!("speak({}, {})", text, interrupt); let utterance = SpeechSynthesisUtterance::new_with_text(text).unwrap(); utterance.set_rate(self.rate); utterance.set_pitch(self.pitch); utterance.set_volume(self.volume); + let id = self.id().unwrap(); + let utterance_id = UtteranceId::Web(utterance.clone()); + let callback = Closure::wrap(Box::new(move |evt: SpeechSynthesisEvent| { + let mut callbacks = CALLBACKS.lock().unwrap(); + let callback = callbacks.get_mut(&id).unwrap(); + if let Some(f) = callback.utterance_begin.as_mut() { + let utterance_id = UtteranceId::Web(evt.utterance()); + f(utterance_id); + } + }) as Box); + utterance.set_onstart(Some(callback.as_ref().unchecked_ref())); + let callback = Closure::wrap(Box::new(move |evt: SpeechSynthesisEvent| { + let mut callbacks = CALLBACKS.lock().unwrap(); + let callback = callbacks.get_mut(&id).unwrap(); + if let Some(f) = callback.utterance_end.as_mut() { + let utterance_id = UtteranceId::Web(evt.utterance()); + f(utterance_id); + } + }) as Box); + utterance.set_onend(Some(callback.as_ref().unchecked_ref())); if interrupt { self.stop()?; } if let Some(window) = web_sys::window() { let speech_synthesis = window.speech_synthesis().unwrap(); speech_synthesis.speak(&utterance); + Ok(Some(utterance_id)) + } else { + Err(Error::NoneError) } - Ok(()) } fn stop(&mut self) -> Result<(), Error> { diff --git a/src/backends/winrt.rs b/src/backends/winrt.rs index 2e8af98..98826e6 100644 --- a/src/backends/winrt.rs +++ b/src/backends/winrt.rs @@ -1,13 +1,19 @@ #[cfg(windows)] -use log::{info, trace}; +use std::collections::HashMap; +use std::sync::Mutex; + +use lazy_static::lazy_static; +use log::{info, trace}; +use winrt::ComInterface; -use tts_winrt_bindings::windows::media::core::MediaSource; use tts_winrt_bindings::windows::media::playback::{ - MediaPlaybackItem, MediaPlaybackList, MediaPlaybackState, MediaPlayer, + CurrentMediaPlaybackItemChangedEventArgs, MediaPlaybackItem, MediaPlaybackList, + MediaPlaybackState, MediaPlayer, }; use tts_winrt_bindings::windows::media::speech_synthesis::SpeechSynthesizer; +use tts_winrt_bindings::windows::{foundation::TypedEventHandler, media::core::MediaSource}; -use crate::{Backend, Error, Features}; +use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS}; impl From for Error { fn from(e: winrt::Error) -> Self { @@ -16,11 +22,28 @@ impl From for Error { } pub struct WinRT { + id: BackendId, synth: SpeechSynthesizer, player: MediaPlayer, playback_list: MediaPlaybackList, } +lazy_static! { + static ref NEXT_BACKEND_ID: Mutex = Mutex::new(0); + static ref BACKEND_TO_MEDIA_PLAYER: Mutex> = { + let v: HashMap = HashMap::new(); + Mutex::new(v) + }; + static ref BACKEND_TO_PLAYBACK_LIST: Mutex> = { + let v: HashMap = HashMap::new(); + Mutex::new(v) + }; + static ref LAST_SPOKEN_UTTERANCE: Mutex> = { + let v: HashMap = HashMap::new(); + Mutex::new(v) + }; +} + impl WinRT { pub fn new() -> std::result::Result { info!("Initializing WinRT backend"); @@ -28,11 +51,17 @@ impl WinRT { let player = MediaPlayer::new()?; player.set_auto_play(true)?; player.set_source(&playback_list)?; - Ok(Self { + let mut backend_id = NEXT_BACKEND_ID.lock().unwrap(); + let bid = BackendId::WinRT(*backend_id); + let mut rv = Self { + id: bid, synth: SpeechSynthesizer::new()?, player: player, playback_list: playback_list, - }) + }; + *backend_id += 1; + Self::init_callbacks(&mut rv)?; + Ok(rv) } fn reinit_player(&mut self) -> std::result::Result<(), Error> { @@ -40,11 +69,70 @@ impl WinRT { self.player = MediaPlayer::new()?; self.player.set_auto_play(true)?; self.player.set_source(&self.playback_list)?; + self.init_callbacks()?; + Ok(()) + } + + fn init_callbacks(&mut self) -> Result<(), winrt::Error> { + let id = self.id().unwrap(); + let mut backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); + backend_to_media_player.insert(id, self.player.clone()); + self.player + .media_ended(TypedEventHandler::new(|sender, _args| { + let backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); + let id = backend_to_media_player.iter().find(|v| v.1 == sender); + if let Some(id) = id { + let id = id.0; + let mut callbacks = CALLBACKS.lock().unwrap(); + let callbacks = callbacks.get_mut(&id).unwrap(); + if let Some(callback) = callbacks.utterance_end.as_mut() { + let last_spoken_utterance = LAST_SPOKEN_UTTERANCE.lock().unwrap(); + if let Some(utterance_id) = last_spoken_utterance.get(&id) { + callback(utterance_id.clone()); + } + } + } + Ok(()) + }))?; + let mut backend_to_playback_list = BACKEND_TO_PLAYBACK_LIST.lock().unwrap(); + backend_to_playback_list.insert(id, self.playback_list.clone()); + self.playback_list + .current_item_changed(TypedEventHandler::new( + |sender: &MediaPlaybackList, args: &CurrentMediaPlaybackItemChangedEventArgs| { + let backend_to_playback_list = BACKEND_TO_PLAYBACK_LIST.lock().unwrap(); + let id = backend_to_playback_list.iter().find(|v| v.1 == sender); + if let Some(id) = id { + let id = id.0; + let mut callbacks = CALLBACKS.lock().unwrap(); + let callbacks = callbacks.get_mut(&id).unwrap(); + let old_item = args.old_item()?; + if !old_item.is_null() { + if let Some(callback) = callbacks.utterance_end.as_mut() { + callback(UtteranceId::WinRT(old_item)); + } + } + let new_item = args.new_item()?; + if !new_item.is_null() { + let mut last_spoken_utterance = LAST_SPOKEN_UTTERANCE.lock().unwrap(); + let utterance_id = UtteranceId::WinRT(new_item); + last_spoken_utterance.insert(*id, utterance_id.clone()); + if let Some(callback) = callbacks.utterance_begin.as_mut() { + callback(utterance_id); + } + } + } + Ok(()) + }, + ))?; Ok(()) } } impl Backend for WinRT { + fn id(&self) -> Option { + Some(self.id) + } + fn supported_features(&self) -> Features { Features { stop: true, @@ -52,10 +140,15 @@ impl Backend for WinRT { pitch: true, volume: true, is_speaking: true, + utterance_callbacks: true, } } - fn speak(&mut self, text: &str, interrupt: bool) -> std::result::Result<(), Error> { + fn speak( + &mut self, + text: &str, + interrupt: bool, + ) -> std::result::Result, Error> { trace!("speak({}, {})", text, interrupt); if interrupt { self.stop()?; @@ -72,11 +165,12 @@ impl Backend for WinRT { self.reinit_player()?; } } - self.playback_list.items()?.append(item)?; + self.playback_list.items()?.append(&item)?; if !self.is_speaking()? { self.player.play()?; } - Ok(()) + let utterance_id = UtteranceId::WinRT(item); + Ok(Some(utterance_id)) } fn stop(&mut self) -> std::result::Result<(), Error> { @@ -169,3 +263,15 @@ impl Backend for WinRT { unimplemented!() } } + +impl Drop for WinRT { + fn drop(&mut self) { + let id = self.id().unwrap(); + let mut backend_to_playback_list = BACKEND_TO_PLAYBACK_LIST.lock().unwrap(); + backend_to_playback_list.remove(&id); + let mut backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap(); + backend_to_media_player.remove(&id); + let mut last_spoken_utterance = LAST_SPOKEN_UTTERANCE.lock().unwrap(); + last_spoken_utterance.remove(&id); + } +} diff --git a/src/lib.rs b/src/lib.rs index daebd23..67dea00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,16 +12,24 @@ */ use std::boxed::Box; +use std::collections::HashMap; #[cfg(target_os = "macos")] use std::ffi::CStr; +use std::sync::Mutex; -#[cfg(target_os = "macos")] +#[cfg(any(target_os = "macos", target_os = "ios"))] use cocoa_foundation::base::id; +use lazy_static::lazy_static; #[cfg(target_os = "macos")] use libc::c_char; #[cfg(target_os = "macos")] use objc::{class, msg_send, sel, sel_impl}; use thiserror::Error; +#[cfg(target_arch = "wasm32")] +use web_sys::SpeechSynthesisUtterance; + +#[cfg(windows)] +use tts_winrt_bindings::windows::media::playback::MediaPlaybackItem; mod backends; @@ -40,6 +48,30 @@ pub enum Backends { AvFoundation, } +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum BackendId { + #[cfg(target_os = "linux")] + SpeechDispatcher(u64), + #[cfg(target_arch = "wasm32")] + Web(u64), + #[cfg(windows)] + WinRT(u64), + #[cfg(any(target_os = "macos", target_os = "ios"))] + AvFoundation(u64), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum UtteranceId { + #[cfg(target_os = "linux")] + SpeechDispatcher(u64), + #[cfg(target_arch = "wasm32")] + Web(SpeechSynthesisUtterance), + #[cfg(windows)] + WinRT(MediaPlaybackItem), + #[cfg(any(target_os = "macos", target_os = "ios"))] + AvFoundation(id), +} + pub struct Features { pub stop: bool, pub rate: bool, @@ -47,6 +79,7 @@ pub struct Features { pub volume: bool, pub is_speaking: bool, pub voices: bool, + pub utterance_callbacks: bool, } impl Default for Features { @@ -58,6 +91,7 @@ impl Default for Features { volume: false, is_speaking: false, voices: false, + utterance_callbacks: false, } } } @@ -81,8 +115,9 @@ pub enum Error { } pub trait Backend { + fn id(&self) -> Option; fn supported_features(&self) -> Features; - fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error>; + fn speak(&mut self, text: &str, interrupt: bool) -> Result, Error>; fn stop(&mut self) -> Result<(), Error>; fn min_rate(&self) -> f32; fn max_rate(&self) -> f32; @@ -105,6 +140,23 @@ pub trait Backend { fn set_voice(&mut self, voice: &str) -> Result<(),Error>; } +#[derive(Default)] +struct Callbacks { + utterance_begin: Option>, + utterance_end: Option>, +} + +unsafe impl Send for Callbacks {} + +unsafe impl Sync for Callbacks {} + +lazy_static! { + static ref CALLBACKS: Mutex> = { + let m: HashMap = HashMap::new(); + Mutex::new(m) + }; +} + pub struct TTS(Box); unsafe impl std::marker::Send for TTS {} @@ -116,7 +168,7 @@ impl TTS { * Create a new `TTS` instance with the specified backend. */ pub fn new(backend: Backends) -> Result { - match backend { + let backend = match backend { #[cfg(target_os = "linux")] Backends::SpeechDispatcher => Ok(TTS(Box::new(backends::SpeechDispatcher::new()))), #[cfg(target_arch = "wasm32")] @@ -142,6 +194,16 @@ impl TTS { Backends::AppKit => Ok(TTS(Box::new(backends::AppKit::new()))), #[cfg(any(target_os = "macos", target_os = "ios"))] Backends::AvFoundation => Ok(TTS(Box::new(backends::AvFoundation::new()))), + }; + if backend.is_ok() { + let backend = backend.unwrap(); + if let Some(id) = backend.0.id() { + let mut callbacks = CALLBACKS.lock().unwrap(); + callbacks.insert(id, Callbacks::default()); + } + Ok(backend) + } else { + backend } } @@ -189,9 +251,12 @@ impl TTS { /** * Speaks the specified text, optionally interrupting current speech. */ - pub fn speak>(&mut self, text: S, interrupt: bool) -> Result<&Self, Error> { - self.0.speak(text.into().as_str(), interrupt)?; - Ok(self) + pub fn speak>( + &mut self, + text: S, + interrupt: bool, + ) -> Result, Error> { + self.0.speak(text.into().as_str(), interrupt) } /** @@ -410,4 +475,57 @@ impl TTS { Err(Error::UnsupportedFeature) } } + + /** + * Called when this speech synthesizer begins speaking an utterance. + */ + pub fn on_utterance_begin( + &self, + callback: Option>, + ) -> Result<(), Error> { + let Features { + utterance_callbacks, + .. + } = self.supported_features(); + if utterance_callbacks { + let mut callbacks = CALLBACKS.lock().unwrap(); + let id = self.0.id().unwrap(); + let mut callbacks = callbacks.get_mut(&id).unwrap(); + callbacks.utterance_begin = callback; + Ok(()) + } else { + Err(Error::UnsupportedFeature) + } + } + + /** + * Called when this speech synthesizer finishes speaking an utterance. + */ + pub fn on_utterance_end( + &self, + callback: Option>, + ) -> Result<(), Error> { + let Features { + utterance_callbacks, + .. + } = self.supported_features(); + if utterance_callbacks { + let mut callbacks = CALLBACKS.lock().unwrap(); + let id = self.0.id().unwrap(); + let mut callbacks = callbacks.get_mut(&id).unwrap(); + callbacks.utterance_end = callback; + Ok(()) + } else { + Err(Error::UnsupportedFeature) + } + } +} + +impl Drop for TTS { + fn drop(&mut self) { + if let Some(id) = self.0.id() { + let mut callbacks = CALLBACKS.lock().unwrap(); + callbacks.remove(&id); + } + } }