1
0
mirror of https://github.com/ndarilek/tts-rs.git synced 2024-11-17 12:39:36 +00:00

Merge branch 'develop' into feature/voices

This commit is contained in:
Francois Caddet 2020-09-26 18:20:10 +02:00
commit 47cbb80595
10 changed files with 461 additions and 85 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "tts"
version = "0.6.3"
version = "0.8.0"
authors = ["Nolan Darilek <nolan@thewordnerd.info>"]
repository = "https://github.com/ndarilek/tts-rs"
description = "High-level Text-To-Speech (TTS) interface"
@ -9,7 +9,7 @@ exclude = ["*.cfg", "*.yml"]
edition = "2018"
[lib]
crate-type = ["lib", "staticlib"]
crate-type = ["lib", "cdylib", "staticlib"]
[dependencies]
lazy_static = "1"
@ -20,12 +20,12 @@ thiserror = "1"
env_logger = "0.7"
[target.'cfg(windows)'.dependencies]
tolk = "0.2"
tolk = ">= 0.2.1"
winrt = "0.7"
tts_winrt_bindings = { version = "0.1", path="winrt_bindings" }
[target.'cfg(target_os = "linux")'.dependencies]
speech-dispatcher = "0.6"
speech-dispatcher = "0.7"
[target.'cfg(any(target_os = "macos", target_os = "ios"))'.dependencies]
cocoa-foundation = "0.1"
@ -35,4 +35,4 @@ objc = "0.2"
[target.wasm32-unknown-unknown.dependencies]
wasm-bindgen = "0.2"
web-sys = { version = "0.3", features = ["SpeechSynthesis", "SpeechSynthesisUtterance", "Window", ] }
web-sys = { version = "0.3", features = ["EventTarget", "SpeechSynthesis", "SpeechSynthesisEvent", "SpeechSynthesisUtterance", "Window", ] }

View File

@ -12,6 +12,18 @@ use tts::*;
fn main() -> Result<(), Error> {
env_logger::init();
let mut tts = TTS::default()?;
let Features {
utterance_callbacks,
..
} = tts.supported_features();
if utterance_callbacks {
tts.on_utterance_begin(Some(Box::new(|utterance| {
println!("Started speaking {:?}", utterance)
})))?;
tts.on_utterance_end(Some(Box::new(|utterance| {
println!("Finished speaking {:?}", utterance)
})))?;
}
tts.speak("Hello, world.", false)?;
let Features { rate, .. } = tts.supported_features();
if rate {
@ -63,6 +75,8 @@ fn main() -> Result<(), Error> {
}*/
tts.speak("Goodbye.", false)?;
let mut _input = String::new();
// The block below is only needed to make this example run on macOS, because there is no NSRunLoop in this context.
// It shouldn't be needed in an app or game, which almost certainly has one already.
#[cfg(target_os = "macos")]
{
let run_loop: id = unsafe { NSRunLoop::currentRunLoop() };

View File

@ -7,12 +7,12 @@ use objc::declare::ClassDecl;
use objc::runtime::*;
use objc::*;
use crate::{Backend, Error, Features};
use crate::{Backend, BackendId, Error, Features, UtteranceId};
pub struct AppKit(*mut Object, *mut Object);
pub(crate) struct AppKit(*mut Object, *mut Object);
impl AppKit {
pub fn new() -> Self {
pub(crate) fn new() -> Self {
info!("Initializing AppKit backend");
unsafe {
let obj: *mut Object = msg_send![class!(NSSpeechSynthesizer), new];
@ -91,6 +91,10 @@ impl AppKit {
}
impl Backend for AppKit {
// AppKit has no `BackendId` variant, so this backend reports no ID.
fn id(&self) -> Option<BackendId> {
None
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -101,7 +105,7 @@ impl Backend for AppKit {
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> {
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
if interrupt {
self.stop()?;
@ -110,7 +114,7 @@ impl Backend for AppKit {
let str = NSString::alloc(nil).init_str(text);
let _: () = msg_send![self.1, enqueueAndSpeak: str];
}
Ok(())
Ok(None)
}
fn stop(&mut self) -> Result<(), Error> {

View File

@ -1,17 +1,22 @@
#[cfg(any(target_os = "macos", target_os = "ios"))]
#[link(name = "AVFoundation", kind = "framework")]
use std::sync::Mutex;
use cocoa_foundation::base::{id, nil};
use cocoa_foundation::foundation::NSString;
use lazy_static::lazy_static;
use log::{info, trace};
use objc::runtime::*;
use objc::*;
use objc::runtime::{Object, Sel};
use objc::{class, declare::ClassDecl, msg_send, sel, sel_impl};
use crate::{Backend, Error, Features};
use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS};
mod voices;
use voices::AVSpeechSynthesisVoice;
pub struct AvFoundation {
pub(crate) struct AvFoundation {
id: BackendId,
delegate: *mut Object,
synth: *mut Object,
rate: f32,
volume: f32,
@ -19,23 +24,95 @@ pub struct AvFoundation {
voice: AVSpeechSynthesisVoice,
}
lazy_static! {
// Counter for AVFoundation backend IDs: `AvFoundation::new` reads the current value for the
// new instance and then increments it.
static ref NEXT_BACKEND_ID: Mutex<u64> = Mutex::new(0);
}
impl AvFoundation {
pub fn new() -> Self {
pub(crate) fn new() -> Self {
info!("Initializing AVFoundation backend");
let mut decl = ClassDecl::new("MyNSSpeechSynthesizerDelegate", class!(NSObject)).unwrap();
decl.add_ivar::<u64>("backend_id");
/// Delegate method registered for `speechSynthesizer:didStartSpeechUtterance:`.
///
/// Reads the `backend_id` ivar stored on the delegate, looks up the callbacks registered for
/// that backend and, if an `utterance_begin` callback is set, invokes it with the utterance
/// that just started.
///
/// This function is entered from Objective-C, so the registry lookup must not panic: a
/// poisoned lock or a missing registry entry makes the notification a no-op instead of
/// unwinding out of an `extern "C"` function. (A panic raised by the user callback itself is
/// not caught here.)
extern "C" fn speech_synthesizer_did_start_speech_utterance(
    this: &Object,
    _: Sel,
    _synth: *const Object,
    utterance: id,
) {
    // SAFETY: the delegate class declares `backend_id` as a `u64` ivar and `AvFoundation::new`
    // sets it before installing the delegate, so reading it as `u64` matches its declared type.
    let backend_id: u64 = unsafe { *this.get_ivar("backend_id") };
    let backend_id = BackendId::AvFoundation(backend_id);
    let mut registry = match CALLBACKS.lock() {
        Ok(guard) => guard,
        // A poisoned registry means an earlier callback panicked; skip rather than panic here.
        Err(_) => return,
    };
    if let Some(callback) = registry
        .get_mut(&backend_id)
        .and_then(|callbacks| callbacks.utterance_begin.as_mut())
    {
        callback(UtteranceId::AvFoundation(utterance));
    }
}
/// Delegate method registered for `speechSynthesizer:didFinishSpeechUtterance:`.
///
/// Reads the `backend_id` ivar stored on the delegate, looks up the callbacks registered for
/// that backend and, if an `utterance_end` callback is set, invokes it with the utterance
/// that just finished.
///
/// This function is entered from Objective-C, so the registry lookup must not panic: a
/// poisoned lock or a missing registry entry makes the notification a no-op instead of
/// unwinding out of an `extern "C"` function. (A panic raised by the user callback itself is
/// not caught here.)
extern "C" fn speech_synthesizer_did_finish_speech_utterance(
    this: &Object,
    _: Sel,
    _synth: *const Object,
    utterance: id,
) {
    // SAFETY: the delegate class declares `backend_id` as a `u64` ivar and `AvFoundation::new`
    // sets it before installing the delegate, so reading it as `u64` matches its declared type.
    let backend_id: u64 = unsafe { *this.get_ivar("backend_id") };
    let backend_id = BackendId::AvFoundation(backend_id);
    let mut registry = match CALLBACKS.lock() {
        Ok(guard) => guard,
        // A poisoned registry means an earlier callback panicked; skip rather than panic here.
        Err(_) => return,
    };
    if let Some(callback) = registry
        .get_mut(&backend_id)
        .and_then(|callbacks| callbacks.utterance_end.as_mut())
    {
        callback(UtteranceId::AvFoundation(utterance));
    }
}
unsafe {
decl.add_method(
sel!(speechSynthesizer:didStartSpeechUtterance:),
speech_synthesizer_did_start_speech_utterance
as extern "C" fn(&Object, Sel, *const Object, id) -> (),
);
decl.add_method(
sel!(speechSynthesizer:didFinishSpeechUtterance:),
speech_synthesizer_did_finish_speech_utterance
as extern "C" fn(&Object, Sel, *const Object, id) -> (),
);
}
let delegate_class = decl.register();
let delegate_obj: *mut Object = unsafe { msg_send![delegate_class, new] };
let mut backend_id = NEXT_BACKEND_ID.lock().unwrap();
let rv = unsafe {
let synth: *mut Object = msg_send![class!(AVSpeechSynthesizer), new];
delegate_obj
.as_mut()
.unwrap()
.set_ivar("backend_id", *backend_id);
let _: () = msg_send![synth, setDelegate: delegate_obj];
AvFoundation {
id: BackendId::AvFoundation(*backend_id),
delegate: delegate_obj,
synth: synth,
rate: 0.5,
volume: 1.,
pitch: 1.,
voice: AVSpeechSynthesisVoice::default(),
}
}
};
*backend_id += 1;
rv
}
}
impl Backend for AvFoundation {
// Returns the `BackendId` assigned to this instance in `new`; the delegate carries the same
// number in its `backend_id` ivar so its callbacks can find this backend's registry entry.
fn id(&self) -> Option<BackendId> {
Some(self.id)
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -43,18 +120,23 @@ impl Backend for AvFoundation {
pitch: true,
volume: true,
is_speaking: true,
<<<<<<< HEAD
voices: true,
=======
utterance_callbacks: true,
>>>>>>> develop
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> {
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
if interrupt {
self.stop()?;
}
let utterance: id;
unsafe {
let str = NSString::alloc(nil).init_str(text);
let utterance: id = msg_send![class!(AVSpeechUtterance), alloc];
utterance = msg_send![class!(AVSpeechUtterance), alloc];
let _: () = msg_send![utterance, initWithString: str];
let _: () = msg_send![utterance, setRate: self.rate];
let _: () = msg_send![utterance, setVolume: self.volume];
@ -62,7 +144,7 @@ impl Backend for AvFoundation {
let _: () = msg_send![utterance, setVoice: self.voice];
let _: () = msg_send![self.synth, speakUtterance: utterance];
}
Ok(())
Ok(Some(UtteranceId::AvFoundation(utterance)))
}
fn stop(&mut self) -> Result<(), Error> {
@ -159,6 +241,7 @@ impl Backend for AvFoundation {
impl Drop for AvFoundation {
// Releases the Objective-C delegate and synthesizer objects held by this backend.
fn drop(&mut self) {
    // SAFETY: both pointers were obtained from `new` messages in `AvFoundation::new` and are
    // stored only in this struct, so each is released exactly once here.
    // NOTE(review): assumes nothing else retains or releases them — confirm.
    unsafe {
        // `release` returns nothing, so the message result is typed as `()`.
        let _: () = msg_send![self.delegate, release];
        let _: () = msg_send![self.synth, release];
    }
}

View File

@ -17,16 +17,16 @@ mod appkit;
mod av_foundation;
#[cfg(target_os = "linux")]
pub use self::speech_dispatcher::*;
pub(crate) use self::speech_dispatcher::*;
#[cfg(windows)]
pub use self::tolk::*;
pub(crate) use self::tolk::*;
#[cfg(target_arch = "wasm32")]
pub use self::web::*;
#[cfg(target_os = "macos")]
pub use self::appkit::*;
pub(crate) use self::appkit::*;
#[cfg(any(target_os = "macos", target_os = "ios"))]
pub use self::av_foundation::*;
pub(crate) use self::av_foundation::*;

View File

@ -1,14 +1,15 @@
#[cfg(target_os = "linux")]
use std::collections::HashMap;
use std::convert::TryInto;
use std::sync::Mutex;
use lazy_static::*;
use log::{info, trace};
use speech_dispatcher::*;
use crate::{Backend, Error, Features};
use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS};
pub struct SpeechDispatcher(Connection);
pub(crate) struct SpeechDispatcher(Connection);
lazy_static! {
static ref SPEAKING: Mutex<HashMap<u64, bool>> = {
@ -18,37 +19,55 @@ lazy_static! {
}
impl SpeechDispatcher {
pub fn new() -> Self {
pub(crate) fn new() -> Self {
info!("Initializing SpeechDispatcher backend");
let connection = speech_dispatcher::Connection::open("tts", "tts", "tts", Mode::Threaded);
let sd = SpeechDispatcher(connection);
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(sd.0.client_id(), false);
sd.0.on_begin(Some(|_msg_id, client_id| {
sd.0.on_begin(Some(Box::new(|msg_id, client_id| {
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(client_id, true);
}));
sd.0.on_end(Some(|_msg_id, client_id| {
let mut callbacks = CALLBACKS.lock().unwrap();
let backend_id = BackendId::SpeechDispatcher(client_id);
let cb = callbacks.get_mut(&backend_id).unwrap();
let utterance_id = UtteranceId::SpeechDispatcher(msg_id);
if let Some(f) = cb.utterance_begin.as_mut() {
f(utterance_id);
}
})));
sd.0.on_end(Some(Box::new(|msg_id, client_id| {
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(client_id, false);
}));
sd.0.on_cancel(Some(|_msg_id, client_id| {
let mut callbacks = CALLBACKS.lock().unwrap();
let backend_id = BackendId::SpeechDispatcher(client_id);
let cb = callbacks.get_mut(&backend_id).unwrap();
let utterance_id = UtteranceId::SpeechDispatcher(msg_id);
if let Some(f) = cb.utterance_end.as_mut() {
f(utterance_id);
}
})));
sd.0.on_cancel(Some(Box::new(|_msg_id, client_id| {
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(client_id, false);
}));
sd.0.on_pause(Some(|_msg_id, client_id| {
})));
sd.0.on_pause(Some(Box::new(|_msg_id, client_id| {
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(client_id, false);
}));
sd.0.on_resume(Some(|_msg_id, client_id| {
})));
sd.0.on_resume(Some(Box::new(|_msg_id, client_id| {
let mut speaking = SPEAKING.lock().unwrap();
speaking.insert(client_id, true);
}));
})));
sd
}
}
impl Backend for SpeechDispatcher {
// Identifies this backend by its connection's client ID, the same value the connection
// callbacks use to build their `BackendId` key.
fn id(&self) -> Option<BackendId> {
Some(BackendId::SpeechDispatcher(self.0.client_id()))
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -56,10 +75,11 @@ impl Backend for SpeechDispatcher {
pitch: true,
volume: true,
is_speaking: true,
utterance_callbacks: true,
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> {
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
if interrupt {
self.stop()?;
@ -68,11 +88,15 @@ impl Backend for SpeechDispatcher {
if single_char {
self.0.set_punctuation(Punctuation::All);
}
self.0.say(Priority::Important, text);
let id = self.0.say(Priority::Important, text);
if single_char {
self.0.set_punctuation(Punctuation::None);
}
Ok(())
if let Some(id) = id {
Ok(Some(UtteranceId::SpeechDispatcher(id.try_into().unwrap())))
} else {
Err(Error::NoneError)
}
}
fn stop(&mut self) -> Result<(), Error> {

View File

@ -2,12 +2,12 @@
use log::{info, trace};
use tolk::Tolk as TolkPtr;
use crate::{Backend, Error, Features};
use crate::{Backend, BackendId, Error, Features, UtteranceId};
pub struct Tolk(TolkPtr);
pub(crate) struct Tolk(TolkPtr);
impl Tolk {
pub fn new() -> Option<Self> {
pub(crate) fn new() -> Option<Self> {
info!("Initializing Tolk backend");
let tolk = TolkPtr::new();
if tolk.detect_screen_reader().is_some() {
@ -19,6 +19,10 @@ impl Tolk {
}
impl Backend for Tolk {
// Tolk has no `BackendId` variant, so this backend reports no ID.
fn id(&self) -> Option<BackendId> {
None
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -26,28 +30,10 @@ impl Backend for Tolk {
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> {
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
const BUFFER_LENGTH: usize = 300;
if text.len() <= BUFFER_LENGTH {
self.0.speak(text, interrupt);
} else {
if interrupt {
self.stop()?;
}
let tokens = text.split_whitespace();
let mut buffer = String::new();
for token in tokens {
if buffer.len() + token.len() > BUFFER_LENGTH {
self.0.speak(buffer, false);
buffer = String::new();
} else {
buffer.push_str(token);
buffer.push(' ');
}
}
}
Ok(())
self.0.speak(text, interrupt);
Ok(None)
}
fn stop(&mut self) -> Result<(), Error> {

View File

@ -1,27 +1,45 @@
#[cfg(target_arch = "wasm32")]
use log::{info, trace};
use web_sys::SpeechSynthesisUtterance;
use std::sync::Mutex;
use crate::{Backend, Error, Features};
use lazy_static::lazy_static;
use log::{info, trace};
use wasm_bindgen::prelude::*;
use wasm_bindgen::JsCast;
use web_sys::{SpeechSynthesisEvent, SpeechSynthesisUtterance};
use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS};
// Web Speech API backend. `rate`, `pitch` and `volume` are stored here and applied to each
// utterance created in `speak`; `id` is the key for this instance's callback registry entry.
pub struct Web {
id: BackendId,
rate: f32,
pitch: f32,
volume: f32,
}
lazy_static! {
// Counter for Web backend IDs: `Web::new` reads the current value for the new instance and
// then increments it.
static ref NEXT_BACKEND_ID: Mutex<u64> = Mutex::new(0);
}
impl Web {
pub fn new() -> Result<Self, Error> {
info!("Initializing Web backend");
Ok(Web {
let mut backend_id = NEXT_BACKEND_ID.lock().unwrap();
let rv = Web {
id: BackendId::Web(*backend_id),
rate: 1.,
pitch: 1.,
volume: 1.,
})
};
*backend_id += 1;
Ok(rv)
}
}
impl Backend for Web {
// Returns the `BackendId` assigned to this instance in `new`.
fn id(&self) -> Option<BackendId> {
Some(self.id)
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -29,23 +47,46 @@ impl Backend for Web {
pitch: true,
volume: true,
is_speaking: true,
utterance_callbacks: true,
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error> {
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
let utterance = SpeechSynthesisUtterance::new_with_text(text).unwrap();
utterance.set_rate(self.rate);
utterance.set_pitch(self.pitch);
utterance.set_volume(self.volume);
let id = self.id().unwrap();
let utterance_id = UtteranceId::Web(utterance.clone());
let callback = Closure::wrap(Box::new(move |evt: SpeechSynthesisEvent| {
let mut callbacks = CALLBACKS.lock().unwrap();
let callback = callbacks.get_mut(&id).unwrap();
if let Some(f) = callback.utterance_begin.as_mut() {
let utterance_id = UtteranceId::Web(evt.utterance());
f(utterance_id);
}
}) as Box<dyn Fn(_)>);
utterance.set_onstart(Some(callback.as_ref().unchecked_ref()));
let callback = Closure::wrap(Box::new(move |evt: SpeechSynthesisEvent| {
let mut callbacks = CALLBACKS.lock().unwrap();
let callback = callbacks.get_mut(&id).unwrap();
if let Some(f) = callback.utterance_end.as_mut() {
let utterance_id = UtteranceId::Web(evt.utterance());
f(utterance_id);
}
}) as Box<dyn Fn(_)>);
utterance.set_onend(Some(callback.as_ref().unchecked_ref()));
if interrupt {
self.stop()?;
}
if let Some(window) = web_sys::window() {
let speech_synthesis = window.speech_synthesis().unwrap();
speech_synthesis.speak(&utterance);
Ok(Some(utterance_id))
} else {
Err(Error::NoneError)
}
Ok(())
}
fn stop(&mut self) -> Result<(), Error> {

View File

@ -1,13 +1,19 @@
#[cfg(windows)]
use log::{info, trace};
use std::collections::HashMap;
use std::sync::Mutex;
use lazy_static::lazy_static;
use log::{info, trace};
use winrt::ComInterface;
use tts_winrt_bindings::windows::media::core::MediaSource;
use tts_winrt_bindings::windows::media::playback::{
MediaPlaybackItem, MediaPlaybackList, MediaPlaybackState, MediaPlayer,
CurrentMediaPlaybackItemChangedEventArgs, MediaPlaybackItem, MediaPlaybackList,
MediaPlaybackState, MediaPlayer,
};
use tts_winrt_bindings::windows::media::speech_synthesis::SpeechSynthesizer;
use tts_winrt_bindings::windows::{foundation::TypedEventHandler, media::core::MediaSource};
use crate::{Backend, Error, Features};
use crate::{Backend, BackendId, Error, Features, UtteranceId, CALLBACKS};
impl From<winrt::Error> for Error {
fn from(e: winrt::Error) -> Self {
@ -16,11 +22,28 @@ impl From<winrt::Error> for Error {
}
// WinRT backend. `player` is created with auto-play enabled and `playback_list` as its source;
// `speak` appends items to `playback_list`. `id` is the key for this instance's entries in the
// callback registry and in the lookup maps below.
pub struct WinRT {
id: BackendId,
synth: SpeechSynthesizer,
player: MediaPlayer,
playback_list: MediaPlaybackList,
}
lazy_static! {
    // Counter supplying the numeric part of the next `BackendId::WinRT`.
    static ref NEXT_BACKEND_ID: Mutex<u64> = Mutex::new(0);
    // Lets the `media_ended` handler map the sending `MediaPlayer` back to its backend.
    static ref BACKEND_TO_MEDIA_PLAYER: Mutex<HashMap<BackendId, MediaPlayer>> =
        Mutex::new(HashMap::new());
    // Lets the `current_item_changed` handler map the sending list back to its backend.
    static ref BACKEND_TO_PLAYBACK_LIST: Mutex<HashMap<BackendId, MediaPlaybackList>> =
        Mutex::new(HashMap::new());
    // Most recent utterance that became the current playback item, per backend.
    static ref LAST_SPOKEN_UTTERANCE: Mutex<HashMap<BackendId, UtteranceId>> =
        Mutex::new(HashMap::new());
}
impl WinRT {
pub fn new() -> std::result::Result<Self, Error> {
info!("Initializing WinRT backend");
@ -28,11 +51,17 @@ impl WinRT {
let player = MediaPlayer::new()?;
player.set_auto_play(true)?;
player.set_source(&playback_list)?;
Ok(Self {
let mut backend_id = NEXT_BACKEND_ID.lock().unwrap();
let bid = BackendId::WinRT(*backend_id);
let mut rv = Self {
id: bid,
synth: SpeechSynthesizer::new()?,
player: player,
playback_list: playback_list,
})
};
*backend_id += 1;
Self::init_callbacks(&mut rv)?;
Ok(rv)
}
fn reinit_player(&mut self) -> std::result::Result<(), Error> {
@ -40,11 +69,70 @@ impl WinRT {
self.player = MediaPlayer::new()?;
self.player.set_auto_play(true)?;
self.player.set_source(&self.playback_list)?;
self.init_callbacks()?;
Ok(())
}
// Registers this backend's player and playback list in the lookup maps and attaches the event
// handlers that forward utterance begin/end notifications to the callbacks stored in
// `CALLBACKS`. Called from `new`, and again from `reinit_player` after the player is replaced.
fn init_callbacks(&mut self) -> Result<(), winrt::Error> {
let id = self.id().unwrap();
// NOTE(review): this guard is held until the function returns, while the handler registered
// below locks the same mutex when it runs — confirm handlers never fire synchronously during
// registration on this thread.
let mut backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap();
backend_to_media_player.insert(id, self.player.clone());
// When playback ends, report the last utterance recorded by the `current_item_changed` handler
// as finished.
self.player
.media_ended(TypedEventHandler::new(|sender, _args| {
// The handler only receives the sender, so find the backend whose player matches it.
let backend_to_media_player = BACKEND_TO_MEDIA_PLAYER.lock().unwrap();
let id = backend_to_media_player.iter().find(|v| v.1 == sender);
if let Some(id) = id {
let id = id.0;
let mut callbacks = CALLBACKS.lock().unwrap();
// NOTE(review): panics if no registry entry exists for this backend; the entry is inserted by
// `TTS::new` after the backend is constructed — confirm no event can arrive before that.
let callbacks = callbacks.get_mut(&id).unwrap();
if let Some(callback) = callbacks.utterance_end.as_mut() {
let last_spoken_utterance = LAST_SPOKEN_UTTERANCE.lock().unwrap();
if let Some(utterance_id) = last_spoken_utterance.get(&id) {
callback(utterance_id.clone());
}
}
}
Ok(())
}))?;
let mut backend_to_playback_list = BACKEND_TO_PLAYBACK_LIST.lock().unwrap();
backend_to_playback_list.insert(id, self.playback_list.clone());
// When the current playback item changes, the old item (if any) has ended and the new item
// (if any) has begun.
self.playback_list
.current_item_changed(TypedEventHandler::new(
|sender: &MediaPlaybackList, args: &CurrentMediaPlaybackItemChangedEventArgs| {
// Find the backend whose playback list matches the sender.
let backend_to_playback_list = BACKEND_TO_PLAYBACK_LIST.lock().unwrap();
let id = backend_to_playback_list.iter().find(|v| v.1 == sender);
if let Some(id) = id {
let id = id.0;
let mut callbacks = CALLBACKS.lock().unwrap();
let callbacks = callbacks.get_mut(&id).unwrap();
let old_item = args.old_item()?;
if !old_item.is_null() {
if let Some(callback) = callbacks.utterance_end.as_mut() {
callback(UtteranceId::WinRT(old_item));
}
}
let new_item = args.new_item()?;
if !new_item.is_null() {
// Remember the new item so the `media_ended` handler can report it as finished.
let mut last_spoken_utterance = LAST_SPOKEN_UTTERANCE.lock().unwrap();
let utterance_id = UtteranceId::WinRT(new_item);
last_spoken_utterance.insert(*id, utterance_id.clone());
if let Some(callback) = callbacks.utterance_begin.as_mut() {
callback(utterance_id);
}
}
}
Ok(())
},
))?;
Ok(())
}
}
impl Backend for WinRT {
// Returns the `BackendId` assigned to this instance in `new`.
fn id(&self) -> Option<BackendId> {
Some(self.id)
}
fn supported_features(&self) -> Features {
Features {
stop: true,
@ -52,10 +140,15 @@ impl Backend for WinRT {
pitch: true,
volume: true,
is_speaking: true,
utterance_callbacks: true,
}
}
fn speak(&mut self, text: &str, interrupt: bool) -> std::result::Result<(), Error> {
fn speak(
&mut self,
text: &str,
interrupt: bool,
) -> std::result::Result<Option<UtteranceId>, Error> {
trace!("speak({}, {})", text, interrupt);
if interrupt {
self.stop()?;
@ -72,11 +165,12 @@ impl Backend for WinRT {
self.reinit_player()?;
}
}
self.playback_list.items()?.append(item)?;
self.playback_list.items()?.append(&item)?;
if !self.is_speaking()? {
self.player.play()?;
}
Ok(())
let utterance_id = UtteranceId::WinRT(item);
Ok(Some(utterance_id))
}
fn stop(&mut self) -> std::result::Result<(), Error> {
@ -169,3 +263,15 @@ impl Backend for WinRT {
unimplemented!()
}
}
impl Drop for WinRT {
    /// Removes this backend's entries from the module-level lookup maps.
    fn drop(&mut self) {
        // `id()` always returns `Some(self.id)` for this backend, so read the field directly.
        let key = self.id;
        // Each guard stays alive until the end of `drop`, in the same acquisition order as before.
        let mut playback_lists = BACKEND_TO_PLAYBACK_LIST.lock().unwrap();
        playback_lists.remove(&key);
        let mut media_players = BACKEND_TO_MEDIA_PLAYER.lock().unwrap();
        media_players.remove(&key);
        let mut last_utterances = LAST_SPOKEN_UTTERANCE.lock().unwrap();
        last_utterances.remove(&key);
    }
}

View File

@ -12,16 +12,24 @@
*/
use std::boxed::Box;
use std::collections::HashMap;
#[cfg(target_os = "macos")]
use std::ffi::CStr;
use std::sync::Mutex;
#[cfg(target_os = "macos")]
#[cfg(any(target_os = "macos", target_os = "ios"))]
use cocoa_foundation::base::id;
use lazy_static::lazy_static;
#[cfg(target_os = "macos")]
use libc::c_char;
#[cfg(target_os = "macos")]
use objc::{class, msg_send, sel, sel_impl};
use thiserror::Error;
#[cfg(target_arch = "wasm32")]
use web_sys::SpeechSynthesisUtterance;
#[cfg(windows)]
use tts_winrt_bindings::windows::media::playback::MediaPlaybackItem;
mod backends;
@ -40,6 +48,30 @@ pub enum Backends {
AvFoundation,
}
/**
 * Identifies a backend instance; used as the key of the callback registry.
 *
 * Each variant is compiled only on its own platform and wraps a `u64`. Backends without a
 * variant here return `None` from `Backend::id`.
 */
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BackendId {
#[cfg(target_os = "linux")]
SpeechDispatcher(u64),
#[cfg(target_arch = "wasm32")]
Web(u64),
#[cfg(windows)]
WinRT(u64),
#[cfg(any(target_os = "macos", target_os = "ios"))]
AvFoundation(u64),
}
/**
 * Identifies a single utterance; returned by `speak` and passed to utterance callbacks.
 *
 * The payload is platform-specific: a numeric ID for Speech Dispatcher, and the platform's
 * own utterance or playback-item object for the other backends.
 */
#[derive(Clone, Debug, PartialEq)]
pub enum UtteranceId {
#[cfg(target_os = "linux")]
SpeechDispatcher(u64),
#[cfg(target_arch = "wasm32")]
Web(SpeechSynthesisUtterance),
#[cfg(windows)]
WinRT(MediaPlaybackItem),
#[cfg(any(target_os = "macos", target_os = "ios"))]
AvFoundation(id),
}
pub struct Features {
pub stop: bool,
pub rate: bool,
@ -47,6 +79,7 @@ pub struct Features {
pub volume: bool,
pub is_speaking: bool,
pub voices: bool,
pub utterance_callbacks: bool,
}
impl Default for Features {
@ -58,6 +91,7 @@ impl Default for Features {
volume: false,
is_speaking: false,
voices: false,
utterance_callbacks: false,
}
}
}
@ -81,8 +115,9 @@ pub enum Error {
}
pub trait Backend {
fn id(&self) -> Option<BackendId>;
fn supported_features(&self) -> Features;
fn speak(&mut self, text: &str, interrupt: bool) -> Result<(), Error>;
fn speak(&mut self, text: &str, interrupt: bool) -> Result<Option<UtteranceId>, Error>;
fn stop(&mut self) -> Result<(), Error>;
fn min_rate(&self) -> f32;
fn max_rate(&self) -> f32;
@ -105,6 +140,23 @@ pub trait Backend {
fn set_voice(&mut self, voice: &str) -> Result<(),Error>;
}
// Utterance callbacks for one backend instance, stored in `CALLBACKS` keyed by `BackendId`.
// A `None` field means no callback is registered for that event.
#[derive(Default)]
struct Callbacks {
utterance_begin: Option<Box<dyn FnMut(UtteranceId)>>,
utterance_end: Option<Box<dyn FnMut(UtteranceId)>>,
}
// NOTE(review): the boxed closures carry no `Send`/`Sync` bound, so these impls are an unchecked
// promise rather than something the compiler verifies. They appear to exist so `Callbacks` can
// live in the global `Mutex<HashMap<..>>` — confirm that registered closures are never run on a
// thread where that is unsound, or consider requiring `Send` on the callback type.
unsafe impl Send for Callbacks {}
unsafe impl Sync for Callbacks {}
lazy_static! {
    // Registry of utterance callbacks, one entry per backend instance that reports an ID.
    static ref CALLBACKS: Mutex<HashMap<BackendId, Callbacks>> = Mutex::new(HashMap::new());
}
pub struct TTS(Box<dyn Backend>);
unsafe impl std::marker::Send for TTS {}
@ -116,7 +168,7 @@ impl TTS {
* Create a new `TTS` instance with the specified backend.
*/
pub fn new(backend: Backends) -> Result<TTS, Error> {
match backend {
let backend = match backend {
#[cfg(target_os = "linux")]
Backends::SpeechDispatcher => Ok(TTS(Box::new(backends::SpeechDispatcher::new()))),
#[cfg(target_arch = "wasm32")]
@ -142,6 +194,16 @@ impl TTS {
Backends::AppKit => Ok(TTS(Box::new(backends::AppKit::new()))),
#[cfg(any(target_os = "macos", target_os = "ios"))]
Backends::AvFoundation => Ok(TTS(Box::new(backends::AvFoundation::new()))),
};
if backend.is_ok() {
let backend = backend.unwrap();
if let Some(id) = backend.0.id() {
let mut callbacks = CALLBACKS.lock().unwrap();
callbacks.insert(id, Callbacks::default());
}
Ok(backend)
} else {
backend
}
}
@ -189,9 +251,12 @@ impl TTS {
/**
* Speaks the specified text, optionally interrupting current speech.
*/
pub fn speak<S: Into<String>>(&mut self, text: S, interrupt: bool) -> Result<&Self, Error> {
self.0.speak(text.into().as_str(), interrupt)?;
Ok(self)
pub fn speak<S: Into<String>>(
&mut self,
text: S,
interrupt: bool,
) -> Result<Option<UtteranceId>, Error> {
self.0.speak(text.into().as_str(), interrupt)
}
/**
@ -410,4 +475,57 @@ impl TTS {
Err(Error::UnsupportedFeature)
}
}
/**
* Called when this speech synthesizer begins speaking an utterance.
*/
pub fn on_utterance_begin(
&self,
callback: Option<Box<dyn FnMut(UtteranceId)>>,
) -> Result<(), Error> {
let Features {
utterance_callbacks,
..
} = self.supported_features();
if utterance_callbacks {
let mut callbacks = CALLBACKS.lock().unwrap();
let id = self.0.id().unwrap();
let mut callbacks = callbacks.get_mut(&id).unwrap();
callbacks.utterance_begin = callback;
Ok(())
} else {
Err(Error::UnsupportedFeature)
}
}
/**
* Called when this speech synthesizer finishes speaking an utterance.
*/
pub fn on_utterance_end(
&self,
callback: Option<Box<dyn FnMut(UtteranceId)>>,
) -> Result<(), Error> {
let Features {
utterance_callbacks,
..
} = self.supported_features();
if utterance_callbacks {
let mut callbacks = CALLBACKS.lock().unwrap();
let id = self.0.id().unwrap();
let mut callbacks = callbacks.get_mut(&id).unwrap();
callbacks.utterance_end = callback;
Ok(())
} else {
Err(Error::UnsupportedFeature)
}
}
}
impl Drop for TTS {
    /// Removes this instance's entry from the callback registry, if its backend has an ID.
    fn drop(&mut self) {
        let backend_id = match self.0.id() {
            Some(backend_id) => backend_id,
            // Backends without an ID never had a registry entry.
            None => return,
        };
        let mut registry = CALLBACKS.lock().unwrap();
        registry.remove(&backend_id);
    }
}