# Source code for fmus_vox.tts

"""
Text-to-Speech (TTS) functionality for fmus-vox.

This module provides functionality for synthesizing text into speech
using various models and techniques.
"""

from typing import Optional, Union, Dict, Any

from fmus_vox.core.audio import Audio
from fmus_vox.core.errors import SynthesisError

# Import speaker implementation
from fmus_vox.tts.speaker import Speaker


# [docs]
def speak(
    text: str,
    voice: str = "default",
    output: Optional[str] = None,
    model: str = "vits",
    **kwargs: Any
) -> Optional[Audio]:
    """
    Synthesize speech from text using a specified model and voice.

    This is a simple functional API for quick speech synthesis: it builds a
    one-off ``Speaker``, calls its ``speak`` method and either returns the
    result or saves it. For more control, use the Speaker class directly.

    Args:
        text: Text to synthesize
        voice: Voice to use (name or ID); forwarded to ``Speaker``
        output: Path to save audio file. When falsy (``None`` or an empty
            string) nothing is saved and the Audio object is returned.
        model: Model to use for synthesis (vits, coqui, etc.); forwarded
            to ``Speaker``
        **kwargs: Additional model-specific parameters, passed unchanged to
            the ``Speaker`` constructor

    Returns:
        Audio object if no output path was given, otherwise None

    Raises:
        SynthesisError: If constructing the speaker, synthesizing, or saving
            fails. The original exception is attached as ``__cause__``.

    Examples:
        >>> # Synthesize speech and play it
        >>> audio = speak("Hello, world!")
        >>> audio.play()

        >>> # Synthesize speech and save to file
        >>> speak("Hello, world!", output="hello.wav")
    """
    try:
        # Create speaker with specified model and voice
        speaker = Speaker(model=model, voice=voice, **kwargs)

        # Synthesize speech
        audio = speaker.speak(text)

        # Save to file if output path is provided
        if output:
            audio.save(output)
            return None

        return audio

    except Exception as e:
        # Chain explicitly so the traceback shows the root failure as the
        # direct cause rather than as an incidental error during handling.
        raise SynthesisError(f"Failed to synthesize speech: {str(e)}") from e