Skip to content

Grapheme-to-Phoneme Converter

speechline.utils.g2p

g2p_en(text)

Convert English text to string of phonemes via gruut.

Parameters:

Name Type Description Default
text str

English text to convert.

required

Returns:

Type Description
List[str]

List of phoneme strings: one entry per word (its phonemes joined by spaces) or per break token.

Source code in speechline/utils/g2p.py
def g2p_en(text: str) -> List[str]:
    """
    Convert English text to phoneme strings via gruut.

    Args:
        text (str):
            English text to convert.

    Returns:
        List[str]:
            One entry per emitted token, in order of appearance: the literal
            text of a major/minor break, or a word's phonemes joined by
            single spaces. Tokens that are neither a break nor carry
            phonemes are skipped.
    """
    result = []
    for sentence in sentences(text):
        for token in sentence:
            # Breaks are passed through using their surface text.
            if token.is_major_break or token.is_minor_break:
                result.append(token.text)
                continue
            # Only tokens that actually have phonemes contribute an entry.
            if token.phonemes:
                result.append(" ".join(token.phonemes))
    return result

g2p_id(text)

Convert Indonesian text to string of phonemes via g2p_id.

Parameters:

Name Type Description Default
text str

Indonesian text to convert.

required

Returns:

Type Description
List[str]

List of phoneme strings, each a space-joined phoneme sequence.

Source code in speechline/utils/g2p.py
def g2p_id(text: str) -> List[str]:
    """
    Convert Indonesian text to phoneme strings via g2p_id.

    Args:
        text (str):
            Indonesian text to convert.

    Raises:
        ImportError: The optional `g2p_id` package is not available
            (`HAS_G2P_ID` is falsy).

    Returns:
        List[str]:
            One entry per item produced by the `G2p` converter, with that
            item's elements joined by single spaces.
    """
    if not HAS_G2P_ID:
        raise ImportError("g2p_id package not installed. Install with: pip install g2p-id")

    converter = G2p()
    return [" ".join(item) for item in converter(text)]

g2p_sw(text)

Convert Swahili text to string of phonemes via gruut.

Parameters:

Name Type Description Default
text str

Swahili text to convert.

required

Returns:

Type Description
List[str]

List of phoneme strings: one entry per word (its phonemes joined by spaces) or per break token.

Source code in speechline/utils/g2p.py
def g2p_sw(text: str) -> List[str]:
    """
    Convert Swahili text to phoneme strings via gruut.

    Args:
        text (str):
            Swahili text to convert.

    Returns:
        List[str]:
            One entry per emitted token, in order of appearance: the literal
            text of a major/minor break, or a word's phonemes joined by
            single spaces. Tokens that are neither a break nor carry
            phonemes are skipped.
    """
    # NOTE: gruut doesn't handle "ng'" /ŋ/
    # we need to fix e.g. ng'ombe -> /ŋombe/ instead of /ᵑgombe/
    NG_GRAPHEME = "ng'"
    NG_PRENASALIZED_PHONEME = "ᵑg"
    NG_PHONEME = "ŋ"

    output = []
    for sentence in sentences(text, lang="sw"):
        for token in sentence:
            if token.is_major_break or token.is_minor_break:
                output.append(token.text)
                continue
            if not token.phonemes:
                continue

            # Work on a copy so the token's own phoneme sequence is untouched.
            fixed = token.phonemes[:]

            if NG_GRAPHEME in token.text:
                # Collect every "ng" in the spelling, with or without the
                # trailing apostrophe, so occurrences can be paired
                # positionally with the prenasalized phonemes.
                spellings = re.findall(f"{NG_GRAPHEME}?", token.text)
                positions = [
                    idx
                    for idx, phoneme in enumerate(fixed)
                    if phoneme == NG_PRENASALIZED_PHONEME
                ]
                # The pairing below is only valid when both counts agree.
                assert len(spellings) == len(positions)
                for idx, spelling in zip(positions, spellings):
                    # Only the apostrophe form is the velar nasal; plain "ng"
                    # keeps the phoneme it already has.
                    if spelling == NG_GRAPHEME:
                        fixed[idx] = NG_PHONEME

            output.append(" ".join(fixed))
    return output

get_g2p(language)

Gets the corresponding g2p function given language.

Parameters:

Name Type Description Default
language str

Language code. Can be in the form of en-US or simply en.

required

Exceptions:

Type Description
NotImplementedError

Language has no g2p function implemented yet.

Returns:

Type Description
Callable

G2p callable function.

Source code in speechline/utils/g2p.py
def get_g2p(language: str) -> Callable:
    """
    Gets the corresponding g2p function given `language`.

    The code is matched case-insensitively on its primary language subtag,
    so `en`, `EN`, `en-US` and `en_US` all resolve to the English g2p.

    Args:
        language (str):
            Language code. Can be in the form of `en-US` or simply `en`.

    Raises:
        NotImplementedError: Language has no g2p function implemented yet.

    Returns:
        Callable:
            G2p callable function.
    """

    LANG2G2P = {
        "en": g2p_en,
        "id": g2p_id,
        "sw": g2p_sw,
    }

    # Region-qualified tags must be reduced to the bare language subtag;
    # looking up the lowercased tag verbatim ("en-us") never matches a key.
    primary = language.lower().replace("_", "-").split("-", 1)[0]

    g2p = LANG2G2P.get(primary)
    if g2p is None:
        raise NotImplementedError(f"{language} has no g2p function yet!")
    return g2p