Grapheme-to-Phoneme Converter
speechline.utils.g2p
g2p_en(text)
Convert English text to string of phonemes via gruut.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text |
str |
English text to convert. |
required |
Returns:
| Type | Description |
|---|---|
List[str] |
List of phoneme strings, one entry per word (space-separated phonemes) or break token. |
Source code in speechline/utils/g2p.py
def g2p_en(text: str) -> List[str]:
    """
    Convert English text to phoneme strings via gruut.

    Args:
        text (str):
            English text to convert.

    Returns:
        List[str]:
            One entry per token, in order of appearance. Major and minor
            breaks contribute their own text; words that have phonemes
            contribute those phonemes joined by single spaces. Tokens that
            are neither are skipped.
    """
    # Flatten gruut's sentence -> word nesting into a single lazy stream.
    tokens = (token for sentence in sentences(text) for token in sentence)

    result = []
    for token in tokens:
        if token.is_major_break or token.is_minor_break:
            # Breaks are kept verbatim rather than phonemized.
            result.append(token.text)
            continue
        if token.phonemes:
            result.append(" ".join(token.phonemes))
    return result
g2p_id(text)
Convert Indonesian text to string of phonemes via g2p_id.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text |
str |
Indonesian text to convert. |
required |
Returns:
| Type | Description |
|---|---|
List[str] |
List of phoneme strings, each with its phonemes separated by spaces. |
Source code in speechline/utils/g2p.py
def g2p_id(text: str) -> List[str]:
    """
    Convert Indonesian text to phoneme strings via g2p_id.

    Args:
        text (str):
            Indonesian text to convert.

    Raises:
        ImportError: The optional `g2p_id` package is not installed.

    Returns:
        List[str]:
            One string per item produced by the g2p_id converter, with that
            item's phonemes joined by single spaces.
    """
    if not HAS_G2P_ID:
        raise ImportError("g2p_id package not installed. Install with: pip install g2p-id")

    converter = G2p()
    # Each item yielded by the converter is an iterable of phoneme strings.
    return list(map(" ".join, converter(text)))
g2p_sw(text)
Convert Swahili text to string of phonemes via gruut.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text |
str |
Swahili text to convert. |
required |
Returns:
| Type | Description |
|---|---|
List[str] |
List of phoneme strings, one entry per word (space-separated phonemes) or break token. |
Source code in speechline/utils/g2p.py
def g2p_sw(text: str) -> List[str]:
    """
    Convert Swahili text to phoneme strings via gruut.

    Args:
        text (str):
            Swahili text to convert.

    Returns:
        List[str]:
            One entry per token, in order of appearance. Major and minor
            breaks contribute their own text; words that have phonemes
            contribute those phonemes joined by single spaces, after the
            "ng'" correction described below. Other tokens are skipped.
    """
    # NOTE: gruut doesn't handle "ng'" /ŋ/
    # we need to fix e.g. ng'ombe -> /ŋombe/ instead of /ᵑgombe/
    velar_grapheme = "ng'"
    prenasalized_phoneme = "ᵑg"
    velar_phoneme = "ŋ"
    # Matches every "ng" in the spelling, with or without the apostrophe, so
    # the matches line up one-to-one with the prenasalized phonemes.
    ng_pattern = f"{velar_grapheme}?"

    output = []
    for sentence in sentences(text, lang="sw"):
        for token in sentence:
            if token.is_major_break or token.is_minor_break:
                output.append(token.text)
                continue
            if not token.phonemes:
                continue

            # Work on a copy so gruut's own phoneme sequence is left untouched.
            fixed = token.phonemes[:]
            if velar_grapheme in token.text:
                spellings = re.findall(ng_pattern, token.text)
                slots = [
                    pos
                    for pos, phoneme in enumerate(fixed)
                    if phoneme == prenasalized_phoneme
                ]
                # Every "ng"/"ng'" spelling must pair with one /ᵑg/ phoneme.
                assert len(spellings) == len(slots)
                for pos, spelling in zip(slots, spellings):
                    # Only the apostrophized spelling denotes the velar nasal.
                    if spelling == velar_grapheme:
                        fixed[pos] = velar_phoneme
            output.append(" ".join(fixed))
    return output
get_g2p(language)
Gets the corresponding g2p function given language.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
language |
str |
Language code. Can be in the form of `en-US` or simply `en`. |
required |
Exceptions:
| Type | Description |
|---|---|
NotImplementedError |
Language has no g2p function implemented yet. |
Returns:
| Type | Description |
|---|---|
Callable |
G2p callable function. |
Source code in speechline/utils/g2p.py
def get_g2p(language: str) -> Callable:
    """
    Gets the corresponding g2p function given `language`.

    Args:
        language (str):
            Language code. Can be in the form of `en-US` or simply `en`.
            Matching is case-insensitive and uses only the primary subtag,
            i.e. the part before the first `-` or `_`.

    Raises:
        NotImplementedError: Language has no g2p function implemented yet.

    Returns:
        Callable:
            G2p callable function.
    """
    LANG2G2P = {
        "en": g2p_en,
        "id": g2p_id,
        "sw": g2p_sw,
    }

    # Reduce region-qualified codes such as "en-US" or "en_us" to their
    # primary subtag ("en"); a verbatim lookup would reject them even though
    # they are documented as accepted.
    primary_subtag = language.lower().replace("_", "-").split("-", 1)[0]

    g2p_function = LANG2G2P.get(primary_subtag)
    if g2p_function is None:
        raise NotImplementedError(f"{language} has no g2p function yet!")
    return g2p_function