Skip to content

Phoneme Error Rate

speechline.metrics.phoneme_error_rate.PhonemeErrorRate

Phoneme-Error Rate metric, with flexibility in lexicon.

Parameters:

`lexicon` (`Dict[str, List[List[str]]]`, required):
    Pronunciation lexicon with word (grapheme) as key, and a list of valid
    phoneme-list pronunciations as value.

`epsilon_token` (`str`, defaults to `"<*>"`):
    Skippable placeholder token inserted to length-align alternative
    pronunciations of a word.
Source code in speechline/metrics/phoneme_error_rate.py
class PhonemeErrorRate:
    """
    Phoneme-Error Rate metric, with flexibility in lexicon.

    A word may map to several valid pronunciations. A substitution between
    two distinct valid alternative phonemes at the same position, or the
    deletion of an epsilon padding token, is not counted as an error.

    Args:
        lexicon (Dict[str, List[List[str]]]):
            Pronunciation lexicon with word (grapheme) as key,
            and list of valid phoneme-list pronunciations.
        epsilon_token (str):
            Skippable placeholder inserted to length-align alternative
            pronunciations of a word. Defaults to `"<*>"`.
    """

    def __init__(
        self, lexicon: Dict[str, List[List[str]]], epsilon_token: str = "<*>"
    ) -> None:
        # deep-copy so the metric never shares (or mutates) the caller's lexicon
        self.lexicon = deepcopy(lexicon)
        self.epsilon_token = epsilon_token

    def __call__(
        self, sequences: List[List[str]], predictions: List[List[str]]
    ) -> float:
        """
        Calculates PER given list of ground truth words, predicted phonemes,
        and corresponding lexicon.

        ### Example
        ```pycon title="example_phoneme_error_rate.py"
        >>> lexicon = {
        ...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
        ...     "guy": [["g", "a", "i"]]
        ... }
        >>> per = PhonemeErrorRate(lexicon)
        >>> sequences = [
        ...     ["hello", "hello"],
        ...     ["hello", "guy"]
        ... ]
        >>> predictions = [
        ...     ["h", "e", "l", "l", "o", "b", "e", "l", "l", "o"],
        ...     ["h", "a", "l", "l", "o", "g", "a", "i"]
        ... ]
        >>> per(sequences=sequences, predictions=predictions)
        0.05555555555555555
        ```

        Args:
            sequences (List[List[str]]):
                List of list of ground truth words in a batch.
            predictions (List[List[str]]):
                List of list of predicted phonemes in a batch.

        Raises:
            ValueError: Mismatch in the number of predictions and sequences.
            KeyError: Words not found in the lexicon.

        Returns:
            float:
                Phoneme error rate. `0.0` for an empty batch.
        """
        if len(sequences) != len(predictions):
            raise ValueError(
                f"Mismatch in the number of predictions ({len(predictions)}) and sequences ({len(sequences)})"  # noqa: E501
            )

        oovs = [word for seq in sequences for word in seq if word not in self.lexicon]
        if len(oovs) > 0:
            raise KeyError(f"Words not found in the lexicon: {oovs}")

        errors, total = 0, 0
        for words, prediction in zip(sequences, predictions):
            measures = self.compute_measures(words, prediction)
            errors += measures["errors"]
            total += measures["total"]
        # an empty batch has no reference phonemes; avoid ZeroDivisionError
        if total == 0:
            return 0.0
        return errors / total

    def compute_measures(
        self, words: List[str], prediction: List[str]
    ) -> Dict[str, int]:
        """
        Computes the number of phoneme-level errors.

        ### Example
        ```pycon title="example_compute_measures.py"
        >>> lexicon = {
        ...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
        ...     "guy": [["g", "a", "i"]]
        ... }
        >>> words = ["hello", "guy"]
        >>> per = PhonemeErrorRate(lexicon)
        >>> per.compute_measures(
        ...     words,
        ...     prediction=["h", "a", "l", "l", "o", "g", "a", "i"]
        ... )
        {'errors': 0, 'total': 8}
        >>> per.compute_measures(
        ...     words,
        ...     prediction=["h", "a", "l", "a", "i"]
        ... )
        {'errors': 3, 'total': 8}
        >>> per.compute_measures(
        ...     words,
        ...     prediction=["h", "a", "l", "l", "o", "b", "h", "a", "i"]
        ... )
        {'errors': 2, 'total': 8}
        ```

        Args:
            words (List[str]):
                List of ground truth words.
            prediction (List[str]):
                List of predicted phonemes.

        Returns:
            Dict[str, int]:
                A dictionary with number of errors and total number of true phonemes.
        """
        stack = self._build_pronunciation_stack(words)
        # Build the reference from the same epsilon-padded pronunciations the
        # stack was built from (longest variant per word), so positions in
        # `reference` line up with entries of `stack`. Previously the reference
        # was read from self.lexicon, which insert_epsilon had mutated in
        # place; results then depended on how often the metric had been called.
        reference: List[str] = []
        for word in words:
            padded = self._pad_pronunciations(deepcopy(self.lexicon[word]))
            reference.extend(max(padded, key=len))

        editops = Levenshtein.editops(reference, prediction)
        # start from the raw edit-op count, then discount allowed deviations
        errors = len(editops)

        for tag, i, j in editops:
            # only positions with >1 valid phoneme can be discounted
            if i < len(stack) and len(stack[i]) > 1:
                # substitution between two distinct valid alternatives
                # (equivalent to the pair being in permutations(stack[i], 2))
                if (
                    tag == "replace"
                    and reference[i] in stack[i]
                    and prediction[j] in stack[i]
                    and reference[i] != prediction[j]
                ):
                    errors -= 1
                # deletion of a skippable epsilon padding token
                elif tag == "delete" and reference[i] == self.epsilon_token:
                    errors -= 1

        return {"errors": errors, "total": len(reference)}

    def _pad_pronunciations(
        self, pronunciations: List[List[str]]
    ) -> List[List[str]]:
        """
        Inserts epsilon (skippable) tokens so that alternative pronunciations
        of one word are aligned in length. Epsilon tokens are ignored during
        the phoneme matching step.

        The inner lists of `pronunciations` are modified in place, so callers
        must pass a private (deep) copy.

        Args:
            pronunciations (List[List[str]]):
                List of phoneme pronunciations; mutated in place.

        Returns:
            List[List[str]]:
                The same list, with epsilon tokens inserted.
        """
        # pass 1: align to the current longest pronunciation, inserting
        # epsilons at source-side insertion indices
        longest = max(pronunciations, key=len)
        for pron in pronunciations:
            if len(pron) != len(longest):
                for op, i, _ in Levenshtein.editops(pron, longest):
                    if op == "insert":
                        pron.insert(i, self.epsilon_token)

        # pass 2: pass 1 may have produced a new longest pronunciation;
        # realign, this time following target-side indices.
        # See: https://github.com/bookbot-kids/speechline/issues/64.
        longest = max(pronunciations, key=len)
        for pron in pronunciations:
            if len(pron) != len(longest):
                for op, _, j in Levenshtein.editops(pron, longest):
                    if op == "insert":
                        pron.insert(j, self.epsilon_token)

        return pronunciations

    def _build_pronunciation_stack(self, words: List[str]) -> List[Set[str]]:
        """
        Builds a list of expected pronunciation "stack": for each aligned
        position, the set of phonemes that any valid pronunciation allows.
        Operates on padded copies; `self.lexicon` is left untouched.

        ### Example
        ```pycon title="example_build_pronunciation_stack.py"
        >>> lexicon = {
        ...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
        ...     "guy": [["g", "a", "i"]]
        ... }
        >>> words = ["hello", "guy"]
        >>> per = PhonemeErrorRate(lexicon)
        >>> per._build_pronunciation_stack(words)
        [{'h'}, {'a', 'e'}, {'l'}, {'l'}, {'o'}, {'g'}, {'a'}, {'i'}]
        ```

        Args:
            words (List[str]):
                List of words whose pronunciation stack will be built.

        Returns:
            List[Set[str]]:
                List of possible phonemes of the input words.
        """
        stack: List[Set[str]] = []
        for word in words:
            # pad a private deep copy; self.lexicon itself is never mutated
            padded = self._pad_pronunciations(deepcopy(self.lexicon[word]))
            stack += [
                {pron[i] for pron in padded} for i in range(len(padded[0]))
            ]
        return stack

__call__(self, sequences, predictions) special

Calculates PER given list of ground truth words, predicted phonemes, and corresponding lexicon.

Example
example_phoneme_error_rate.py
>>> lexicon = {
...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
...     "guy": [["g", "a", "i"]]
... }
>>> per = PhonemeErrorRate(lexicon)
>>> sequences = [
...     ["hello", "hello"],
...     ["hello", "guy"]
... ]
>>> predictions = [
...     ["h", "e", "l", "l", "o", "b", "e", "l", "l", "o"],
...     ["h", "a", "l", "l", "o", "g", "a", "i"]
... ]
>>> per(sequences=sequences, predictions=predictions)
0.05555555555555555

Parameters:

`sequences` (`List[List[str]]`, required):
    List of list of ground truth words in a batch.

`predictions` (`List[List[str]]`, required):
    List of list of predicted phonemes in a batch.

Exceptions:

Type Description
ValueError

Mismatch in the number of predictions and sequences.

KeyError

Words not found in the lexicon.

Returns:

Type Description
float

Phoneme error rate.

Source code in speechline/metrics/phoneme_error_rate.py
def __call__(
    self, sequences: List[List[str]], predictions: List[List[str]]
) -> float:
    """
    Calculates PER given list of ground truth words, predicted phonemes,
    and corresponding lexicon.

    ### Example
    ```pycon title="example_phoneme_error_rate.py"
    >>> lexicon = {
    ...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
    ...     "guy": [["g", "a", "i"]]
    ... }
    >>> per = PhonemeErrorRate(lexicon)
    >>> sequences = [
    ...     ["hello", "hello"],
    ...     ["hello", "guy"]
    ... ]
    >>> predictions = [
    ...     ["h", "e", "l", "l", "o", "b", "e", "l", "l", "o"],
    ...     ["h", "a", "l", "l", "o", "g", "a", "i"]
    ... ]
    >>> per(sequences=sequences, predictions=predictions)
    0.05555555555555555
    ```

    Args:
        sequences (List[List[str]]):
            List of list of ground truth words in a batch.
        predictions (List[List[str]]):
            List of list of predicted phonemes in a batch.

    Raises:
        ValueError: Mismatch in the number of predictions and sequences.
        KeyError: Words not found in the lexicon.

    Returns:
        float:
            Phoneme error rate.
    """
    if len(sequences) != len(predictions):
        raise ValueError(
            f"Mismatch in the number of predictions ({len(predictions)}) and sequences ({len(sequences)})"  # noqa: E501
        )

    oovs = [word for seq in sequences for word in seq if word not in self.lexicon]
    if len(oovs) > 0:
        raise KeyError(f"Words not found in the lexicon: {oovs}")

    errors, total = 0, 0
    for words, prediction in zip(sequences, predictions):
        measures = self.compute_measures(words, prediction)
        errors += measures["errors"]
        total += measures["total"]
    return errors / total

compute_measures(self, words, prediction)

Computes the number of phoneme-level errors.

Example
example_compute_measures.py
>>> lexicon = {
...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
...     "guy": [["g", "a", "i"]]
... }
>>> words = ["hello", "guy"]
>>> per = PhonemeErrorRate(lexicon)
>>> per.compute_measures(
...     words,
...     prediction=["h", "a", "l", "l", "o", "g", "a", "i"]
... )
{'errors': 0, 'total': 8}
>>> per.compute_measures(
...     words,
...     prediction=["h", "a", "l", "a", "i"]
... )
{'errors': 3, 'total': 8}
>>> per.compute_measures(
...     words,
...     prediction=["h", "a", "l", "l", "o", "b", "h", "a", "i"]
... )
{'errors': 2, 'total': 8}

Parameters:

`words` (`List[str]`, required):
    List of ground truth words.

`prediction` (`List[str]`, required):
    List of predicted phonemes.

Returns:

Type Description
Dict[str, int]

A dictionary with number of errors and total number of true phonemes.

Source code in speechline/metrics/phoneme_error_rate.py
def compute_measures(
    self, words: List[str], prediction: List[str]
) -> Dict[str, int]:
    """
    Computes the number of phoneme-level errors.

    ### Example
    ```pycon title="example_compute_measures.py"
    >>> lexicon = {
    ...     "hello": [["h", "e", "l", "l", "o"], ["h", "a", "l", "l", "o"]],
    ...     "guy": [["g", "a", "i"]]
    ... }
    >>> words = ["hello", "guy"]
    >>> per = PhonemeErrorRate(lexicon)
    >>> per.compute_measures(
    ...     words,
    ...     prediction=["h", "a", "l", "l", "o", "g", "a", "i"]
    ... )
    {'errors': 0, 'total': 8}
    >>> per.compute_measures(
    ...     words,
    ...     prediction=["h", "a", "l", "a", "i"]
    ... )
    {'errors': 3, 'total': 8}
    >>> per.compute_measures(
    ...     words,
    ...     prediction=["h", "a", "l", "l", "o", "b", "h", "a", "i"]
    ... )
    {'errors': 2, 'total': 8}
    ```

    Args:
        words (List[str]):
            List of ground truth words.
        prediction (List[str]):
            List of predicted phonemes.

    Returns:
        Dict[str, int]:
            A dictionary with number of errors and total number of true phonemes.
    """
    # per-position sets of valid phonemes across all pronunciations of `words`
    stack = self._build_pronunciation_stack(words)
    # NOTE(review): _build_pronunciation_stack pads the pronunciation lists in
    # self.lexicon in place (insert_epsilon copies only the outer list), so the
    # reference built below may already contain epsilon tokens and `max` ties
    # can resolve differently on later calls — confirm this coupling is intended.
    # reference = longest pronunciation of each word, concatenated in order
    reference = [
        phoneme for word in words for phoneme in max(self.lexicon[word], key=len)
    ]

    editops = Levenshtein.editops(reference, prediction)
    # get initial number of errors; each discount below removes one edit op
    errors = len(editops)

    for tag, i, j in editops:
        # if there are >1 valid phonemes at position in stack
        if i < len(stack) and len(stack[i]) > 1:
            # check if pair of phoneme is in list of valid phoneme pairs
            # or is substituted by epsilon, which we will thus ignore
            # (permutations yields every ordered pair of distinct phonemes
            # from stack[i]; the iterator is recreated for each edit op)
            permutes = permutations(stack[i], 2)
            if tag == "replace" and (reference[i], prediction[j]) in permutes:
                errors -= 1
            # or is an epsilon and hence skippable
            elif tag == "delete" and reference[i] == self.epsilon_token:
                errors -= 1

    # total counts reference phonemes (including any epsilon padding tokens)
    return {"errors": errors, "total": len(reference)}