Skip to content

Audio Multi Label Classification

speechline.pipelines.audio_multilabel_classification.AudioMultiLabelClassificationPipeline (AudioClassificationPipeline)

Subclass of AudioClassificationPipeline. Performs multi-label audio classification instead of multi-class classification. Applies Sigmoid on logits instead of Softmax.

Source code in speechline/pipelines/audio_multilabel_classification.py
class AudioMultiLabelClassificationPipeline(AudioClassificationPipeline):
    """
    Multi-label variant of `AudioClassificationPipeline`.
    Scores each label independently by passing the logits through a Sigmoid,
    rather than normalizing them across labels with a Softmax.
    """

    def _sanitize_parameters(self, **kwargs) -> Tuple[Dict, Dict, Dict]:
        """
        Builds the per-stage parameter dictionaries. Incoming `kwargs` are ignored.

        Returns:
            Tuple[Dict, Dict, Dict]:
                Three dictionaries, in order:

                    1. Preprocess parameters (always empty).
                    2. Forward parameters (always empty).
                    3. Postprocess parameters (`top_k` set to the model
                       config's `num_labels`).
        """
        preprocess_params: Dict = {}
        forward_params: Dict = {}
        num_labels = self.model.config.num_labels
        return preprocess_params, forward_params, {"top_k": num_labels}

    def postprocess(self, model_outputs: ModelOutput, **kwargs) -> np.ndarray:
        """
        Converts the model's logits into Sigmoid probabilities.

        Args:
            model_outputs (ModelOutput):
                Generic HuggingFace model outputs; only `.logits` is read.

        Returns:
            np.ndarray:
                Sigmoid of `model_outputs.logits[0]`, moved to CPU and
                converted to a NumPy array.
        """
        # Only the first entry along the leading axis is used
        # (presumably the batch axis -- confirm against the caller).
        first_logits = model_outputs.logits[0]
        return torch.sigmoid(first_logits).cpu().numpy()

postprocess(self, model_outputs, **kwargs)

Applies Sigmoid on logits.

Parameters:

Name Type Description Default
model_outputs ModelOutput

Generic HuggingFace model outputs.

required

Returns:

Type Description
np.ndarray

Array of Sigmoid probabilities computed from the logits.

Source code in speechline/pipelines/audio_multilabel_classification.py
def postprocess(self, model_outputs: ModelOutput, **kwargs) -> np.ndarray:
    """
    Converts the model's logits into Sigmoid probabilities.

    Args:
        model_outputs (ModelOutput):
            Generic HuggingFace model outputs; only `.logits` is read.

    Returns:
        np.ndarray:
            Sigmoid of `model_outputs.logits[0]`, moved to CPU and
            converted to a NumPy array.
    """
    # Only the first entry along the leading axis is used
    # (presumably the batch axis -- confirm against the caller).
    first_logits = model_outputs.logits[0]
    return torch.sigmoid(first_logits).cpu().numpy()