Skip to content

I/O

speechline.utils.io

export_segment_audio_wav(output_wav_path, segment)

Export segment audio to WAV.

Equivalent to:

example_export_segment_audio_wav.sh
ffmpeg -i {segment} -acodec pcm_s16le -ac 1 -ar 16000 {output_wav_path}

Parameters:

Name Type Description Default
output_wav_path str

Path to WAV file.

required
segment AudioSegment

Audio segment to export.

required
Source code in speechline/utils/io.py
def export_segment_audio_wav(output_wav_path: str, segment: AudioSegment) -> None:
    """
    Write an audio segment to disk as a WAV file.

    The export is delegated to `segment.export`, with extra encoder
    arguments so that the result matches:

    ```sh title="example_export_segment_audio_wav.sh"
    ffmpeg -i {segment} -acodec pcm_s16le -ac 1 -ar 16000 {output_wav_path}
    ```

    Args:
        output_wav_path (str):
            Destination path of the WAV file.
        segment (AudioSegment):
            Audio segment whose samples are written out.
    """
    # Flag/value pairs forwarded verbatim to the encoder, in command-line order.
    ffmpeg_args = [
        "-acodec",
        "pcm_s16le",
        "-ac",
        "1",
        "-ar",
        "16000",
    ]
    segment.export(output_wav_path, format="wav", parameters=ffmpeg_args)

export_segment_transcripts_tsv(output_tsv_path, segment)

Export segment transcripts to TSV of structure:

example_output_segment_transcripts.tsv
start_time_in_secs  end_time_in_secs        label
start_time_in_secs  end_time_in_secs        label
...

Parameters:

Name Type Description Default
output_tsv_path str

Path to TSV file.

required
segment List[Dict[str, Union[str, float]]]

List of offsets in segment.

required
Source code in speechline/utils/io.py
def export_segment_transcripts_tsv(output_tsv_path: str, segment: List[Dict[str, Union[str, float]]]) -> None:
    """
    Export segment transcripts to TSV of structure:

    ```tsv title="example_output_segment_transcripts.tsv"
    start_time_in_secs\tend_time_in_secs\tlabel
    start_time_in_secs\tend_time_in_secs\tlabel
    ...
    ```

    One row is written per offset, using its `start_time`, `end_time` and
    `text` entries. Missing parent directories of `output_tsv_path` are
    created, and the file is always encoded as UTF-8.

    Args:
        output_tsv_path (str):
            Path to TSV file.
        segment (List[Dict[str, Union[str, float]]]):
            List of offsets in segment.

    Raises:
        KeyError: If an offset lacks `start_time`, `end_time` or `text`.
    """
    # Create the destination directory first, as `export_transcripts_json` does,
    # so both exporters behave the same on a fresh output tree.
    Path(output_tsv_path).parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding: transcript text may be non-ASCII, and the platform
    # default encoding is not guaranteed to be able to represent it.
    with open(output_tsv_path, "w", encoding="utf-8") as f:
        f.writelines(f'{s["start_time"]}\t{s["end_time"]}\t{s["text"]}\n' for s in segment)

export_transcripts_json(output_json_path, offsets)

Exports transcript with offsets as JSON.

example_output_transcripts.json
[
  {
    "text": {text},
    "start_time": {start_time},
    "end_time": {end_time}
  },
  {
    "text": {text},
    "start_time": {start_time},
    "end_time": {end_time}
  },
  ...
]

Parameters:

Name Type Description Default
output_json_path str

Path to output JSON file.

required
offsets List[Dict[str, Union[str, float]]]

List of offsets.

required
Source code in speechline/utils/io.py
def export_transcripts_json(
    output_json_path: str,
    offsets: List[Dict[str, Union[str, float]]],
) -> None:
    """
    Exports transcript with offsets as JSON.

    ```json title="example_output_transcripts.json"
    [
      {
        "text": {text},
        "start_time": {start_time},
        "end_time": {end_time}
      },
      {
        "text": {text},
        "start_time": {start_time},
        "end_time": {end_time}
      },
      ...
    ]
    ```

    Missing parent directories of `output_json_path` are created before
    the file is written. The JSON is indented by two spaces.

    Args:
        output_json_path (str):
            Path to output JSON file.
        offsets (List[Dict[str, Union[str, float]]]):
            List of offsets.
    """
    # `mkdir` returns None, so there is nothing worth binding to a name.
    Path(output_json_path).parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(offsets, f, indent=2)

get_chunk_path(path, outdir, idx, extension)

Generate path to chunk at output directory.

Assumes path as {inputdir}/{lang}/{utt_id}.{old_extension}, and will return {outdir}/{lang}/{utt_id}-{idx}.{extension}

Parameters:

Name Type Description Default
path str

Path to file.

required
outdir str

Output directory where file will be saved.

required
idx int

Index of chunk.

required
extension str

New file extension.

required

Returns:

Type Description
str

Path to chunk at output directory.

Source code in speechline/utils/io.py
def get_chunk_path(path: str, outdir: str, idx: int, extension: str) -> str:
    """
    Build the output path for one chunk of an input file.

    Given `path` of the form `{inputdir}/{lang}/{utt_id}.{old_extension}`,
    the result is `{outdir}/{lang}/{utt_id}-{idx}.{extension}`.

    Args:
        path (str):
            Path to the original file.
        outdir (str):
            Output directory where the chunk will be saved.
        idx (int):
            Index of the chunk.
        extension (str):
            File extension of the chunk, without the leading dot.

    Returns:
        str:
            Path to the chunk inside the output directory.
    """
    # File name of the input without its directory and without its extension.
    stem, _ = os.path.splitext(os.path.basename(path))
    chunk_name = f"{stem}-{idx!s}.{extension}"
    return os.path.join(get_outdir_path(path, outdir), chunk_name)

get_outdir_path(path, outdir)

Generate path at output directory.

Assumes path as {inputdir}/{lang}/*.wav, and will return the directory {outdir}/{lang}

Parameters:

Name Type Description Default
path str

Path to file.

required
outdir str

Output directory where file will be saved.

required

Returns:

Type Description
str

Path to output directory.

Source code in speechline/utils/io.py
def get_outdir_path(path: str, outdir: str) -> str:
    """
    Generate the output directory that corresponds to an input file.

    Assumes `path` as `{inputdir}/{lang}/*.wav`,
    and will return `{outdir}/{lang}`, i.e. `outdir` joined with the name of
    the directory that directly contains the file. The file name itself is
    not part of the result.

    If `path` has no parent directory component (e.g. `"utt.wav"`),
    `outdir` is returned unchanged.

    Args:
        path (str):
            Path to file.
        outdir (str):
            Output directory where file will be saved.

    Returns:
        str:
            Path to output directory.
    """
    components = os.path.normpath(path).split(os.sep)  # split into components
    # Keep only the second-to-last component ({lang}); the file name is
    # dropped, so its extension never needs to be stripped. The slice (rather
    # than an index) yields an empty list when there is no parent component.
    parent_dir = components[-2:-1]
    return os.path.join(outdir, *parent_dir)

pydub_to_np(audio)

Converts pydub AudioSegment into np.float32 of shape [duration_in_seconds * sample_rate, channels], where each value is in range [-1.0, 1.0]. Source: StackOverflow.

Parameters:

Name Type Description Default
audio AudioSegment

AudioSegment to convert.

required

Returns:

Type Description
np.ndarray

Resultant NumPy array of AudioSegment.

Source code in speechline/utils/io.py
def pydub_to_np(audio: AudioSegment) -> np.ndarray:
    """
    Turn a pydub AudioSegment into a `np.float32` array of shape
    `[duration_in_seconds * sample_rate, channels]`,
    with every value scaled into the range `[-1.0, 1.0]`.
    Source: [StackOverflow](https://stackoverflow.com/questions/38015319/how-to-create-a-numpy-array-from-a-pydub-audiosegment/66922265#66922265). # noqa: E501

    Args:
        audio (AudioSegment):
            AudioSegment to convert.

    Returns:
        np.ndarray:
            Resultant NumPy array of AudioSegment.
    """
    flat_samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
    # One row per frame, one column per channel.
    frames = flat_samples.reshape((-1, audio.channels))
    # 2 ** (bits_per_sample - 1), where bits_per_sample = 8 * sample_width.
    full_scale = 1 << (8 * audio.sample_width - 1)
    return frames / full_scale