pip install datasets==2.10.1
pip install soundfile==0.12.1
pip install librosa==0.10.0.post2
from datasets import load_dataset
dataset = load_dataset("Gae8J/gaepago_s")
Outputs
DatasetDict({
    train: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
    validation: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
    test: Dataset({
        features: ['file', 'audio', 'label', 'is_unknown', 'youtube_id'],
        num_rows: 12
    })
})
dataset['train'][0]
Outputs
{'file': 'bark/1_Q80fDGLRM.wav', 'audio': {'path': 'bark/1_Q80fDGLRM.wav', 'array': array([-9.15838356e-08, 6.80501699e-08, 1.97052145e-07, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00]), 'sampling_rate': 16000}, 'label': 0, 'is_unknown': False, 'youtube_id': '1_Q80fDGLRM'}