-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathasr.py
More file actions
105 lines (91 loc) · 3.18 KB
/
Copy pathasr.py
File metadata and controls
105 lines (91 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -*- coding: utf-8 -*-
"""ASR.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1U1xujD9Aq5QUGUULiBzCdVib9PAFUEqg
"""
# !pip install speechbrain
# !pip install transformers
# !pip install pydub
# !pip install librosa
import time
from time import perf_counter
import numpy as np
import matplotlib.pyplot as plt
import librosa
from pydub import AudioSegment
import os
from google.colab import files
import moviepy.editor
from transformers import pipeline
from speechbrain.pretrained import EncoderDecoderASR
# Shared ASR model instance, created once when this module is imported and
# reused by transcribe_audio() and transcribe_video() below. It is built from
# the "speechbrain/asr-crdnn-rnnlm-librispeech" hparams source, with
# "pretrained_models/asr-crdnn-rnnlm-librispeech" passed as its save directory.
asr_model2 = EncoderDecoderASR.from_hparams(source="speechbrain/asr-crdnn-rnnlm-librispeech", savedir="pretrained_models/asr-crdnn-rnnlm-librispeech")
def transcribe_audio(fileList=None, segment_ms=30000):
    """Transcribe WAV files by feeding them to the ASR model in fixed windows.

    Each input file is cut into consecutive windows of ``segment_ms``
    milliseconds (the last window may be shorter). Every window is exported
    to ``Audio_<file#>_segment_<segment#>.wav`` in the current working
    directory, transcribed with the module-level ``asr_model2``, and the
    partial transcripts of a file are joined with single spaces. The segment
    files are left on disk.

    Args:
        fileList: Paths of the WAV files to transcribe. When ``None`` or an
            empty list, ``files.upload()`` (Colab upload dialog) is called and
            the uploaded file names are used instead.
        segment_ms: Window length in milliseconds. Defaults to 30000, the
            value that used to be hard-coded.

    Returns:
        A list with one transcript string per input file, in input order.
        A file that contains no audio yields an empty string.

    Raises:
        ValueError: If ``segment_ms`` is not a positive number.
    """
    segment_ms = int(segment_ms)
    if segment_ms <= 0:
        raise ValueError("segment_ms must be a positive number of milliseconds")

    # ``None`` replaces the former mutable ``[]`` default; an explicit empty
    # list still triggers the upload dialog exactly as before.
    if fileList is None or fileList == []:
        uploaded = files.upload()
        audio_paths = list(uploaded.keys())
    else:
        audio_paths = fileList

    transcripts = []
    started = perf_counter()
    # enumerate() gives every file its own number; the old counter was reset
    # to 1 on each iteration, so all files shared the "Audio_1_" prefix.
    for file_idx, path in enumerate(audio_paths, start=1):
        # Decode the file once instead of once per segment.
        audio = AudioSegment.from_wav(path)
        # len() of an AudioSegment is its duration in whole milliseconds, so
        # the windowing needs no float comparison to detect the last segment.
        total_ms = len(audio)

        pieces = []
        window_starts = range(0, total_ms, segment_ms)
        for seg_idx, start_ms in enumerate(window_starts, start=1):
            name = 'Audio_' + str(file_idx) + '_segment_' + str(seg_idx) + '.wav'
            # Slicing past the end is clamped by pydub, which makes the last
            # window shorter. export() returns the open output handle; close
            # it so the data is flushed before the model reads the file.
            audio[start_ms:start_ms + segment_ms].export(name, format="wav").close()
            print("\nAnalizando: " + name)
            pieces.append(asr_model2.transcribe_file(os.path.join(os.getcwd(), name)))

        transcripts.append(" ".join(pieces))

    print("\nTerminado en: " + str(f'{perf_counter() - started:.2f}') + " seg.\n")
    return transcripts
def transcribe_video(fileList=None, segment_ms=30000):
    """Extract the audio track of video files and transcribe it in windows.

    For every input video the audio track is written to
    ``AudioExtraction_Video_<file#>.wav`` in the current working directory.
    That WAV is then cut into consecutive windows of ``segment_ms``
    milliseconds (the last window may be shorter); each window is exported
    to ``Video_<file#>_segment_<segment#>.wav``, transcribed with the
    module-level ``asr_model2``, and the partial transcripts of a file are
    joined with single spaces. The intermediate WAV files are left on disk.

    Args:
        fileList: Paths of the video files to transcribe. When ``None`` or an
            empty list, ``files.upload()`` (Colab upload dialog) is called and
            the uploaded file names are used instead.
        segment_ms: Window length in milliseconds. Defaults to 30000, the
            value that used to be hard-coded.

    Returns:
        A list with one transcript string per input file, in input order.
        A file whose extracted audio is empty yields an empty string.

    Raises:
        ValueError: If ``segment_ms`` is not a positive number.
    """
    segment_ms = int(segment_ms)
    if segment_ms <= 0:
        raise ValueError("segment_ms must be a positive number of milliseconds")

    # ``None`` replaces the former mutable ``[]`` default; an explicit empty
    # list still triggers the upload dialog exactly as before.
    if fileList is None or fileList == []:
        uploaded = files.upload()
        video_paths = list(uploaded.keys())
    else:
        video_paths = fileList

    transcripts = []
    started = perf_counter()
    # enumerate() gives every file its own number; the old counter was reset
    # to 1 on each iteration, so every video overwrote
    # "AudioExtraction_Video_1.wav" and reused the "Video_1_" prefix.
    for file_idx, path in enumerate(video_paths, start=1):
        wav_name = "AudioExtraction_Video_" + str(file_idx) + (".wav")

        # abspath() resolves relative names against the working directory and
        # leaves absolute paths intact (plain concatenation mangled them).
        video = moviepy.editor.VideoFileClip(os.path.abspath(path))
        try:
            print("\nExporting audio from video: " + path + "\nTo: " + wav_name)
            video.audio.write_audiofile(os.path.join(os.getcwd(), wav_name))
        finally:
            # Release the reader held by the clip even if the export fails.
            video.close()

        # Decode the extracted track once instead of once per segment, and
        # take the duration from it (whole milliseconds) rather than probing
        # the video container separately.
        audio = AudioSegment.from_wav(wav_name)
        total_ms = len(audio)

        pieces = []
        window_starts = range(0, total_ms, segment_ms)
        for seg_idx, start_ms in enumerate(window_starts, start=1):
            name = 'Video_' + str(file_idx) + '_segment_' + str(seg_idx) + '.wav'
            # Slicing past the end is clamped by pydub, which makes the last
            # window shorter. export() returns the open output handle; close
            # it so the data is flushed before the model reads the file.
            audio[start_ms:start_ms + segment_ms].export(name, format="wav").close()
            print("\nAnalizando: " + name)
            pieces.append(asr_model2.transcribe_file(os.path.join(os.getcwd(), name)))

        transcripts.append(" ".join(pieces))

    print("\nTerminado en: " + str(f'{perf_counter() - started:.2f}') + " seg.\n")
    return transcripts