# Install required packages first (in Colab) so the imports below succeed
!pip install -q tensorflow-hub
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q librosa
!pip install -q diffusers transformers accelerate

# Import after installation
import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
import tensorflow_hub as hub
import torch
import whisper
from PIL import Image
from diffusers import (
    StableDiffusionXLPipeline,
    DPMSolverMultistepScheduler,
    StableDiffusionUpscalePipeline,
)
# Mount Google Drive if in Colab
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    # ENTER AUDIO PATH =======================================
    AUDIO_FILE_PATH = "/content/drive/MyDrive/trump_gets_laughed_at.mp3"
    # ========================================================
    OUTPUT_DIR = "/content/drive/MyDrive/AI_Generated_Images"
else:
    AUDIO_FILE_PATH = "path/to/your/audio.wav"  # Replace with local path
    OUTPUT_DIR = "./AI_Generated_Images"
# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Configure GPU properly
print("Configuring GPU...")
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPU configured successfully: {len(physical_devices)} GPU(s) found")
        device = "cuda"
        torch_dtype = torch.float16
    except Exception as e:
        print(f"Error configuring GPU: {e}")
        device = "cpu"
        torch_dtype = torch.float32
else:
    print("No GPU found. Running on CPU.")
    device = "cpu"
    torch_dtype = torch.float32
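# Note: TensorFlow (YAMNet) and PyTorch (Whisper, Stable Diffusion) manage
# GPU memory independently; memory growth keeps TensorFlow from grabbing the
# whole GPU up front, while `device` and `torch_dtype` steer the PyTorch
# models loaded below.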
# Function to download YAMNet class map
def get_yamnet_class_map():
    class_map_path = 'yamnet_class_map.csv'
    if not os.path.exists(class_map_path):
        print("Downloading YAMNet class map...")
        import urllib.request
        url = ('https://raw.githubusercontent.com/tensorflow/models/master/'
               'research/audioset/yamnet/yamnet_class_map.csv')
        urllib.request.urlretrieve(url, class_map_path)
    return class_map_path
# Load YAMNet model with error handling
print("Loading YAMNet model...")
try:
    # Force CPU execution for YAMNet, as it seems to have issues on the GPU
    with tf.device('/cpu:0'):
        yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
    print("YAMNet model loaded successfully on CPU")
except Exception as e:
    print(f"Error loading YAMNet model: {e}")
    yamnet_model = None
def load_yamnet_class_names():
    class_names_path = get_yamnet_class_map()
    # The class map CSV has three columns (index, mid, display_name);
    # column 2 holds the human-readable class name.
    df = pd.read_csv(class_names_path, header=0)
    return df.iloc[:, 2].tolist()
def classify_environmental_sounds(audio_path, threshold=0.15):
    try:
        print(f"Loading audio file: {audio_path}")
        waveform, sr = librosa.load(audio_path, sr=16000)
        print(f"Audio loaded: {len(waveform)} samples, {sr}Hz")
        # Ensure the waveform is the right shape and type
        waveform = waveform.astype(np.float32)
        # Process in smaller chunks to avoid memory issues
        chunk_size = 5 * sr  # 5-second chunks
        all_scores = []
        num_chunks = (len(waveform) + chunk_size - 1) // chunk_size
        # Process each chunk separately
        for i in range(0, len(waveform), chunk_size):
            chunk = waveform[i:i + chunk_size]
            if len(chunk) < sr:  # Skip chunks shorter than 1 second
                continue
            chunk_tensor = tf.convert_to_tensor(chunk, dtype=tf.float32)
            print(f"Processing chunk {i // chunk_size + 1}/{num_chunks}")
            try:
                # Clear previous session memory
                tf.keras.backend.clear_session()
                # Process the chunk with YAMNet on CPU
                with tf.device('/cpu:0'):
                    scores, embeddings, spectrogram = yamnet_model(chunk_tensor)
                all_scores.append(scores)
            except Exception as chunk_err:
                print(f"Error processing chunk: {chunk_err}")
                # Continue with the next chunk
                continue
        if not all_scores:
            print("No valid audio chunks processed")
            return []
        # Combine scores from all chunks
        combined_scores = tf.concat(all_scores, axis=0)
        mean_scores = tf.reduce_mean(combined_scores, axis=0).numpy()
        class_names = load_yamnet_class_names()
        detected_classes = [
            (class_names[i], float(mean_scores[i]))
            for i in np.where(mean_scores > threshold)[0]
            if i < len(class_names)
        ]
        return sorted(detected_classes, key=lambda x: x[1], reverse=True)
    except Exception as e:
        print(f"Error in classify_environmental_sounds: {e}")
        import traceback
        traceback.print_exc()
        return []
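# Example usage of the classifier (a minimal sketch; "sample.wav" is a
# hypothetical local file, not part of this pipeline):
#
#   sounds = classify_environmental_sounds("sample.wav", threshold=0.2)
#   for name, score in sounds[:5]:
#       print(f"{name}: {score:.3f}")
#
# Each entry pairs a YAMNet class name with its mean score across chunks,
# sorted from most to least confident.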
# Load Whisper model
print("Loading Whisper model...")
try:
    # Continue using the GPU for Whisper, since it works correctly there
    whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
    whisper_model = whisper.load_model("base", device=whisper_device)
    print(f"Whisper model loaded successfully on {whisper_device}")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    whisper_model = None
def transcribe_with_validation(audio_path, min_confidence=0.5, min_words=3):
    try:
        if whisper_model is None:
            return {'text': '', 'confidence': 0, 'is_valid': False}
        print(f"Transcribing audio: {audio_path}")
        result = whisper_model.transcribe(audio_path)
        transcription = result.get("text", "").strip()
        segments = result.get("segments", [])
        # Whisper segments report 'avg_logprob' rather than a direct
        # confidence score; exp(avg_logprob) serves as a rough per-segment
        # confidence proxy. With no segments at all, treat confidence as 0.
        confidences = [
            float(np.exp(seg.get('avg_logprob', -0.35))) for seg in segments
        ]
        avg_confidence = float(np.mean(confidences)) if confidences else 0.0
        is_valid = (
            avg_confidence >= min_confidence and
            len(transcription.split()) >= min_words and
            any(c.isalpha() for c in transcription)
        )
        print(f"Transcription: '{transcription}'")
        print(f"Confidence: {avg_confidence:.2f}, Valid: {is_valid}")
        return {
            'text': transcription,
            'confidence': avg_confidence,
            'is_valid': is_valid
        }
    except Exception as e:
        print(f"Error in transcribe_with_validation: {e}")
        import traceback
        traceback.print_exc()
        return {'text': '', 'confidence': 0, 'is_valid': False}
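# Example usage of the transcriber (a sketch; "sample.wav" is hypothetical):
#
#   stt = transcribe_with_validation("sample.wav", min_confidence=0.5)
#   if stt['is_valid']:
#       print(f"Heard: {stt['text']} (confidence {stt['confidence']:.2f})")
#
# The validation gate keeps short, low-confidence, or non-alphabetic
# transcriptions from steering the image prompt.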
# ENHANCED: More dynamic and contextual prompt creation
def create_image_prompt(audio_path, env_threshold=0.15):
    """
    Creates a context-aware image prompt based on audio analysis
    that adapts to the specific audio content detected.
    """
    try:
        # Process the audio with both YAMNet and Whisper
        env_results = []
        env_labels = []
        env_types = set()
        if yamnet_model is not None:
            print("Classifying environmental sounds...")
            env_results = classify_environmental_sounds(
                audio_path, threshold=env_threshold
            )
            env_labels = [label for label, score in env_results[:10]]  # Top 10 labels
            print(f"Top detected sounds: {env_labels}")
        # Categorize the environment for better context understanding
        weather_sounds = {'Rain', 'Thunder', 'Wind', 'Thunderstorm', 'Storm'}
        nature_sounds = {'Water', 'Stream', 'River', 'Ocean', 'Waves',
                         'Forest', 'Birds', 'Animals'}
        indoor_sounds = {'Inside, small room', 'Speech', 'Conversation',
                         'Music', 'Keyboard', 'Computer', 'Typing'}
        urban_sounds = {'Traffic', 'Vehicle', 'Car', 'Engine', 'Urban',
                        'City', 'Street'}
        crowd_sounds = {'Crowd', 'Applause', 'Cheering', 'Speech',
                        'Conference', 'Meeting'}
        # Identify which environment types are present
        for label in env_labels:
            if any(sound.lower() in label.lower() for sound in weather_sounds):
                env_types.add('weather')
            if any(sound.lower() in label.lower() for sound in nature_sounds):
                env_types.add('nature')
            if any(sound.lower() in label.lower() for sound in indoor_sounds):
                env_types.add('indoor')
            if any(sound.lower() in label.lower() for sound in urban_sounds):
                env_types.add('urban')
            if any(sound.lower() in label.lower() for sound in crowd_sounds):
                env_types.add('crowd')
        print("Transcribing speech...")
        stt_result = transcribe_with_validation(audio_path)
        # Build a context-aware prompt based on the detected content;
        # this dynamic prompt construction is the key enhancement.
        if stt_result['is_valid'] and len(stt_result['text']) > 10:
            # Speech is prominent - build a speech-focused scene
            # with environmental context
            speech_text = stt_result['text']
            speech_topic = extract_topic(speech_text)
            speech_emotion = analyze_speech_emotion(speech_text)
            # Determine the speech context (formal/informal, etc.)
            speech_context = determine_speech_context(
                env_types, env_labels, speech_text
            )
            # Build the prompt around the speech with appropriate context
            prompt = (f"A {speech_context} scene with a person speaking "
                      f"about {speech_topic}")
            # Add emotional context if detected
            if speech_emotion:
                prompt += f" with {speech_emotion} expression"
            # Add environment context if available
            if env_types:
                environment = get_environment_description(env_types, env_labels)
                prompt += f". {environment}"
        elif env_types:
            # No clear speech - focus on environmental sounds
            primary_env = determine_primary_environment(env_types, env_labels)
            # Build a rich environmental scene based on detected sounds
            prompt = primary_env
            # Add weather details if detected
            weather_details = extract_weather_details(env_labels)
            if weather_details:
                prompt += f" {weather_details}"
            # Add activity details if detected
            activity_details = extract_activity_details(env_labels)
            if activity_details:
                prompt += f" {activity_details}"
        else:
            # Fallback for when no clear context is detected
            prompt = ("A realistic environmental scene with natural "
                      "lighting and atmosphere")
        # Add quality enhancers appropriate for the type of scene
        has_person = 'person' in prompt.lower() or 'speaking' in prompt.lower()
        prompt = enhance_prompt_quality(prompt, env_types, has_person)
        return prompt
    except Exception as e:
        print(f"Error in create_image_prompt: {e}")
        import traceback
        traceback.print_exc()
        return "Realistic natural environment scene"
def extract_topic(text):
    """Extract the main topic from speech text"""
    # Simplified topic extraction - in production you might use NLP
    if len(text) < 20:
        return text.strip()
    # Basic topic extraction: take the first sentence or phrase
    first_sentence = text.split('.')[0].strip()
    if len(first_sentence) > 50:
        return first_sentence[:50] + "..."
    return first_sentence
def analyze_speech_emotion(text):
    """Detect emotional tone in speech text"""
    # Simple keyword-based emotion detection
    positive_words = ['happy', 'excited', 'glad', 'wonderful', 'great',
                      'amazing', 'joy']
    negative_words = ['sad', 'angry', 'upset', 'terrible', 'awful',
                      'worried', 'concerned']
    neutral_words = ['explain', 'inform', 'tell', 'describe', 'discuss']
    text_lower = text.lower()
    # Count emotion words
    positive_count = sum(1 for word in positive_words if word in text_lower)
    negative_count = sum(1 for word in negative_words if word in text_lower)
    neutral_count = sum(1 for word in neutral_words if word in text_lower)
    # Determine the dominant emotion
    if positive_count > negative_count and positive_count > neutral_count:
        return "positive"
    elif negative_count > positive_count and negative_count > neutral_count:
        return "serious or concerned"
    elif neutral_count > 0:
        return "informative"
    # Default to neutral if no clear emotion is detected
    return ""
def determine_speech_context(env_types, env_labels, speech_text):
    """Determine the context of speech based on environment and content"""
    # Check for formal settings
    formal_indicators = ['conference', 'meeting', 'lecture', 'presentation',
                         'speech']
    formal_env = any(indicator in ' '.join(env_labels).lower()
                     for indicator in formal_indicators)
    # Check for formal language in the speech itself
    formal_speech = any(term in speech_text.lower() for term in
                        ['ladies and gentlemen', 'thank you for',
                         'i am pleased to', 'in conclusion',
                         'our company', 'organization'])
    # Check for casual settings
    casual_indicators = ['music', 'party', 'conversation', 'chat', 'laugh']
    casual_env = any(indicator in ' '.join(env_labels).lower()
                     for indicator in casual_indicators)
    # Determine the context
    if (formal_env or formal_speech) and not casual_env:
        return "formal presentation"
    elif casual_env:
        return "casual conversation"
    elif 'indoor' in env_types:
        return "indoor discussion"
    elif 'crowd' in env_types:
        return "public address"
    else:
        return "realistic speaking"
def determine_primary_environment(env_types, env_labels):
    """Determine the primary environment type based on detected sounds"""
    env_priorities = ['weather', 'nature', 'urban', 'crowd', 'indoor']
    # Check environment types in priority order
    for env in env_priorities:
        if env in env_types:
            if env == 'weather':
                return "A dramatic weather scene with atmospheric conditions"
            elif env == 'nature':
                return "A beautiful natural landscape with organic elements"
            elif env == 'urban':
                return "A detailed urban cityscape with architectural elements"
            elif env == 'crowd':
                return "A vibrant scene with a gathering of people"
            elif env == 'indoor':
                return "A detailed interior space with ambient lighting"
    # Default environment if no clear type is determined
    return "A realistic environmental scene"
def get_environment_description(env_types, env_labels):
    """Generate a rich environment description based on detected types
    and labels"""
    descriptions = []
    # Add specific descriptions based on environment types
    if 'weather' in env_types:
        weather_terms = [label for label in env_labels if label.lower() in
                         ['rain', 'thunder', 'wind', 'storm', 'lightning']]
        if weather_terms:
            descriptions.append(
                f"with {' and '.join(weather_terms).lower()} visible"
            )
    if 'nature' in env_types:
        nature_desc = "in a natural setting"
        water_terms = [label for label in env_labels if label.lower() in
                       ['water', 'river', 'stream', 'ocean', 'waves']]
        if water_terms:
            nature_desc += f" with {water_terms[0].lower()}"
        descriptions.append(nature_desc)
    if 'indoor' in env_types:
        indoor_desc = "in an indoor space"
        if "Inside, small room" in env_labels:
            indoor_desc = "in a small room with intimate lighting"
        elif "Inside, large room" in env_labels:
            indoor_desc = "in a large hall with spacious architecture"
        descriptions.append(indoor_desc)
    if 'urban' in env_types:
        descriptions.append("in an urban environment with city elements")
    if 'crowd' in env_types:
        descriptions.append("with a crowd of attentive people")
    # Combine the descriptions
    if descriptions:
        return " ".join(descriptions)
    else:
        return "in a detailed environment"
def extract_weather_details(env_labels):
    """Extract weather details from environment labels"""
    weather_details = []
    # Look for specific weather conditions
    if any('rain' in label.lower() for label in env_labels):
        weather_details.append("rain falling")
    if any('thunder' in label.lower() for label in env_labels):
        weather_details.append("thunderclouds")
    if any('wind' in label.lower() for label in env_labels):
        weather_details.append("visible wind effects")
    if any('storm' in label.lower() for label in env_labels):
        weather_details.append("a storm in progress")
    # Combine the details ("with" is prepended once here)
    if weather_details:
        return "with " + ", ".join(weather_details)
    return ""
def extract_activity_details(env_labels):
    """Extract activity details from environment labels"""
    activities = []
    # Look for human activities
    if any(label.lower() in ['speech', 'speaking', 'talk']
           for label in env_labels):
        activities.append("people engaged in conversation")
    if any(label.lower() in ['keyboard', 'typing', 'computer']
           for label in env_labels):
        activities.append("someone working on a computer")
    if any(label.lower() in ['music', 'singing', 'instrument']
           for label in env_labels):
        activities.append("music being played")
    # Combine the activities ("showing" is prepended once here)
    if activities:
        return "showing " + ", ".join(activities)
    return ""
def enhance_prompt_quality(prompt, env_types, has_person):
    """Add appropriate quality enhancers to the prompt based on content"""
    # Base quality enhancers
    quality_base = "highly detailed, sharp focus, professional photography"
    # Add environment-specific quality enhancers
    if 'weather' in env_types:
        prompt += f", {quality_base}, volumetric lighting, atmospheric conditions"
    elif 'nature' in env_types:
        prompt += (f", {quality_base}, natural lighting, organic textures, "
                   "atmospheric perspective")
    elif 'urban' in env_types:
        prompt += (f", {quality_base}, urban textures, architectural details, "
                   "realistic lighting")
    elif 'indoor' in env_types:
        prompt += (f", {quality_base}, interior lighting, ambient occlusion, "
                   "realistic textures")
    else:
        prompt += f", {quality_base}, realistic lighting"
    # Add person-specific quality enhancers if needed
    if has_person:
        prompt += (", detailed facial features, realistic expression, "
                   "natural posture")
    return prompt
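# Example (hypothetical inputs):
#   enhance_prompt_quality("A beautiful natural landscape", {'nature'}, False)
#   -> "A beautiful natural landscape, highly detailed, sharp focus,
#       professional photography, natural lighting, organic textures,
#       atmospheric perspective"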
# Image Generator class
class ImageGenerator:
    def __init__(self):
        self.base_model_loaded = False
        self.upscaler_loaded = False
        self.base_model = None
        self.upscaler = None

    def load_base_model(self):
        """Load the base Stable Diffusion XL model"""
        if not self.base_model_loaded:
            print("Loading Stable Diffusion XL model...")
            try:
                # Use SDXL for high-quality images
                self.base_model = StableDiffusionXLPipeline.from_pretrained(
                    "stabilityai/stable-diffusion-xl-base-1.0",
                    torch_dtype=torch_dtype,
                    variant="fp16",
                    use_safetensors=True
                )
                # Optimize for speed and memory
                self.base_model.scheduler = DPMSolverMultistepScheduler.from_config(
                    self.base_model.scheduler.config,
                    algorithm_type="sde-dpmsolver++",
                    use_karras_sigmas=True
                )
                # Enable memory optimizations. enable_model_cpu_offload()
                # manages device placement itself, so only move the pipeline
                # manually when not offloading.
                self.base_model.enable_attention_slicing()
                if torch.cuda.is_available():
                    self.base_model.enable_model_cpu_offload()
                else:
                    self.base_model = self.base_model.to(device)
                self.base_model_loaded = True
                print("Base model loaded successfully")
            except Exception as e:
                print(f"Error loading base model: {e}")
                raise
        return self.base_model
    def load_upscaler(self):
        """Load the upscaler model for higher resolution"""
        if not self.upscaler_loaded:
            print("Loading upscaler model...")
            try:
                self.upscaler = StableDiffusionUpscalePipeline.from_pretrained(
                    "stabilityai/stable-diffusion-x4-upscaler",
                    torch_dtype=torch_dtype
                )
                # As above, let CPU offload manage device placement when
                # CUDA is available; otherwise move the pipeline explicitly.
                if torch.cuda.is_available():
                    self.upscaler.enable_model_cpu_offload()
                else:
                    self.upscaler = self.upscaler.to(device)
                self.upscaler_loaded = True
                print("Upscaler model loaded successfully")
            except Exception as e:
                print(f"Error loading upscaler: {e}")
                # Continue without the upscaler
        return self.upscaler
    def generate_image(self, prompt, negative_prompt=None, guidance_scale=7.5,
                       steps=30, width=1024, height=1024, upscale=True,
                       enhance_faces=True, num_images=1):
        """Generate high-quality images with environmental context awareness"""
        # Load the base model if not already loaded
        if not self.base_model_loaded:
            self.load_base_model()
        if negative_prompt is None:
            negative_prompt = (
                "deformed, bad anatomy, disfigured, poorly drawn face, "
                "mutation, mutated, extra limbs, ugly, poorly drawn hands, "
                "missing limbs, blurry, grainy, watermark, signature, "
                "cut off, low-res"
            )
        # Generate images with the base model
        print(f"Generating {num_images} image(s) with prompt: {prompt}")
        images = self.base_model(
            prompt=prompt,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            width=width,
            height=height,
            num_images_per_prompt=num_images
        ).images
        # Process each generated image
        processed_images = []
        for idx, img in enumerate(images):
            print(f"Processing image {idx + 1}/{len(images)}...")
            # Upscale if requested; _upscale_image loads the upscaler lazily
            if upscale:
                img = self._upscale_image(img)
            processed_images.append(img)
            # Save the image
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{OUTPUT_DIR}/image_{timestamp}_{idx}.png"
            img.save(filename)
            print(f"Saved image to {filename}")
        return processed_images

    def _upscale_image(self, image):
        """Upscale an image to a higher resolution"""
        try:
            if not self.upscaler_loaded:
                self.load_upscaler()
            if self.upscaler_loaded:
                # Resize to match the upscaler's expected input
                low_res_img = image.resize((512, 512))
                upscaled = self.upscaler(
                    prompt="high quality, detailed, sharp focus",
                    image=low_res_img,
                    num_inference_steps=20
                ).images[0]
                return upscaled
        except Exception as e:
            print(f"Error during upscaling: {e}")
        # Return the original image if upscaling failed or is unavailable
        return image
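# Standalone usage of the generator (a sketch; the prompt here is made up
# and bypasses the audio-analysis stage entirely):
#
#   gen = ImageGenerator()
#   imgs = gen.generate_image(
#       prompt="A dramatic weather scene with rain falling, highly detailed",
#       steps=30,
#       upscale=False,   # skip the x4 upscaler to save VRAM
#       num_images=1,
#   )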
# Complete pipeline function
def audio_to_image(audio_file_path, num_images=1):
    """Complete pipeline from audio file to images"""
    # Process the audio to generate a contextual prompt
    prompt = create_image_prompt(audio_file_path)
    print(f"\nGenerated prompt: {prompt}")
    # Initialize the image generator
    generator = ImageGenerator()
    # Determine appropriate generation parameters based on prompt content
    prompt_lower = prompt.lower()
    has_face = ("person" in prompt_lower or "people" in prompt_lower or
                "speaking" in prompt_lower)
    is_weather = ("weather" in prompt_lower or "rain" in prompt_lower or
                  "storm" in prompt_lower)
    is_nature = ("nature" in prompt_lower or "landscape" in prompt_lower or
                 "forest" in prompt_lower)
    # Adjust the guidance scale based on content
    guidance_scale = 8.0 if has_face else 7.5
    if is_weather or is_nature:
        guidance_scale = 7.0  # Lower for natural scenes
    # Adjust steps based on complexity
    steps = 35 if has_face else 30
    if is_weather:
        steps = 40  # More steps for complex weather
    # Generate the images
    images = generator.generate_image(
        prompt=prompt,
        guidance_scale=guidance_scale,
        steps=steps,
        width=1024,
        height=1024,
        upscale=True,
        enhance_faces=has_face,
        num_images=num_images
    )
    return images, prompt
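# The main block below runs the pipeline once on AUDIO_FILE_PATH; for a
# batch, something like this sketch would work (paths are hypothetical):
#
#   for path in ["clip1.wav", "clip2.wav"]:
#       if os.path.exists(path):
#           audio_to_image(path, num_images=2)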
# Main execution
if __name__ == "__main__":
print(f"Using audio file: {AUDIO_FILE_PATH}")
if not os.path.exists(AUDIO_FILE_PATH):
print(f"ERROR: File not found at {AUDIO_FILE_PATH}")
else:
print("\nProcessing audio to generate images...")
images, prompt = audio_to_image(AUDIO_FILE_PATH, num_images=1)
print("\n=== AUDIO ANALYSIS AND IMAGE GENERATION COMPLETE ===")
print(f"Prompt: {prompt}")
print(f"Generated {len(images)} images in {OUTPUT_DIR}")
print("====================================================")