NLP
PROGRAM 1: Sentence and Word Tokenization
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
text = " If you drive to the sun at 55 mph, it would take you about 193 years"
print(sent_tokenize(text))
print(word_tokenize(text))
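Note: both tokenizers rely on NLTK's Punkt models, which are not bundled with the library itself. A minimal one-time setup, assuming a fresh NLTK install (recent releases may additionally request the 'punkt_tab' package):
import nltk
nltk.download('punkt')  # Punkt models used by sent_tokenize and word_tokenize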
OUTPUT :
PROGRAM 2: Stop-Word Removal
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
example_sent = """This is a sample sentence,
showing off the stop words filtration."""
stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(example_sent)
filtered_sentence = [w for w in word_tokens if w.lower() not in stop_words]  # case-insensitive stop-word check
print(word_tokens)
print(filtered_sentence)
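Note: the stop-word list is part of NLTK's corpus data and must be fetched once before stopwords.words('english') will work:
import nltk
nltk.download('stopwords')  # English stop-word list used above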
OUTPUT :
PROGRAM 3: Lemmatization with WordNet
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
print("rocks :", lemmatizer.lemmatize("rocks"))
print("corpora :", lemmatizer.lemmatize("corpora"))
print("better :", lemmatizer.lemmatize("better", pos="a"))
OUTPUT :
PROGRAM 4: Stemming with the Porter Stemmer
from nltk.stem import PorterStemmer
ps = PorterStemmer()
words = ["likes","liked","likely","liking"]
for w in words:
    print(w, ":", ps.stem(w))
OUTPUT :
PROGRAM 5: Word Similarity with WordNet (Wu-Palmer)
from nltk.corpus import wordnet
list1 = ['Compare', 'require']
list2 = ['choose', 'copy']
similarities = []  # avoid shadowing the built-in name 'list'
for word1 in list1:
    for word2 in list2:
        wordFromList1 = wordnet.synsets(word1)[0]
        wordFromList2 = wordnet.synsets(word2)[0]
        s = wordFromList1.wup_similarity(wordFromList2)
        similarities.append(s)
print(max(similarities))
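Taking only the maximum score discards which pair produced it. A variant sketch that keeps the word pair alongside the score, under the same assumption as the program above (every word has at least one synset):
best = None
for word1 in list1:
    for word2 in list2:
        s = wordnet.synsets(word1)[0].wup_similarity(wordnet.synsets(word2)[0])
        if best is None or s > best[0]:
            best = (s, word1, word2)
print(best)  # (score, word from list1, word from list2)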
OUTPUT :
PROGRAM 6: Named Entity Recognition
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
def extract_named_entities(text):
    words = word_tokenize(text)
    pos_tags = pos_tag(words)
    named_entities = ne_chunk(pos_tags)
    return named_entities
text = "Albert Einstein was born in Germany. He was awarded the Nobel Prize in Physics in 1921."
named_entities = extract_named_entities(text)
named_entities.draw()
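Note: ne_chunk depends on several NLTK data packages (names as in classic NLTK releases; newer versions may ask for *_tab variants), and draw() opens a Tkinter window, so it needs a desktop session. A setup sketch plus a console-friendly alternative:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
print(named_entities)  # prints the chunk tree instead of drawing it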
OUTPUT :
PROGRAM 7: Sentiment Analysis with VADER
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
def analyse_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    sentiment_score = sid.polarity_scores(text)
    return sentiment_score
text = "I love this product!"
sentiment_scores = analyse_sentiment(text)
print(f"Sentiment scores : {sentiment_scores}")
OUTPUT :
PROGRAM 8: Text-to-Speech with gTTS
from gtts import gTTS
import os
def text_to_speech(text, lang='en'):
    tts = gTTS(text=text, lang=lang)
    tts.save("output.mp3")
    os.system("start output.mp3")  # opens the file with the default player (Windows only)
if __name__ == "__main__":
    text = "Hello, this is a text-to-speech conversion example."
    text_to_speech(text)
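Note: the "start" command only exists on Windows. A hedged cross-platform sketch, assuming a desktop environment with a default audio player registered:
import os, subprocess, sys
def play_audio(path):
    if sys.platform.startswith("win"):
        os.startfile(path)                    # Windows default handler
    elif sys.platform == "darwin":
        subprocess.call(["open", path])       # macOS
    else:
        subprocess.call(["xdg-open", path])   # most Linux desktops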
OUTPUT :
PROGRAM 9: Speech-to-Text with SpeechRecognition
import speech_recognition as sr
recognizer = sr.Recognizer()
def audio_file_to_text(file_path):
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        print("Transcription: {}".format(text))
    except sr.UnknownValueError:
        print("Sorry, I couldn't understand the audio")
    except sr.RequestError:
        print("Couldn't request results from the Google Speech Recognition service")
audio_file_path ="Y:/ttsMP3.com_VoiceText_2025-1-29_11-40-18.wav"
audio_file_to_text(audio_file_path)
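Note: recognize_google sends the audio to Google's free web API, so an internet connection is required, and AudioFile accepts WAV/AIFF/FLAC input. For live input, a sketch using the microphone (requires the PyAudio package):
import speech_recognition as sr
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak now...")
    audio = recognizer.listen(source)
print(recognizer.recognize_google(audio))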
OUTPUT :
PROGRAM 10: Rule-Based Chatbot with NLTK
import nltk
from nltk.chat.util import Chat, reflections
pairs = [
    [
        r"my name is (.*)",
        ["Hello %1, how can I help you today?",]
    ],
    [
        r"hi|hello|hey",
        ["Hello!", "Hi there!", "Hey!"]
    ],
    [
        r"what is your name?",
        ["I am a chatbot created using NLTK.",]
    ],
    [
        r"how are you?",
        ["I'm doing well, thank you!", "I'm great, how about you?"]
    ],
    [
        r"sorry (.*)",
        ["It's okay, no worries.", "No problem at all."]
    ],
    [
        r"quit",
        ["Goodbye! Have a great day!"]
    ],
]
chatbot = Chat(pairs, reflections)
print("Hi! I'm a chatbot. Type 'quit' to exit.")
chatbot.converse()
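Note: converse() loops on standard input until the quit pattern matches. For scripted testing, Chat.respond returns a single reply, reusing the chatbot defined above; %1 in a response template is filled with the first captured group after the reflections mapping is applied (e.g., "my" becomes "your"):
print(chatbot.respond("my name is Alice"))
print(chatbot.respond("how are you?"))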
OUTPUT :
PROGRAM 12: Spam Filtering with Keyword Matching
import re
emails = [
    "1. Congratulations, you've won a free iPhone! Click here to claim now",
    "2. Dear customer, your bank account needs verification",
    "3. Meeting at 5pm, please be on time",
    "4. Win $10,000 easily! Just sign up now"
]
spam_keywords = ["click here", "win", "verification"]
def filter_spam(emails, keywords):
    return [email for email in emails
            if any(re.search(keyword, email, re.IGNORECASE) for keyword in keywords)]
spam_emails = filter_spam(emails, spam_keywords)
print("Spam Emails:")
for email in spam_emails:
print(email)
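Note: the keywords are compiled as raw regular expressions, so a keyword containing metacharacters (e.g., "$10,000") would misbehave. A variant sketch that escapes them so keywords are always matched literally:
import re
def filter_spam_literal(emails, keywords):
    return [email for email in emails
            if any(re.search(re.escape(k), email, re.IGNORECASE) for k in keywords)]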
OUTPUT :
PROGRAM 13: Text Preprocessing Pipeline
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
import re
# Sample text
text = "This is a Sample Sentence.!!"
print(text)
# Lowercase
text = text.lower()
# Remove punctuation
removepun_text = re.sub(r'[^\w\s]', '', text)
# Tokenize
tokenize_text = word_tokenize(text)
# Remove stopwords
stop_words = set(stopwords.words('english'))
words = [word for word in tokenize_text if word not in stop_words]
# Stemming
ps = PorterStemmer()
words_to_stem = ["Likes", "Liking", "Liked"]
stemmed_words = [ps.stem(w) for w in words_to_stem]
# Lemmatization
lemmatizer = WordNetLemmatizer()
words_to_lemmatize = ["rocks", "corpora"]
lemmatized_words = [lemmatizer.lemmatize(w) for w in words_to_lemmatize]
print("Lower Text:",text)
print("Tokenization:",tokenize_text)
print("Stop Word Removal:",words)
print("Remove Punctuation:",removepun_text)
print("Stemmed Words:",stemmed_words)
print("Lemmatized Words:",lemmatized_words)
OUTPUT :
PROGRAM 14: Part-of-Speech Tagging
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
def pos_tagging(sentence):
    words = word_tokenize(sentence)
    tagged_words = pos_tag(words)
    return tagged_words
sentence = "The quick brown fox jumps over the lazy dog."
tagged_sentence = pos_tagging(sentence)
print("Tagged Sentence:", tagged_sentence)
OUTPUT :
PROGRAM 15: Entity Pair (Relation) Extraction
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.tree import Tree
def extract_entities(sent):
    chunked = ne_chunk(pos_tag(word_tokenize(sent)))
    entities = []
    for subtree in chunked:
        if isinstance(subtree, Tree):
            entity_name = " ".join(token for token, pos in subtree.leaves())
            entity_type = subtree.label()
            entities.append((entity_name, entity_type))
    return entities
def extract_relations(text):
    sentences = nltk.sent_tokenize(text)
    relations = []
    for sent in sentences:
        entities = extract_entities(sent)
        if len(entities) >= 2:
            for i, ent1 in enumerate(entities):
                for ent2 in entities[i + 1:]:
                    relations.append((ent1[0], ent1[1], ent2[0], ent2[1]))
    return relations
# Sample text
text = "Barack Obama was born in Hawaii. He was elected president of the United States in 2008."
relations = extract_relations(text)
for relation in relations:
print(f"Entity 1: {relation[0]} (Type: {relation[1]}) - Entity 2: {relation[2]} (Type: {relation[3]})")
OUTPUT :
PROGRAM 16: Topic Modeling with LDA (gensim)
import nltk
import gensim
from gensim import corpora
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Download necessary NLTK data files
nltk.download('stopwords')
nltk.download('punkt')
# Sample text data
documents = [
    "Artificial intelligence and machine learning are revolutionizing various industries.",
    "Natural language processing techniques are used to analyze and generate human language.",
    "Deep learning models have significantly improved the accuracy of speech recognition systems.",
    "Computer vision enables machines to interpret and understand visual information.",
    "Neural networks are a key component of many modern AI systems."
]
# Preprocess the text data
stop_words = set(stopwords.words('english'))
def preprocess(doc):
    tokens = word_tokenize(doc.lower())
    tokens = [word for word in tokens if word.isalnum()]
    tokens = [word for word in tokens if word not in stop_words]
    return tokens
processed_docs = [preprocess(doc) for doc in documents]
# Create a dictionary and corpus
dictionary = corpora.Dictionary(processed_docs)
corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
# Train the LDA model
lda_model = gensim.models.ldamodel.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=10)
# Print the topics
for idx, topic in lda_model.print_topics(-1):
    print(f"Topic {idx}: {topic}")
OUTPUT :