Basic Text Processing Operations on a Text Document
with open("data.txt") as f:       # close the file when done
    for line in f:
        for word in line.split():
            # report every word ending in "ing", along with its length
            if word.endswith("ing"):
                print(word)
                print(len(word))
import re
data = "The biggest 5 Animals are 1.Elephant 2.Giraffe 3.Tiger 4.Lion 5.Cheetah"
# use a raw string so \d+ is read as a regex digit class; remove all digits
result = re.sub(r"\d+", '', data)
print(result)
def expand_contractions(data):
    # naive contraction expansion via chained string replacements
    text = data
    text = text.replace("n't", " not")   # "don't" -> "do not"
    text = text.replace("'s", " is")
    text = text.replace("'re", " are")
    text = text.replace("'ll", " will")
    return text

s = "How's my team doin, you're supposed to be not losing"
returned_data = expand_contractions(s)
print(returned_data)
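A dictionary-driven variant scales better than chained replace calls. Below is a minimal sketch, not an exhaustive solution: the contraction map is an illustrative sample, and "'s" is genuinely ambiguous (possessive vs. "is"), so any rule-based expansion is only an approximation. It reuses the sample string s from above.

import re
# illustrative contraction map; production lists are much longer
CONTRACTIONS = {"n't": " not", "'re": " are", "'ll": " will", "'ve": " have", "'s": " is"}
pattern = re.compile("|".join(re.escape(c) for c in CONTRACTIONS))

def expand_contractions_regex(text):
    # replace each matched contraction suffix with its expansion
    return pattern.sub(lambda m: CONTRACTIONS[m.group(0)], text)

print(expand_contractions_regex(s))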
Tokenization, Stemming and Lemmatization
import nltk
nltk.download('punkt')   # tokenizer models used by sent_tokenize/word_tokenize
data = "Welcome to TIMSCDR!!"
# sentence-level tokenization
tokens = nltk.sent_tokenize(data)
print(tokens)
# word-level tokenization
tokens = nltk.word_tokenize(data)
print(tokens)
from nltk.stem import PorterStemmer
port_stemmer = PorterStemmer()
print(port_stemmer.stem("Liked"))
data = ["liked", "liking", "likes", "killing", "killed"]
for words in data:
    print(words, " :", port_stemmer.stem(words))
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')   # lexical database required by the lemmatizer
lemmati = WordNetLemmatizer()
print("Socks :", lemmati.lemmatize("socks"))
print("corpora :", lemmati.lemmatize("corpora"))
# pos="a" treats the word as an adjective, so "better" maps to "good"
print("better :", lemmati.lemmatize("better", pos="a"))
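To see why lemmatization is often preferred over stemming, it helps to run both on the same words. A minimal comparison sketch, reusing the port_stemmer and lemmati objects created above (pos="v" tells the lemmatizer to treat each word as a verb):

for word in ["liked", "liking", "likes", "killing", "killed"]:
    # stem vs. verb lemma, side by side
    print(word, "| stem:", port_stemmer.stem(word),
          "| lemma:", lemmati.lemmatize(word, pos="v"))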
Removal of Stopwords
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
# keep the set under a different name so it does not shadow the module
stop_words = set(stopwords.words('english'))
print(stop_words)
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
stopWords = set(stopwords.words('english'))
data = "All work and no play. All work and no play makes Jack a dull boy"
tokens = word_tokenize(data)
filtered_Data = []
for w in tokens:
    # lowercase before the lookup so "All" is filtered like "all"
    if w.lower() not in stopWords:
        filtered_Data.append(w)
print(filtered_Data)
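The loop above is commonly condensed into a single list comprehension; an equivalent one-liner:

# same filtering as the loop above, in one expression
filtered_Data = [w for w in tokens if w.lower() not in stopWords]
print(filtered_Data)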
Implementation of POS Tagging
1.
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Don't be afraid to give up the good to go for the great")
# count how many tokens carry each part-of-speech tag
POS_count = doc.count_by(spacy.attrs.POS)
print(POS_count)
for k, v in sorted(POS_count.items()):
    print(f'{k}. {doc.vocab[k].text} : {v}')
2.
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("This is my School")
# 'compact' expects a boolean, not the string 'True'
options = {'color': 'red', 'bg': 'blue', 'compact': True, 'distance': 100}
displacy.render(doc, style="dep", options=options)
3.
import nltk
from nltk.tag import DefaultTagger
text = "The way to get started is to quit talking"
tokens = nltk.word_tokenize(text)
# a DefaultTagger assigns the same tag to every token; "NN" is the usual baseline
tagging = DefaultTagger("NN")
print(tagging.tag(tokens))
4.
import nltk
nltk.download('averaged_perceptron_tagger')   # model used by nltk.pos_tag
sentence = "the little yellow dog barked at the car"
# tokenization
tokens = nltk.word_tokenize(sentence)
# POS tagging
tag = nltk.pos_tag(tokens)
# chunk grammar: optional determiner, any adjectives, then a noun
phrase = "NP: {<DT>?<JJ>*<NN>}"
cp = nltk.RegexpParser(phrase)
result = cp.parse(tag)
print(result)
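The grammar "NP: {<DT>?<JJ>*<NN>}" chunks an optional determiner, any number of adjectives, and a noun into an NP. To list only the matched noun phrases instead of printing the whole tree, the parse result can be filtered by label, as in this short sketch:

# print just the NP chunks found by the chunk parser
for subtree in result.subtrees(filter=lambda t: t.label() == 'NP'):
    print(subtree)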
TF-IDF implementation
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
text = ["I love writing code in Python. I love Python code",
"I hate writing code in Java. I hate Java code"]
df = pd.DataFrame({'review':['review1','review2'],'text':text})
cv = CountVectorizer(stop_words='english')
cv_matrix = cv.fit_transform(df['text'])
df_dtm=pd.DataFrame(cv_matrix.toarray(),index=df['review'].values,
columns=cv.get_feature_names_out())
df_dtm
pip install scikit-learn
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
text = ["I love writing code in python.I love python code",
"I hate writing code in java.i hate java code"]
df = pd.DataFrame({'review':['review1','review2'],'text':text})
tfidf = TfidfVectorizer(stop_words='english',norm=None)
tfidf_matrix = tfidf.fit_transform(df['text'])
output = pd.DataFrame(tfidf_matrix.toarray(),index=df['review'],
columns=tfidf.get_feature_names_out())
output
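With norm=None, the values above can be verified by hand. TfidfVectorizer defaults to smooth_idf=True, i.e. idf(t) = ln((1 + n) / (1 + df(t))) + 1, multiplied by the raw term count. A quick check for "love", which occurs twice in review1 and appears in one of the two documents:

import numpy as np
n, df_t, tf = 2, 1, 2                  # 2 documents; "love" in 1 of them, twice in review1
idf = np.log((1 + n) / (1 + df_t)) + 1
print(tf * idf)                        # ~2.8109, matching the "love" cell for review1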
Practical 4: Creating and comparing different text representations
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compare_text_representations(texts):
    # Bag of Words (BoW) representation
    bow_vectorizer = CountVectorizer()
    bow_matrix = bow_vectorizer.fit_transform(texts)
    # TF-IDF representation
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
    # Compare similarities between the two documents
    bow_similarity = cosine_similarity(bow_matrix[0], bow_matrix[1])
    tfidf_similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    print("BoW vectors:")
    print(bow_matrix.toarray())
    print("\nTF-IDF vectors:")
    print(tfidf_matrix.toarray())
    print(f"\nBoW Cosine Similarity: {bow_similarity[0][0]:.4f}")
    print(f"TF-IDF Cosine Similarity: {tfidf_similarity[0][0]:.4f}")

# Example usage
texts = [
    "The cat sat on the mat",
    "The dog sat on the log"
]
compare_text_representations(texts)
Training and using word embeddings
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')

def train_word_embeddings(sentences):
    # Tokenize sentences
    tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]
    # Train Word2Vec model
    model = Word2Vec(sentences=tokenized_sentences, vector_size=100,
                     window=5, min_count=1, workers=4)
    return model

def use_word_embeddings(model, word, top_n=5):
    try:
        similar_words = model.wv.most_similar(word, topn=top_n)
        print(f"Words most similar to '{word}':")
        for w, score in similar_words:
            print(f"{w}: {score:.4f}")
    except KeyError:
        print(f"'{word}' not in vocabulary")

# Example usage
sentences = [
    "The quick brown fox jumps over the lazy dog",
    "A fox is a cunning animal",
    "The dog barks at night",
    "Foxes and dogs are different species"
]
model = train_word_embeddings(sentences)
use_word_embeddings(model, "fox")
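Beyond most_similar, the trained vectors themselves can be inspected. A short follow-up on the model trained above; note that on a toy corpus this small the similarity numbers are essentially noise:

print(model.wv["fox"][:5])                  # first 5 of the 100 embedding dimensions
print(model.wv.similarity("fox", "dog"))    # cosine similarity between two words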
Implementation of an N-gram Language Model
1.
import nltk
from nltk.util import ngrams
text = "The little boy ran away"
# extract trigrams (n=3) from the tokenized sentence
Ngram = ngrams(sequence=nltk.word_tokenize(text), n=3)
for gram in Ngram:
    print(gram)
2.
import nltk
from nltk import bigrams, trigrams
from collections import defaultdict
import random
nltk.download('punkt')

def build_language_model(text, n=2):
    words = nltk.word_tokenize(text.lower())
    if n == 2:
        pairs = list(bigrams(words))
    elif n == 3:
        pairs = list(trigrams(words))
    else:
        raise ValueError("n must be 2 or 3")
    # map each context to the counts of the words that follow it
    model = defaultdict(lambda: defaultdict(int))
    for pair in pairs:
        if n == 2:
            model[pair[0]][pair[1]] += 1
        else:
            model[(pair[0], pair[1])][pair[2]] += 1
    return model

def generate_text(model, num_words=20, start_word=None, n=2):
    if start_word is None:
        start_word = random.choice(list(model.keys()))
    words = [start_word] if n == 2 else list(start_word)
    for _ in range(num_words - n + 1):
        context = words[-1] if n == 2 else tuple(words[-2:])
        if not model[context]:
            break  # dead end: this context never occurred in the training text
        # always pick the most frequent continuation (greedy decoding)
        next_word = max(model[context], key=model[context].get)
        words.append(next_word)
    return ' '.join(words)

# Example usage
text = """
The cat sat on the mat. The dog ran in the park.
Cats like to play with toys. Dogs enjoy chasing balls.
"""
bigram_model = build_language_model(text, n=2)
trigram_model = build_language_model(text, n=3)
print("Generated text (bigram model):")
print(generate_text(bigram_model, num_words=15, start_word="the", n=2))
print("\nGenerated text (trigram model):")
print(generate_text(trigram_model, num_words=15, start_word=("the", "cat"), n=3))
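The same counts also yield maximum-likelihood probabilities: P(w2 | w1) is count(w1, w2) divided by the total count of bigrams starting with w1. A small helper sketch for the bigram model built above:

def bigram_probability(model, w1, w2):
    # MLE estimate: count(w1, w2) / count(w1, *)
    counts = model[w1]
    total = sum(counts.values())
    return counts[w2] / total if total else 0.0

print(bigram_probability(bigram_model, "the", "cat"))   # 0.25 on the toy corpus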
Implementation of Text Classification using Naïve Bayes
import pandas as pd
df = pd.read_csv('twitter_training.csv',header=None,usecols=[2,3])
df.head()
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# rename columns
df.columns = ['Sentiment', 'Text']
# drop rows with missing values
df = df.dropna()
# encode sentiment labels as integers
le = LabelEncoder()
df['Sentiment'] = le.fit_transform(df['Sentiment'])
df.head()
X=list(df['Text'])
y=list(df['Sentiment'])
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,
random_state=42)
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(analyzer='word',ngram_range=(1,1),stop_words='english')
X_train_cv = cv.fit_transform(X_train)
X_test_cv = cv.transform(X_test)
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score
import numpy as np
#train Naive Bayes Classifier
clf=MultinomialNB()
clf.fit(X_train_cv, y_train)
y_pred=clf.predict(X_test_cv)
score=f1_score(y_test,y_pred, average="micro")
print('F-1 score : {}'.format(np.round(score,4)))
# compare n-gram ranges from unigrams up to (1, 10)
for i in range(1, 11):
    cv = CountVectorizer(analyzer='word', ngram_range=(1, i), stop_words='english')
    # creating the BoW matrix
    X_train_cv = cv.fit_transform(X_train)
    X_test_cv = cv.transform(X_test)
    # training the classifier
    clf2 = MultinomialNB()
    clf2.fit(X_train_cv, y_train)
    y_pred = clf2.predict(X_test_cv)
    score = f1_score(y_test, y_pred, average="micro")
    print('F1-Score: {}'.format(np.round(score, 4)))
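A possible variation is to swap raw counts for TF-IDF weights and bundle both steps in a scikit-learn Pipeline, which keeps the vectorizer and classifier fitted together. A sketch reusing the train/test split from above:

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

pipe = Pipeline([
    ("tfidf", TfidfVectorizer(stop_words='english')),
    ("nb", MultinomialNB()),
])
pipe.fit(X_train, y_train)
score = f1_score(y_test, pipe.predict(X_test), average="micro")
print('F1-Score (TF-IDF pipeline): {}'.format(np.round(score, 4)))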
Implementing a text classifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
def train_text_classifier(X, y):
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42)
    # Create a CountVectorizer
    vectorizer = CountVectorizer()
    X_train_vectorized = vectorizer.fit_transform(X_train)
    X_test_vectorized = vectorizer.transform(X_test)
    # Train a Naive Bayes classifier
    classifier = MultinomialNB()
    classifier.fit(X_train_vectorized, y_train)
    # Make predictions
    y_pred = classifier.predict(X_test_vectorized)
    # Print classification report
    print(classification_report(y_test, y_pred))
    return vectorizer, classifier

def classify_text(text, vectorizer, classifier):
    text_vectorized = vectorizer.transform([text])
    prediction = classifier.predict(text_vectorized)
    return prediction[0]
# Example usage
X = [
    "I love this movie, it's amazing!",
    "This book is terrible, I couldn't finish it.",
    "The food at this restaurant is delicious.",
    "The service here is awful, I'm never coming back.",
    "What a great experience, highly recommended!",
]
y = ["positive", "negative", "positive", "negative", "positive"]
vectorizer, classifier = train_text_classifier(X, y)
new_text = "The product exceeded my expectations, I'm very satisfied."
prediction = classify_text(new_text, vectorizer, classifier)
print(f"Prediction for '{new_text}': {prediction}")
Building a sentiment analysis system
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import pandas as pd
nltk.download('vader_lexicon')

def analyze_sentiment(text):
    sia = SentimentIntensityAnalyzer()
    sentiment_scores = sia.polarity_scores(text)
    # VADER convention: compound >= 0.05 is positive, <= -0.05 is negative
    if sentiment_scores['compound'] >= 0.05:
        sentiment = "Positive"
    elif sentiment_scores['compound'] <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, sentiment_scores

def analyze_sentiments(texts):
    results = []
    for text in texts:
        sentiment, scores = analyze_sentiment(text)
        results.append({
            'text': text,
            'sentiment': sentiment,
            'pos_score': scores['pos'],
            'neg_score': scores['neg'],
            'neu_score': scores['neu'],
            'compound_score': scores['compound']
        })
    return pd.DataFrame(results)
# Example usage
texts = [
    "I absolutely love this product! It's amazing!",
    "This is the worst experience I've ever had.",
    "The movie was okay, nothing special.",
    "I'm feeling pretty neutral about the whole situation.",
    "The customer service was excellent and very helpful!"
]
results_df = analyze_sentiments(texts)
print(results_df)
Implementation of Text Summarization
from transformers import pipeline

def summarize_text(text, max_length=150, min_length=50):
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(text, max_length=max_length, min_length=min_length,
                         do_sample=False)
    return summary[0]['summary_text']
# Example usage
long_text = """
Climate change is one of the most pressing issues facing our planet today. It refers
to long-term shifts in temperatures and weather patterns, mainly caused by human
activities, especially the burning of fossil fuels. These activities release greenhouse
gases into the atmosphere, trapping heat and causing the Earth's average
temperature to rise. The consequences of climate change are far-reaching and
include more frequent and severe weather events, rising sea levels, and disruptions
to ecosystems. To address this global challenge, countries and organizations
worldwide are working on strategies to reduce greenhouse gas emissions and
transition to cleaner energy sources. Individual actions, such as reducing energy
consumption and adopting sustainable practices, also play a crucial role in
mitigating the effects of climate change.
"""
summary = summarize_text(long_text)
print("Original text length:", len(long_text))
print("Summary length:", len(summary))
print("\nSummary:")
print(summary)
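One caveat: facebook/bart-large-cnn attends to roughly 1024 tokens, so longer inputs are truncated. A naive workaround is to split the text into chunks and summarize each one; the sketch below uses character-based chunks (chunk_size is an arbitrary illustrative value, and character splits can cut sentences mid-way):

def summarize_long_text(text, chunk_size=3000, **kwargs):
    # split into rough character-based chunks and summarize each separately
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    return " ".join(summarize_text(chunk, **kwargs) for chunk in chunks)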