sentiment-analysis-text-classification
March 24, 2024
[24]: import pandas as pd
import numpy as np
import nltk
import re  # regex
# splitting the data into training and testing
from sklearn.model_selection import train_test_split
# model
from sklearn.naive_bayes import MultinomialNB
# stemming
from nltk.stem import PorterStemmer
# stopwords
from nltk.corpus import stopwords
# count vectorizer
from sklearn.feature_extraction.text import CountVectorizer
# tokenizers
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer
# evaluation metrics
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# visualizations
import seaborn as sns
import matplotlib.pyplot as plt
[25]: #Loading the Dataset
data = pd.read_csv('Feedback.csv')
[26]: data.head()
[26]: Text Sentiment
0 I love spending time with my family. Positive
1 This movie is absolutely terrible. Negative
2 The food at that restaurant was amazing. Positive
3 I had a horrible experience at the dentist. Negative
4 The weather today is perfect. Positive
[27]: # row and column count
data.shape
[27]: (20, 2)
[28]: # count of the negative and positive sentiments
data['Sentiment'].value_counts()
[28]: Positive 10
Negative 10
Name: Sentiment, dtype: int64
[29]: # assign the count vectorizer to a variable
countvectorizer = CountVectorizer()
# get the document-term matrix
DTM = pd.DataFrame(countvectorizer.fit_transform(data["Text"]).toarray(),
                   columns=countvectorizer.get_feature_names_out())
DTM
[29]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0
8 0 1 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0
… too top traffic ve view was wasn waste weather with
0 … 0 0 0 0 0 0 0 0 0 1
1 … 0 0 0 0 0 0 0 0 0 0
2 … 0 0 0 0 0 1 0 0 0 0
3 … 0 0 0 0 0 0 0 0 0 0
4 … 0 0 0 0 0 0 0 0 1 0
5 … 0 0 0 0 0 1 0 0 0 0
6 … 0 0 0 0 0 0 0 0 0 0
7 … 0 0 0 0 0 0 0 0 0 1
8 … 0 0 0 0 0 1 0 0 0 0
9 … 0 0 1 0 0 0 0 0 0 0
10 … 0 0 0 1 0 0 0 0 0 0
11 … 0 0 0 0 0 0 0 0 0 1
12 … 0 1 0 0 1 1 0 0 0 0
13 … 0 0 0 0 0 1 0 0 0 0
14 … 0 0 0 0 0 0 0 0 0 0
15 … 1 0 1 0 0 0 1 0 0 0
16 … 0 0 0 0 0 0 0 0 0 1
17 … 0 0 0 0 0 1 0 1 0 0
18 … 0 0 0 0 0 0 0 0 0 0
19 … 0 0 0 0 0 0 0 0 0 0
[20 rows x 76 columns]
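To make the document-term matrix concrete, here is a minimal sketch of what CountVectorizer does on a two-sentence toy corpus (the sentences are hypothetical, not rows of Feedback.csv):
[ ]: # toy example: each row is a document, each column counts one vocabulary word
toy = ["the food was amazing", "the food was terrible"]
cv = CountVectorizer()
counts = cv.fit_transform(toy)        # sparse document-term matrix
print(cv.get_feature_names_out())     # ['amazing' 'food' 'terrible' 'the' 'was']
print(counts.toarray())               # [[1 1 0 1 1]
                                      #  [0 1 1 1 1]]
The 20 x 76 matrix above is built the same way, one column per unique token across all 20 feedback texts.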
[32]: DTM['Sentiment']=data['Sentiment']
DTM
[32]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0
8 0 1 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0
… top traffic ve view was wasn waste weather with Sentiment
0 … 0 0 0 0 0 0 0 0 1 Positive
1 … 0 0 0 0 0 0 0 0 0 Negative
2 … 0 0 0 0 1 0 0 0 0 Positive
3 … 0 0 0 0 0 0 0 0 0 Negative
4 … 0 0 0 0 0 0 0 1 0 Positive
5 … 0 0 0 0 1 0 0 0 0 Negative
6 … 0 0 0 0 0 0 0 0 0 Positive
7 … 0 0 0 0 0 0 0 0 1 Negative
8 … 0 0 0 0 1 0 0 0 0 Positive
9 … 0 1 0 0 0 0 0 0 0 Negative
10 … 0 0 1 0 0 0 0 0 0 Positive
11 … 0 0 0 0 0 0 0 0 1 Negative
12 … 1 0 0 1 1 0 0 0 0 Positive
13 … 0 0 0 0 1 0 0 0 0 Negative
14 … 0 0 0 0 0 0 0 0 0 Positive
15 … 0 1 0 0 0 1 0 0 0 Positive
16 … 0 0 0 0 0 0 0 0 1 Negative
17 … 0 0 0 0 1 0 1 0 0 Negative
18 … 0 0 0 0 0 0 0 0 0 Positive
19 … 0 0 0 0 0 0 0 0 0 Negative
[20 rows x 77 columns]
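Note that this labelled matrix is shown for inspection only; the model below is trained on the preprocessed text_counts matrix built by the next cell, not on DTM.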
[8]: # text preprocessing function
def preprocess_text(text):
    # convert text to lowercase
    text = text.apply(lambda x: x.lower())
    # tokenize text
    text = text.apply(lambda x: nltk.word_tokenize(x))
    # remove stop words
    stop_words = set(stopwords.words('english'))
    text = text.apply(lambda x: [word for word in x if word not in stop_words])
    # stem each token
    stemmer = PorterStemmer()
    text = text.apply(lambda x: [stemmer.stem(word) for word in x])
    # combine tokens back into a single string
    text = text.apply(lambda x: ' '.join(x))
    # remove non-alphanumeric characters using regex
    text = text.apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x))
    # vectorize the text using CountVectorizer
    countvectorizer = CountVectorizer()
    X = countvectorizer.fit_transform(text)
    # return the vectorized text and the vocabulary
    return X, countvectorizer.vocabulary_
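To see what each step does, the sketch below traces one sentence from the dataset through the same pipeline; the per-step results are what NLTK's tokenizer, the English stopword list, and the Porter stemmer produce:
[ ]: sentence = "The food at that restaurant was amazing."
tokens = nltk.word_tokenize(sentence.lower())
# ['the', 'food', 'at', 'that', 'restaurant', 'was', 'amazing', '.']
tokens = [w for w in tokens if w not in set(stopwords.words('english'))]
# ['food', 'restaurant', 'amazing', '.']
stems = [PorterStemmer().stem(w) for w in tokens]
# ['food', 'restaur', 'amaz', '.']  -- the regex step then strips the '.'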
[9]: # summarize the encoded texts into a sparse matrix
text_counts = preprocess_text(data["Text"])[0]
[10]: # convert the sparse matrix to a dense array and check its shape
text_counts.toarray()
text_counts.toarray().shape
[10]: (20, 58)
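Preprocessing shrinks the vocabulary from the 76 raw tokens seen earlier to 58 stems, since stopwords are dropped and inflected forms collapse onto a common stem.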
[11]: # print the identified unique stems along with their column indices
preprocess_text(data["Text"])[1]
[11]: {'love': 27,
'spend': 44,
'time': 50,
'famili': 18,
'movi': 30,
'absolut': 0,
'terribl': 49,
'food': 20,
'restaur': 41,
'amaz': 1,
'horribl': 22,
'experi': 17,
'dentist': 12,
'weather': 57,
'today': 51,
'perfect': 34,
'custom': 11,
'servic': 43,
'store': 46,
'aw': 2,
'realli': 40,
'enjoy': 15,
'book': 5,
'disappoint': 14,
'concert': 9,
'ca': 7,
'nt': 33,
'stand': 45,
'stuck': 47,
'traffic': 53,
'best': 4,
'pizza': 35,
've': 54,
'ever': 16,
'qualiti': 39,
'product': 36,
'view': 55,
'top': 52,
'mountain': 29,
'breathtak': 6,
'new': 31,
'design': 13,
'room': 42,
'bad': 3,
'surprisingli': 48,
'frustrat': 21,
'lack': 25,
'progress': 37,
'project': 38,
'complet': 8,
'wast': 56,
'money': 28,
'fantast': 19,
'last': 26,
'night': 32,
'internet': 24,
'connect': 10,
'hotel': 23}
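The odd-looking entries 'ca' and 'nt' come from NLTK's tokenizer, which splits contractions such as "can't" into 'ca' and "n't"; the regex step then strips the apostrophe. A quick check (a sketch):
[ ]: nltk.word_tokenize("i can't stand this")
# ['i', 'ca', "n't", 'stand', 'this']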
[12]: # splitting the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(
    text_counts, data['Sentiment'], test_size=0.2, random_state=5)
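With only 20 rows, test_size=0.2 leaves just 4 test examples, so the split can easily end up class-imbalanced. One optional variant (an assumption, not part of the original run) is a stratified split, which preserves the 50/50 class ratio in both subsets:
[ ]: # hypothetical variant: stratified split keeps the class balance
X_train, X_test, Y_train, Y_test = train_test_split(
    text_counts, data['Sentiment'], test_size=0.2,
    random_state=5, stratify=data['Sentiment'])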
Training the model
[13]: #Creating the Naïve Bayes Classifier Model
MNB = MultinomialNB()
# Train the model with training data
MNB.fit(X_train, Y_train)
[13]: MultinomialNB()
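A fitted MultinomialNB exposes the parameters it learned; a quick sketch for inspecting them, using sklearn's standard attribute names:
[ ]: print(MNB.class_log_prior_)        # log frequency of each class in the training data
print(MNB.feature_log_prob_.shape)      # one smoothed log P(word | class) per class and stem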
Predict the class of the unseen data
[14]: #get the model predictions for the test set
y_pred = MNB.predict(X_test)
y_pred
[14]: array(['Negative', 'Negative', 'Positive', 'Negative'], dtype='<U8')
[15]: # compare the actual and predicted labels side by side
results = {'Actual': Y_test,
           'Predicted': y_pred}
outputs = pd.DataFrame(results)
outputs
[15]: Actual Predicted
2 Positive Negative
5 Negative Negative
17 Negative Positive
19 Negative Negative
Get Evaluation Metrics
[16]: #accuracy values
accuracy_score(Y_test,y_pred)
[16]: 0.5
[17]: #obtain the confusion matrix
confusion_matrix(Y_test,y_pred)
[17]: array([[2, 1],
[1, 0]], dtype=int64)
[18]: #confusion matrix visualization
sns.heatmap(confusion_matrix(Y_test,y_pred),annot=True,fmt="g")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
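By default confusion_matrix orders the classes alphabetically, so both axes above run [Negative, Positive]. Passing the labels explicitly (a sketch) makes the heatmap self-describing:
[ ]: labels = ['Negative', 'Positive']
cm = confusion_matrix(Y_test, y_pred, labels=labels)
sns.heatmap(cm, annot=True, fmt="g", xticklabels=labels, yticklabels=labels)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()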
Classification Report
[19]: print(classification_report(Y_test,y_pred))
              precision    recall  f1-score   support

    Negative       0.67      0.67      0.67         3
    Positive       0.00      0.00      0.00         1

    accuracy                           0.50         4
   macro avg       0.33      0.33      0.33         4
weighted avg       0.50      0.50      0.50         4
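Reading the report: the single Positive test example was misclassified, so Positive precision and recall are both 0. The macro average is the unweighted mean of the per-class scores, (0.67 + 0.00) / 2 ≈ 0.33, while the weighted average weights each class by its support, (0.67·3 + 0.00·1) / 4 = 0.50, matching the 0.5 accuracy.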