
sentiment-analysis-text-classification

March 24, 2024

[24]: # pandas and numpy
import pandas as pd
import numpy as np

import nltk
import re  # regex

# splitting the data into training and testing sets
from sklearn.model_selection import train_test_split

# model
from sklearn.naive_bayes import MultinomialNB

# evaluation metrics
from sklearn import metrics

# stemming
from nltk.stem import PorterStemmer

# stopwords
from nltk.corpus import stopwords

# count vectorizer
from sklearn.feature_extraction.text import CountVectorizer

# tokenizers
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer

# classification results
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# visualizations
import seaborn as sns
import matplotlib.pyplot as plt

[25]: # loading the dataset
data = pd.read_csv('Feedback.csv')

[26]: data.head()

[26]: Text Sentiment
0 I love spending time with my family. Positive
1 This movie is absolutely terrible. Negative
2 The food at that restaurant was amazing. Positive
3 I had a horrible experience at the dentist. Negative
4 The weather today is perfect. Positive

[27]: # row and column count
data.shape

[27]: (20, 2)

[28]: # count of the negative and positive sentiments


data['Sentiment'].value_counts()

[28]: Positive 10
Negative 10
Name: Sentiment, dtype: int64

[29]: # assign the count vectorizer to a variable
countvectorizer = CountVectorizer()

# get the document-term matrix
DTM = pd.DataFrame(countvectorizer.fit_transform(data["Text"]).toarray(),
    columns=countvectorizer.get_feature_names_out(), index=None)

DTM

[29]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0
8 0 1 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0

… too top traffic ve view was wasn waste weather with
0 … 0 0 0 0 0 0 0 0 0 1
1 … 0 0 0 0 0 0 0 0 0 0
2 … 0 0 0 0 0 1 0 0 0 0
3 … 0 0 0 0 0 0 0 0 0 0
4 … 0 0 0 0 0 0 0 0 1 0
5 … 0 0 0 0 0 1 0 0 0 0
6 … 0 0 0 0 0 0 0 0 0 0
7 … 0 0 0 0 0 0 0 0 0 1
8 … 0 0 0 0 0 1 0 0 0 0
9 … 0 0 1 0 0 0 0 0 0 0
10 … 0 0 0 1 0 0 0 0 0 0
11 … 0 0 0 0 0 0 0 0 0 1
12 … 0 1 0 0 1 1 0 0 0 0
13 … 0 0 0 0 0 1 0 0 0 0
14 … 0 0 0 0 0 0 0 0 0 0
15 … 1 0 1 0 0 0 1 0 0 0
16 … 0 0 0 0 0 0 0 0 0 1
17 … 0 0 0 0 0 1 0 1 0 0
18 … 0 0 0 0 0 0 0 0 0 0
19 … 0 0 0 0 0 0 0 0 0 0

[20 rows x 76 columns]
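
Each row of this document-term matrix is one feedback entry and each column one vocabulary word, with cells holding raw term counts. As a minimal sketch of what CountVectorizer is doing here (note that its default tokenizer drops single-character tokens such as "I"):

# minimal sketch: vectorizing a single sentence from the dataset
cv = CountVectorizer()
vec = cv.fit_transform(["I love spending time with my family"])
print(cv.get_feature_names_out())  # ['family' 'love' 'my' 'spending' 'time' 'with']
print(vec.toarray())               # [[1 1 1 1 1 1]]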

[32]: DTM['Sentiment']=data['Sentiment']

DTM

[32]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0
8 0 1 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0

… top traffic ve view was wasn waste weather with Sentiment
0 … 0 0 0 0 0 0 0 0 1 Positive
1 … 0 0 0 0 0 0 0 0 0 Negative
2 … 0 0 0 0 1 0 0 0 0 Positive
3 … 0 0 0 0 0 0 0 0 0 Negative
4 … 0 0 0 0 0 0 0 1 0 Positive
5 … 0 0 0 0 1 0 0 0 0 Negative
6 … 0 0 0 0 0 0 0 0 0 Positive
7 … 0 0 0 0 0 0 0 0 1 Negative
8 … 0 0 0 0 1 0 0 0 0 Positive
9 … 0 1 0 0 0 0 0 0 0 Negative
10 … 0 0 1 0 0 0 0 0 0 Positive
11 … 0 0 0 0 0 0 0 0 1 Negative
12 … 1 0 0 1 1 0 0 0 0 Positive
13 … 0 0 0 0 1 0 0 0 0 Negative
14 … 0 0 0 0 0 0 0 0 0 Positive
15 … 0 1 0 0 0 1 0 0 0 Positive
16 … 0 0 0 0 0 0 0 0 1 Negative
17 … 0 0 0 0 1 0 1 0 0 Negative
18 … 0 0 0 0 0 0 0 0 0 Positive
19 … 0 0 0 0 0 0 0 0 0 Negative

[20 rows x 77 columns]
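
This combined frame is for inspection only; the classifier below is trained on the preprocessed text_counts matrix, not on this raw document-term matrix.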

[8]: # text preprocessing function

def preprocess_text(text):
    # Convert text to lowercase
    text = text.apply(lambda x: x.lower())

    # Tokenize text
    text = text.apply(lambda x: nltk.word_tokenize(x))

    # Remove stop words
    stop_words = set(stopwords.words('english'))
    text = text.apply(lambda x: [word for word in x if word not in stop_words])

    # Stem text
    stemmer = nltk.PorterStemmer()
    text = text.apply(lambda x: [stemmer.stem(word) for word in x])

    # Combine words back into a single string
    text = text.apply(lambda x: ' '.join(x))

    # Remove non-alphanumeric characters using regex
    text = text.apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x))

    # Vectorize text using CountVectorizer
    countvectorizer = CountVectorizer()
    X = countvectorizer.fit_transform(text)

    # Return the vectorized text and the vocabulary
    return X, countvectorizer.vocabulary_
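
To make the pipeline concrete, here is a minimal sketch tracing one sentence from the dataset through the same steps by hand (it assumes the NLTK 'punkt' and 'stopwords' data are downloaded, which the function above also requires):

# illustration only: lowercase -> tokenize -> remove stopwords -> stem -> strip punctuation
sentence = "This movie is absolutely terrible."
tokens = nltk.word_tokenize(sentence.lower())          # ['this', 'movie', 'is', 'absolutely', 'terrible', '.']
tokens = [w for w in tokens if w not in set(stopwords.words('english'))]
stems = [PorterStemmer().stem(w) for w in tokens]      # ['movi', 'absolut', 'terribl', '.']
print(re.sub(r'[^a-zA-Z0-9\s]', '', ' '.join(stems)))  # movi absolut terribl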

[9]: # summarize the encoded texts into a sparse matrix
text_counts = preprocess_text(data["Text"])[0]

[10]: # convert the sparse matrix to a dense array and check its shape
text_counts.toarray()

text_counts.toarray().shape

[10]: (20, 58)
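
The vocabulary has shrunk from the 76 columns of the raw document-term matrix to 58, because stop-word removal discards common words and stemming collapses inflected forms.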

[11]: # print the identified unique words along with their indices
preprocess_text(data["Text"])[1]

[11]: {'love': 27,
'spend': 44,
'time': 50,
'famili': 18,
'movi': 30,
'absolut': 0,
'terribl': 49,
'food': 20,
'restaur': 41,
'amaz': 1,
'horribl': 22,
'experi': 17,
'dentist': 12,
'weather': 57,
'today': 51,
'perfect': 34,
'custom': 11,
'servic': 43,
'store': 46,
'aw': 2,
'realli': 40,
'enjoy': 15,
'book': 5,
'disappoint': 14,
'concert': 9,
'ca': 7,
'nt': 33,
'stand': 45,
'stuck': 47,
'traffic': 53,
'best': 4,
'pizza': 35,
've': 54,
'ever': 16,
'qualiti': 39,
'product': 36,
'view': 55,
'top': 52,
'mountain': 29,
'breathtak': 6,
'new': 31,
'design': 13,
'room': 42,
'bad': 3,
'surprisingli': 48,
'frustrat': 21,
'lack': 25,
'progress': 37,
'project': 38,
'complet': 8,
'wast': 56,
'money': 28,
'fantast': 19,
'last': 26,
'night': 32,
'internet': 24,
'connect': 10,
'hotel': 23}

[12]: # splitting the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(text_counts,
    data['Sentiment'], test_size=0.2, random_state=5)
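
With only 20 examples, a purely random split can leave the test set imbalanced; the one drawn here holds three Negative items and one Positive. A common alternative, sketched below but not what this notebook ran, is to stratify on the label so both splits keep the 50/50 class balance:

# alternative sketch (not used above): stratified split preserves class proportions
X_train, X_test, Y_train, Y_test = train_test_split(
    text_counts, data['Sentiment'],
    test_size=0.2, random_state=5,
    stratify=data['Sentiment'])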

Training the model


[13]: #Creating the Naïve Bayes Classifier Model
MNB = MultinomialNB()

# Train the model with training data
MNB.fit(X_train, Y_train)

[13]: MultinomialNB()
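
MultinomialNB is fit here with its default alpha=1.0 (Laplace smoothing), which keeps words unseen in a class from zeroing out that class's probability. As a small illustration, the fitted model exposes what it learned:

# illustration: quantities the fitted model learned
print(MNB.classes_)                 # class labels, e.g. ['Negative' 'Positive']
print(MNB.class_log_prior_)         # log P(class), estimated from the training split
print(MNB.feature_log_prob_.shape)  # (2, 58): smoothed log P(word | class)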

Predict the class of the unseen data


[14]: #get the model predictions for the test set
y_pred = MNB.predict(X_test)
y_pred

[14]: array(['Negative', 'Negative', 'Positive', 'Negative'], dtype='<U8')
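
The hard labels hide how confident the model is; a quick sketch to inspect the per-class probabilities behind each prediction:

# illustration: probability of each class for the four test documents
proba = MNB.predict_proba(X_test)
print(MNB.classes_)    # column order of proba
print(proba.round(3))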

[15]: # compare the actual and predicted labels
outputs = pd.DataFrame({'Actual': Y_test,
                        'Predicted': y_pred})
outputs

[15]: Actual Predicted
2 Positive Negative
5 Negative Negative
17 Negative Positive
19 Negative Negative

Get Evaluation Metrics


[16]: #accuracy values
accuracy_score(Y_test,y_pred)

[16]: 0.5
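
Two of the four test predictions match the actual labels (rows 5 and 19), so accuracy = 2/4 = 0.5.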

[17]: # obtain the confusion matrix
confusion_matrix(Y_test, y_pred)

[17]: array([[2, 1],
       [1, 0]], dtype=int64)
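
confusion_matrix sorts the labels, so rows are actual [Negative, Positive] and columns are predicted [Negative, Positive]. A small sketch that makes the labelling explicit:

# illustration: the same matrix with explicit row/column labels
pd.DataFrame(confusion_matrix(Y_test, y_pred),
             index=['Actual Negative', 'Actual Positive'],
             columns=['Pred Negative', 'Pred Positive'])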

[18]: # confusion matrix visualization
sns.heatmap(confusion_matrix(Y_test, y_pred), annot=True, fmt="g")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

Classification Report
[19]: print(classification_report(Y_test,y_pred))

precision recall f1-score support

Negative 0.67 0.67 0.67 3
Positive 0.00 0.00 0.00 1

accuracy 0.50 4
macro avg 0.33 0.33 0.33 4
weighted avg 0.50 0.50 0.50 4
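
These numbers follow directly from the confusion matrix: for Negative, precision = 2/(2+1) ≈ 0.67 and recall = 2/3 ≈ 0.67; the single Positive example was misclassified and the one Positive prediction was wrong, so Positive precision and recall are both 0.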
