0% found this document useful (0 votes)

20 views9 pages

Ment Analysis Text Classification

Uploaded by

Nipuni

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

20 views9 pages

Ment Analysis Text Classification

Uploaded by

Nipuni

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

ment-analysis-text-classification

March 24, 2024

[24]: import pandas as pd

import nltk
import re #regex

#Splitting the data into training and testing

from sklearn.model_selection import train_test_split

#model
from sklearn.naive_bayes import MultinomialNB

#evaluation metrics
from sklearn import metrics

#stemming
from nltk.stem import PorterStemmer

#stopwords
from nltk.corpus import stopwords

# pandas and numpy

import pandas as pd
import numpy as np

#import count vectorizer

from sklearn.feature_extraction.text import CountVectorizer

#tokenizers
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer

#classification results
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

#visualizations
import seaborn as sns

1
import matplotlib.pyplot as plt

[25]: #Loading the Dataset

data = pd.read_csv('Feedback.csv')

[26]: data.head()

[26]: Text Sentiment

0 I love spending time with my family. Positive
1 This movie is absolutely terrible. Negative
2 The food at that restaurant was amazing. Positive
3 I had a horrible experience at the dentist. Negative
4 The weather today is perfect. Positive

[27]: #row and column count

data.shape

[27]: (20, 2)

[28]: # count of the negative and positive sentiments

data['Sentiment'].value_counts()

[28]: Positive 10
Negative 10
Name: Sentiment, dtype: int64

[29]: # assign the count vectorizer to a variable

countvectorizer=CountVectorizer()

# get the document term matrix

DTM=pd.DataFrame(countvectorizer.fit_transform(data["Text"]).toarray(),
columns=countvectorizer.get_feature_names_out(),index=None)

DTM

[29]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0
8 0 1 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0

2
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0

… too top traffic ve view was wasn waste weather with

0 … 0 0 0 0 0 0 0 0 0 1
1 … 0 0 0 0 0 0 0 0 0 0
2 … 0 0 0 0 0 1 0 0 0 0
3 … 0 0 0 0 0 0 0 0 0 0
4 … 0 0 0 0 0 0 0 0 1 0
5 … 0 0 0 0 0 1 0 0 0 0
6 … 0 0 0 0 0 0 0 0 0 0
7 … 0 0 0 0 0 0 0 0 0 1
8 … 0 0 0 0 0 1 0 0 0 0
9 … 0 0 1 0 0 0 0 0 0 0
10 … 0 0 0 1 0 0 0 0 0 0
11 … 0 0 0 0 0 0 0 0 0 1
12 … 0 1 0 0 1 1 0 0 0 0
13 … 0 0 0 0 0 1 0 0 0 0
14 … 0 0 0 0 0 0 0 0 0 0
15 … 1 0 1 0 0 0 1 0 0 0
16 … 0 0 0 0 0 0 0 0 0 1
17 … 0 0 0 0 0 1 0 1 0 0
18 … 0 0 0 0 0 0 0 0 0 0
19 … 0 0 0 0 0 0 0 0 0 0

[20 rows x 76 columns]

[32]: DTM['Sentiment']=data['Sentiment']

DTM

[32]: absolutely amazing and at awful bad being best book breathtaking \
0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 1 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 1 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0
7 0 0 0 0 0 0 0 0 0 0

3
8 0 1 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 1
13 0 0 0 1 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 1 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0 0
18 0 0 0 1 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0

… top traffic ve view was wasn waste weather with Sentiment

0 … 0 0 0 0 0 0 0 0 1 Positive
1 … 0 0 0 0 0 0 0 0 0 Negative
2 … 0 0 0 0 1 0 0 0 0 Positive
3 … 0 0 0 0 0 0 0 0 0 Negative
4 … 0 0 0 0 0 0 0 1 0 Positive
5 … 0 0 0 0 1 0 0 0 0 Negative
6 … 0 0 0 0 0 0 0 0 0 Positive
7 … 0 0 0 0 0 0 0 0 1 Negative
8 … 0 0 0 0 1 0 0 0 0 Positive
9 … 0 1 0 0 0 0 0 0 0 Negative
10 … 0 0 1 0 0 0 0 0 0 Positive
11 … 0 0 0 0 0 0 0 0 1 Negative
12 … 1 0 0 1 1 0 0 0 0 Positive
13 … 0 0 0 0 1 0 0 0 0 Negative
14 … 0 0 0 0 0 0 0 0 0 Positive
15 … 0 1 0 0 0 1 0 0 0 Positive
16 … 0 0 0 0 0 0 0 0 1 Negative
17 … 0 0 0 0 1 0 1 0 0 Negative
18 … 0 0 0 0 0 0 0 0 0 Positive
19 … 0 0 0 0 0 0 0 0 0 Negative

[20 rows x 77 columns]

[8]: #preprocessing text function

def preprocess_text(text):
    """Clean a pandas Series of documents and build its bag-of-words matrix.

    Each document is lowercased, tokenized with NLTK, filtered against the
    English stop-word list, Porter-stemmed, joined back into one string and
    finally stripped of every character that is not a letter, a digit or
    whitespace. A fresh ``CountVectorizer`` is then fitted on the result.

    Parameters
    ----------
    text : pandas.Series of str
        Raw documents; the function relies on ``Series.apply``.

    Returns
    -------
    tuple
        ``(X, vocabulary)`` where ``X`` is the sparse document-term matrix
        returned by ``CountVectorizer.fit_transform`` and ``vocabulary`` is
        the fitted ``vocabulary_`` mapping of term -> column index.

    Notes
    -----
    The vectorizer is re-created and re-fitted on every call, so call this
    once and keep the result rather than calling it repeatedly.
    NLTK's tokenizer models and stop-word corpus must already be installed.
    """
    # Convert text to lowercase
    text = text.apply(lambda x: x.lower())

    # Tokenize text
    text = text.apply(lambda x: nltk.word_tokenize(x))

    # Remove stop words (set built once so each membership test is O(1))
    stop_words = set(stopwords.words('english'))
    text = text.apply(lambda x: [word for word in x if word not in stop_words])

    # Stem text
    stemmer = nltk.PorterStemmer()
    text = text.apply(lambda x: [stemmer.stem(word) for word in x])

    # Combine words back into a single string
    text = text.apply(lambda x: ' '.join(x))

    # Remove non-alphanumeric characters using regex.
    # NOTE: this runs after tokenizing, so pieces of contractions survive as
    # their own terms (the fitted vocabulary contains 'ca', 'nt' and 've').
    text = text.apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', x))

    # Vectorize text using CountVectorizer
    countvectorizer = CountVectorizer()
    X = countvectorizer.fit_transform(text)

    # Return the vectorized text and the vocabulary
    return X, countvectorizer.vocabulary_

[9]: #Summarizing the Encoded Texts into a sparse matrix

text_counts=preprocess_text(data["Text"])[0]

[10]: #sparse matrix converting it to an array.

preprocess_text(data["Text"])[0].toarray()

preprocess_text(data["Text"])[0].toarray().shape

[10]: (20, 58)

[11]: #Printing the identified Unique words along with their indices
preprocess_text(data["Text"])[1]

[11]: {'love': 27,

'spend': 44,
'time': 50,
'famili': 18,
'movi': 30,
'absolut': 0,
'terribl': 49,
'food': 20,
'restaur': 41,
'amaz': 1,
'horribl': 22,
'experi': 17,
'dentist': 12,
'weather': 57,

5
'today': 51,
'perfect': 34,
'custom': 11,
'servic': 43,
'store': 46,
'aw': 2,
'realli': 40,
'enjoy': 15,
'book': 5,
'disappoint': 14,
'concert': 9,
'ca': 7,
'nt': 33,
'stand': 45,
'stuck': 47,
'traffic': 53,
'best': 4,
'pizza': 35,
've': 54,
'ever': 16,
'qualiti': 39,
'product': 36,
'view': 55,
'top': 52,
'mountain': 29,
'breathtak': 6,
'new': 31,
'design': 13,
'room': 42,
'bad': 3,
'surprisingli': 48,
'frustrat': 21,
'lack': 25,
'progress': 37,
'project': 38,
'complet': 8,
'wast': 56,
'money': 28,
'fantast': 19,
'last': 26,
'night': 32,
'internet': 24,
'connect': 10,
'hotel': 23}

[12]: #Splitting the data into training and testing

# x = text_counts

6
# y = data['Sentiment']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    text_counts, data['Sentiment'], test_size=0.2, random_state=5)

Training the model

[13]: #Creating the Naïve Bayes Classifier Model
MNB = MultinomialNB()

# Train the model with training data

MNB.fit(X_train, Y_train)

[13]: MultinomialNB()

Predict the class of the unseen data

[14]: #get the model predictions for the test set
y_pred = MNB.predict(X_test)
y_pred

[14]: array(['Negative', 'Negative', 'Positive', 'Negative'], dtype='<U8')

[15]: # compare the outputs

# Use a dedicated name for the comparison dict so that `data` keeps pointing
# at the feedback DataFrame loaded earlier (earlier cells index it by column).
comparison = {'Actual': Y_test,
              'Predicted': y_pred}

# Side-by-side table of true labels and model predictions for the test rows
outputs = pd.DataFrame(comparison)
outputs

[15]: Actual Predicted

2 Positive Negative
5 Negative Negative
17 Negative Positive
19 Negative Negative

Get Evaluation Metrics

[16]: #accuracy values
accuracy_score(Y_test,y_pred)

[16]: 0.5

[17]: #obtain the confusion matrix

confusion_matrix(Y_test,y_pred)

[17]: array([[2, 1],

[1, 0]], dtype=int64)

7
[18]: #confusion matrix visualization
sns.heatmap(confusion_matrix(Y_test,y_pred),annot=True,fmt="g")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

Classification Report
[19]: print(classification_report(Y_test,y_pred))

precision recall f1-score support

Negative 0.67 0.67 0.67 3

Positive 0.00 0.00 0.00 1

accuracy 0.50 4
macro avg 0.33 0.33 0.33 4
weighted avg 0.50 0.50 0.50 4

[ ]:

8
[ ]:

[ ]:

NLP Lab Manual for B.E. Students
No ratings yet
NLP Lab Manual for B.E. Students
21 pages
DS - Lab Report.
No ratings yet
DS - Lab Report.
25 pages
DSBA+Master+Codebook+ +Text+Mining+&+TSF
No ratings yet
DSBA+Master+Codebook+ +Text+Mining+&+TSF
11 pages
Chapter 10 - Text Analytics
No ratings yet
Chapter 10 - Text Analytics
13 pages
Python NLP Techniques Guide
No ratings yet
Python NLP Techniques Guide
18 pages
Sentimental Analysis
No ratings yet
Sentimental Analysis
3 pages
Sentiment Analysis with NLTK
No ratings yet
Sentiment Analysis with NLTK
4 pages
NLP Lab
No ratings yet
NLP Lab
18 pages
Sma Exp 10 Code Print
No ratings yet
Sma Exp 10 Code Print
7 pages
NLP with NLTK: Restaurant Reviews Analysis
No ratings yet
NLP with NLTK: Restaurant Reviews Analysis
5 pages
AIML IA3 Loki & SG
No ratings yet
AIML IA3 Loki & SG
31 pages
NLP Crecord Mid2
No ratings yet
NLP Crecord Mid2
36 pages
ML Week10.1
No ratings yet
ML Week10.1
5 pages
NLP Tushar
No ratings yet
NLP Tushar
21 pages
Chapter 8 Text Analytics
No ratings yet
Chapter 8 Text Analytics
42 pages
Natural Language Processing
No ratings yet
Natural Language Processing
22 pages
Self Evaluation Exercises
No ratings yet
Self Evaluation Exercises
12 pages
AI Phash3
No ratings yet
AI Phash3
11 pages
Social Media Sentimental Analysis 1
No ratings yet
Social Media Sentimental Analysis 1
30 pages
British Airways Forage Report
No ratings yet
British Airways Forage Report
12 pages
Basenlp
No ratings yet
Basenlp
5 pages
Report
No ratings yet
Report
12 pages
1a NLTK
No ratings yet
1a NLTK
10 pages
Email Spam Classifier
No ratings yet
Email Spam Classifier
22 pages
NLP Assignment
No ratings yet
NLP Assignment
12 pages
21bce3701 Senti K9ar
No ratings yet
21bce3701 Senti K9ar
28 pages
NLP Lab - Manual
No ratings yet
NLP Lab - Manual
33 pages
Text Preprocessing and Sentiment Analysis
No ratings yet
Text Preprocessing and Sentiment Analysis
13 pages
Amazon Food Reviews Analysis
No ratings yet
Amazon Food Reviews Analysis
37 pages
DSBDL Assn 07
No ratings yet
DSBDL Assn 07
4 pages
Natural Language Processing-Section
No ratings yet
Natural Language Processing-Section
29 pages
Module 8 - Text - Update
No ratings yet
Module 8 - Text - Update
42 pages
Experiment 7 ML
No ratings yet
Experiment 7 ML
3 pages
Lab 5
No ratings yet
Lab 5
27 pages
Ir Practical 5
No ratings yet
Ir Practical 5
2 pages
Group 4 MovieReview
No ratings yet
Group 4 MovieReview
10 pages
17 Practicals
No ratings yet
17 Practicals
7 pages
ML Program Output
No ratings yet
ML Program Output
22 pages
Research on Text Topic Modeling
No ratings yet
Research on Text Topic Modeling
26 pages
Raj DV Exp5
No ratings yet
Raj DV Exp5
6 pages
Foundations of Python For AI
No ratings yet
Foundations of Python For AI
67 pages
Rajeev Mishra 20 SCSE1180087
No ratings yet
Rajeev Mishra 20 SCSE1180087
29 pages
MLA TAB Lecture2
No ratings yet
MLA TAB Lecture2
84 pages
Natural Language Processing
No ratings yet
Natural Language Processing
8 pages
DS7NLTK
No ratings yet
DS7NLTK
2 pages
Assignment
No ratings yet
Assignment
6 pages
Sentiment Analysis On Amazon Fine Food Reviews by Using Linear Machine Learning Models
No ratings yet
Sentiment Analysis On Amazon Fine Food Reviews by Using Linear Machine Learning Models
6 pages
Text Vectorization Techniques in NLP
No ratings yet
Text Vectorization Techniques in NLP
5 pages
Sentiment Analysis of Tweets
No ratings yet
Sentiment Analysis of Tweets
9 pages
Advance RND
No ratings yet
Advance RND
24 pages
Sumati
No ratings yet
Sumati
10 pages
Twitter Sentiment Analysis Techniques
No ratings yet
Twitter Sentiment Analysis Techniques
9 pages
Problem Statement
No ratings yet
Problem Statement
10 pages
Topic Classifierby David Caleb
No ratings yet
Topic Classifierby David Caleb
7 pages
Transformer Models for Sentiment Analysis
No ratings yet
Transformer Models for Sentiment Analysis
45 pages
Q 3
No ratings yet
Q 3
2 pages
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
No ratings yet
NLP Transformer-Based Models Used For Sentiment Analysis: 1. BERT
98 pages
Amazon Assignment Ex
No ratings yet
Amazon Assignment Ex
11 pages
ෂඩ්වර්ගය
No ratings yet
ෂඩ්වර්ගය
18 pages
Soper and Mitra-2013 Amcis-An Inquiry Into Mental Models of Web Interface Design
No ratings yet
Soper and Mitra-2013 Amcis-An Inquiry Into Mental Models of Web Interface Design
7 pages
Data Cleaning and Pre Processing 1
No ratings yet
Data Cleaning and Pre Processing 1
12 pages
Text Processing with NLTK in Python
No ratings yet
Text Processing with NLTK in Python
16 pages
Logistic Regression
No ratings yet
Logistic Regression
8 pages
Apache Storm
No ratings yet
Apache Storm
29 pages
JarPro Jarring Analysis Software
No ratings yet
JarPro Jarring Analysis Software
4 pages
Slide PE Control
No ratings yet
Slide PE Control
57 pages
GT Homework 3 PDF
No ratings yet
GT Homework 3 PDF
3 pages
CNN-Based Intrusion Detection Study
No ratings yet
CNN-Based Intrusion Detection Study
7 pages
Essential Math For AI - ML
100% (1)
Essential Math For AI - ML
22 pages
Tal70037 2
No ratings yet
Tal70037 2
1 page
Stine Marie Berge - PHD
No ratings yet
Stine Marie Berge - PHD
147 pages
Module-4 Partial Differential Calculus
No ratings yet
Module-4 Partial Differential Calculus
30 pages
BDA Practical Exam Experiments List
No ratings yet
BDA Practical Exam Experiments List
21 pages
Lecture 10
No ratings yet
Lecture 10
23 pages
Gaussian States in Quantum Information
No ratings yet
Gaussian States in Quantum Information
47 pages
Maa HL 5.19 Differential Equations
No ratings yet
Maa HL 5.19 Differential Equations
27 pages
Tower of Hanoi Using Recursion
No ratings yet
Tower of Hanoi Using Recursion
7 pages
Finite Element Model Updating of Ship Hulls Subjected To Uncertain Wave Loading
No ratings yet
Finite Element Model Updating of Ship Hulls Subjected To Uncertain Wave Loading
18 pages
Question of The Day: N N N N
No ratings yet
Question of The Day: N N N N
8 pages
Optimization of Computer Simulation Models 1997 European Journal of Operatio
No ratings yet
Optimization of Computer Simulation Models 1997 European Journal of Operatio
24 pages
A Survey On Reinforcement Learning Methods For UAV Systems
No ratings yet
A Survey On Reinforcement Learning Methods For UAV Systems
36 pages
Example 4.5
No ratings yet
Example 4.5
19 pages
Symbolic Reduction of Block Diagrams
No ratings yet
Symbolic Reduction of Block Diagrams
2 pages
Statistics & Operations Research Plan
No ratings yet
Statistics & Operations Research Plan
28 pages
Guide To Becoming An AI Expert in 2025.
No ratings yet
Guide To Becoming An AI Expert in 2025.
21 pages
DeviceMien: Network Device Behavior Modeling For Identifying Unknown IoT Devices
No ratings yet
DeviceMien: Network Device Behavior Modeling For Identifying Unknown IoT Devices
12 pages
Fuzzy Sets and Applications Course Outline
No ratings yet
Fuzzy Sets and Applications Course Outline
2 pages
Programming for Engineers Course
No ratings yet
Programming for Engineers Course
2 pages
Tolerance Stack-Up Analysis - A Review
No ratings yet
Tolerance Stack-Up Analysis - A Review
7 pages
G8 - L2 (Term 2)
No ratings yet
G8 - L2 (Term 2)
14 pages
Ass 2
No ratings yet
Ass 2
27 pages
Linear Algebra Chapter 3
No ratings yet
Linear Algebra Chapter 3
27 pages
Examining Generative Adversarial Network For Smart Home DDoS Traffic Generation
No ratings yet
Examining Generative Adversarial Network For Smart Home DDoS Traffic Generation
6 pages
Chapter 5 Design Feedback Cotnrollers For Motor Drives
No ratings yet
Chapter 5 Design Feedback Cotnrollers For Motor Drives
23 pages

Ment Analysis Text Classification

Uploaded by

Ment Analysis Text Classification

Uploaded by

ment-analysis-text-classification

March 24, 2024

[24]: import pandas as pd

#Splitting the data into training and testing

# pandas and numpy

#import count vectorizer

[25]: #Loading the Dataset

[26]: Text Sentiment

[27]: #row and column count

[28]: # count of the negative and positive sentiments

[29]: # assign the count vectorizer to a variable

# get the document term matrix

… too top traffic ve view was wasn waste weather with

[20 rows x 76 columns]

… top traffic ve view was wasn waste weather with Sentiment

[20 rows x 77 columns]

[8]: #preprocessing text function

# Remove stop words

# Combine words back into a single string

# Remove non-alphanumeric characters using regex

# Vectorize text using CountVectorizer

# Return the vectorized text and the vocabulary

[9]: #Summarizing the Encoded Texts into a sparse matrix

[10]: #sparse matrix converting it to an array.

[10]: (20, 58)

[11]: {'love': 27,

[12]: #Splitting the data into training and testing

X_train, X_test, Y_train, Y_test = train_test_split(text_counts,␣

Training the model

# Train the model with training data

Predict the class of the unseen data

[14]: array(['Negative', 'Negative', 'Positive', 'Negative'], dtype='<U8')

[15]: # compare the outputs

[15]: Actual Predicted

Get Evaluation Metrics

[17]: #obtain the confusion matrix

[17]: array([[2, 1],

precision recall f1-score support

Negative 0.67 0.67 0.67 3

You might also like