1. Convert the text into tokens and find the word frequency
from collections import Counter
import re
def tokenize(text):
    # Lowercase the text and split it into word tokens
    tokens = re.findall(r'\b\w+\b', text.lower())
    return tokens
def word_frequency(tokens):
    # Count how often each token occurs
    frequency = Counter(tokens)
    return frequency
text = "This is a simple text. This text is for testing the word frequency program. This is simple."
tokens = tokenize(text)
frequency = word_frequency(tokens)
for word, count in frequency.items():
    print(f"{word}: {count}")
2. Perform Lemmatization and Stemming
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
def tokenize(text):
    tokens = word_tokenize(text.lower())
    return tokens
def lemmatize(tokens):
    # Reduce each token to its dictionary (lemma) form
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return lemmatized_tokens
def stem(tokens):
    # Strip affixes using the Porter stemming algorithm
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    return stemmed_tokens
text = "The striped bats are hanging on their feet for best."
tokens = tokenize(text)
lemmatized_tokens = lemmatize(tokens)
print("Lemmatized Tokens:")
print(lemmatized_tokens)
stemmed_tokens = stem(tokens)
print("\nStemmed Tokens:")
print(stemmed_tokens)
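Note that WordNetLemmatizer treats every token as a noun by default, so verbs such as "hanging" are not reduced to "hang". A minimal sketch that passes a part-of-speech hint (here simply 'v' for every token, purely for illustration; a full pipeline would map pos_tag output to WordNet POS codes):
lemmatizer = WordNetLemmatizer()
# pos='v' lemmatizes each token as a verb, e.g. "hanging" -> "hang"
verb_lemmas = [lemmatizer.lemmatize(token, pos='v') for token in tokens]
print("Verb-lemmatized Tokens:")
print(verb_lemmas)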
3. Implement Bi-gram
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import bigrams
nltk.download('punkt')
def generate_bigrams(text):
    # Tokenize the lowercased text and pair up adjacent tokens
    tokens = word_tokenize(text.lower())
    bigram_list = list(bigrams(tokens))
    return bigram_list
text = "The striped bats are hanging on their feet for best."
bigrams_result = generate_bigrams(text)
print("Bigrams:")
for bigram in bigrams_result:
    print(bigram)
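The same idea generalizes to higher-order n-grams through nltk.util.ngrams. A minimal sketch for trigrams, reusing the text above:
from nltk.util import ngrams

tokens = word_tokenize(text.lower())
# n=3 gives trigrams; any window size n can be passed
trigrams_result = list(ngrams(tokens, 3))
print("Trigrams:")
for trigram in trigrams_result:
    print(trigram)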
4. Identify parts of speech using the Penn Treebank tag set
!pip install nltk
import nltk
from nltk.corpus import treebank
from nltk.tokenize import word_tokenize
from nltk import pos_tag
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')
sentence = "The quick brown fox jumps over the lazy dog."
words = word_tokenize(sentence)
tagged_words = pos_tag(words)
print("Sentence:", sentence)
print("Tagged Words:", tagged_words)
POS tags defined in the Penn Treebank tag set
1. CC Coordinating conjunction
2. CD Cardinal number
3. DT Determiner
4. EX Existential there
5. FW Foreign word
6. IN Preposition or subordinating conjunction
7. JJ Adjective
8. JJR Adjective, comparative
9. JJS Adjective, superlative
10. LS List item marker
11. MD Modal
12. NN Noun, singular or mass
13. NNS Noun, plural
14. NNP Proper noun, singular
15. NNPS Proper noun, plural
16. PDT Predeterminer
17. POS Possessive ending
18. PRP Personal pronoun
19. PRP$ Possessive pronoun
20. RB Adverb
21. RBR Adverb, comparative
22. RBS Adverb, superlative
23. RP Particle
24. SYM Symbol
25. TO to
26. UH Interjection
27. VB Verb, base form
28. VBD Verb, past tense
29. VBG Verb, gerund or present participle
30. VBN Verb, past participle
31. VBP Verb, non-3rd person singular present
32. VBZ Verb, 3rd person singular present
33. WDT Wh-determiner
34. WP Wh-pronoun
35. WP$ Possessive wh-pronoun
36. WRB Wh-adverb
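The same tag definitions can also be printed directly from NLTK. A minimal sketch, assuming the 'tagsets' resource is available for download in your environment:
import nltk
nltk.download('tagsets')
# Print the definition and example words for a single tag...
nltk.help.upenn_tagset('JJ')
# ...or for a family of tags matched by a regular expression
nltk.help.upenn_tagset('VB.*')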
5. Implement HMM for POS tagging and Build a Chunker
import nltk
from nltk.corpus import treebank
from collections import defaultdict
nltk.download('treebank')
tagged_sentences = treebank.tagged_sents()
states = set()
observations = set()
for sentence in tagged_sentences:
    for word, tag in sentence:
        states.add(tag)
        observations.add(word.lower())
states = list(states)
observations = list(observations)
transition_counts = defaultdict(lambda: defaultdict(int))
for sentence in tagged_sentences:
    prev_tag = "<s>"
    for word, tag in sentence:
        transition_counts[prev_tag][tag] += 1
        prev_tag = tag
    transition_counts[prev_tag]["</s>"] += 1
transition_probabilities = defaultdict(lambda: defaultdict(float))
for prev_tag, next_tags in transition_counts.items():
    total_count = sum(next_tags.values())
    for next_tag, count in next_tags.items():
        transition_probabilities[prev_tag][next_tag] = count / total_count
emission_counts = defaultdict(lambda: defaultdict(int))
for sentence in tagged_sentences:
    for word, tag in sentence:
        emission_counts[tag][word.lower()] += 1
emission_probabilities = defaultdict(lambda: defaultdict(float))
for tag, words in emission_counts.items():
    total_count = sum(words.values())
    for word, count in words.items():
        emission_probabilities[tag][word] = count / total_count
initial_counts = defaultdict(int)
for sentence in tagged_sentences:
    initial_counts[sentence[0][1]] += 1
initial_probabilities = defaultdict(float)
total_count = sum(initial_counts.values())
for tag, count in initial_counts.items():
    initial_probabilities[tag] = count / total_count
def viterbi(observations, states, start_p, trans_p, emit_p):
    V = [{}]
    path = {}
    # Initialisation: probability of starting in each state and emitting the first word
    for state in states:
        V[0][state] = start_p[state] * emit_p[state].get(observations[0], 0)
        path[state] = [state]
    # Recursion: extend the best path to each state at time t
    for t in range(1, len(observations)):
        V.append({})
        newpath = {}
        for y in states:
            (prob, state) = max((V[t-1][y0] * trans_p[y0].get(y, 0) * emit_p[y].get(observations[t], 0), y0)
                                for y0 in states)
            V[t][y] = prob
            newpath[y] = path[state] + [y]
        path = newpath
    # Termination: pick the best final state and return its path
    n = len(observations) - 1
    (prob, state) = max((V[n][y], y) for y in states)
    return (prob, path[state])
sentence = "the quick brown fox jumps over the lazy dog"
observations = sentence.lower().split()
prob, tags = viterbi(observations, states, initial_probabilities, transition_probabilities, emission_probabilities)
print("Sentence:", sentence)
print("Tags:", tags)
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser
def chunk_sentence(sentence, grammar):
    words = word_tokenize(sentence)
    tagged = pos_tag(words)
    parser = RegexpParser(grammar)
    chunks = parser.parse(tagged)
    return chunks
grammar = r"""
NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
VP: {<VB.*><NP|PP|CLAUSE>+$} # Chunk verbs and their arguments
PP: {<IN><NP>}               # Chunk prepositions followed by NP
CLAUSE: {<NP><VP>}           # Chunk NP, VP
"""
sentence = "The quick brown fox jumps over the lazy dog"
chunks = chunk_sentence(sentence, grammar)
print(chunks)
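The parser returns an nltk.Tree, so individual chunks can be pulled out by label. A minimal sketch that lists only the noun-phrase chunks found above:
# Iterate over NP subtrees and print the words each one covers
for subtree in chunks.subtrees(filter=lambda t: t.label() == 'NP'):
    print(' '.join(word for word, tag in subtree.leaves()))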
6. Find the synonym of a word and antonym of a word using WordNet
import nltk
from nltk.corpus import wordnet as wn
nltk.download('wordnet')
nltk.download('omw-1.4')
def get_synonyms(word):
    synonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return synonyms
def get_antonyms(word):
    antonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            if lemma.antonyms():
                antonyms.add(lemma.antonyms()[0].name())
    return antonyms
word = "happy"
synonyms = get_synonyms(word)
antonyms = get_antonyms(word)
print(f"Synonyms of '{word}':", synonyms)
print(f"Antonyms of '{word}':", antonyms)
7. Implement semantic role labeling to identify named entities (Same as Program-9)
Open the Command Prompt
Install Python 3.12.4
Change to the directory that contains your program files
C:\> pip install spacy
C:\> python -m spacy download en_core_web_sm
C:\> python --version
C:\> pip --version
C:\> pip show spacy
C:\> python -c "import spacy"   (verifies that spaCy can be imported)
C:\> pip install jupyter
C:\> jupyter notebook
• This will open the Jupyter environment
• Create a file named "test_spacy.ipynb"
Paste the following code into a single cell
import spacy
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")
# Example text
text = "Barack Obama was born in Hawaii. He was elected president in 2008."
# Process the text
doc = nlp(text)
# Extract named entities
named_entities = [(ent.text, ent.label_) for ent in doc.ents]
# Print named entities
print("Named Entities:", named_entities)
Alternatively, save the same code as test_spacy.py and run it from the prompt: C:\> python test_spacy.py
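To make the entity labels easier to read, spaCy can expand each label code into a short description. A minimal sketch that extends the cell above:
# spacy.explain maps label codes such as 'GPE' or 'DATE' to plain-English descriptions
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_))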
8. Implement POS tagging using LSTM
!pip install tensorflow
import tensorflow as tf
print(tf.__version__)
!pip install numpy tensorflow keras scikit-learn nltk
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, TimeDistributed
from sklearn.model_selection import train_test_split
import nltk
from nltk.corpus import treebank
from nltk.tokenize import word_tokenize
nltk.download('treebank')
nltk.download('punkt')
def load_data():
    sentences = treebank.sents()
    tags = treebank.tagged_sents()
    words = [word.lower() for sentence in sentences for word in sentence]
    tags_flattened = [tag for sentence in tags for _, tag in sentence]
    unique_words = sorted(set(words))
    unique_tags = sorted(set(tags_flattened))
    # Reserve index 0 for padding and index 1 for unknown words
    word_to_index = {word: i + 2 for i, word in enumerate(unique_words)}
    tag_to_index = {tag: i + 1 for i, tag in enumerate(unique_tags)}
    word_to_index['<PAD>'] = 0
    word_to_index['<UNK>'] = 1
    tag_to_index['<PAD>'] = 0
    index_to_word = {i: word for word, i in word_to_index.items()}
    index_to_tag = {i: tag for tag, i in tag_to_index.items()}
    # Lowercase the lookup so it matches the lowercased vocabulary built above
    X = [[word_to_index.get(word.lower(), 1) for word in sentence] for sentence in sentences]
    y = [[tag_to_index[tag] for word, tag in sentence] for sentence in tags]
    # Pad every sentence to the same length and one-hot encode the tags
    max_len = max(len(sentence) for sentence in X)
    X = pad_sequences(X, maxlen=max_len, padding='post')
    y = pad_sequences(y, maxlen=max_len, padding='post')
    y = to_categorical(y, num_classes=len(tag_to_index))
    return X, y, word_to_index, tag_to_index, index_to_word, index_to_tag
# Load data and mappings
X, y, word_to_index, tag_to_index, index_to_word, index_to_tag = load_data()
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = Sequential()
model.add(Embedding(input_dim=len(word_to_index), output_dim=50, input_length=X_train.shape[1]))
model.add(LSTM(units=100, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))
model.add(TimeDistributed(Dense(len(tag_to_index), activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
history = model.fit(X_train, y_train, batch_size=32, epochs=5, validation_split=0.1, verbose=1)
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
# Predict POS tags for a new sentence
def predict_pos(sentence):
    tokens = word_tokenize(sentence.lower())
    # Map each token to its index (1 = <UNK> for out-of-vocabulary words)
    indices = [word_to_index.get(token, 1) for token in tokens]
    indices = pad_sequences([indices], maxlen=X_train.shape[1], padding='post')
    predictions = model.predict(indices)
    # Take the highest-probability tag at each timestep; zip truncates to the real tokens
    predicted_tags = [index_to_tag[np.argmax(tag)] for tag in predictions[0]]
    return list(zip(tokens, predicted_tags))
# Example usage
sentence = "Barack Obama was born in Hawaii."
print(predict_pos(sentence))
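Because every sentence is padded to the same length, the reported accuracy also counts the easy <PAD> positions. One common mitigation is to let the embedding layer mask padded timesteps; a minimal sketch of an alternative model definition under the same data setup (not part of the program above):
model = Sequential()
# mask_zero=True tells the downstream layers to ignore timesteps whose input index is 0 (<PAD>)
model.add(Embedding(input_dim=len(word_to_index), output_dim=50, mask_zero=True))
model.add(LSTM(units=100, return_sequences=True))
model.add(TimeDistributed(Dense(len(tag_to_index), activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])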
10. Develop a movie review system (sentiment analysis on movie data)
!pip install numpy pandas scikit-learn nltk
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import nltk
nltk.download('movie_reviews')
nltk.download('punkt')
from nltk.corpus import movie_reviews
from sklearn.datasets import load_files
# Load the movie reviews dataset from NLTK
def load_movie_reviews():
    reviews = []
    labels = []
    for fileid in movie_reviews.fileids():
        category = fileid.split('/')[0]  # 'pos' or 'neg'
        with movie_reviews.open(fileid) as f:
            review = f.read()
        reviews.append(review)
        labels.append(category)
    return pd.DataFrame({'review': reviews, 'sentiment': labels})
data = load_movie_reviews()
# Map sentiments to binary labels
data['sentiment'] = data['sentiment'].map({'pos': 1, 'neg': 0})
# Split the data into training and testing sets
X = data['review']
y = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Build a pipeline with CountVectorizer and MultinomialNB
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),  # Converts text to feature vectors
    ('classifier', MultinomialNB())     # Naive Bayes classifier
])
# Train the model
pipeline.fit(X_train, y_train)
# Make predictions on the test set
y_pred = pipeline.predict(X_test)
# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Negative', 'Positive']))
def predict_sentiment(review):
    prediction = pipeline.predict([review])
    sentiment = 'Positive' if prediction[0] == 1 else 'Negative'
    return sentiment
# Example usage
new_review = "The movie was fantastic! I really enjoyed the performances."
print(f"Review: {new_review}")
print(f"Predicted Sentiment: {predict_sentiment(new_review)}")