3. To construct a part-of-speech tagger using a Hidden Markov Model (HMM) and implement the Viterbi algorithm to decode the most probable tag sequence for a sentence.
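An HMM tagger is estimated from three tables counted over the training corpus: start probabilities P(tag at position 1), transition probabilities P(tag_t | tag_t-1), and emission probabilities P(word | tag). Viterbi decoding then fills a trellis of best log-scores, V[t][j] = max over i of (V[t-1][i] + log P(j | i)) + log P(word_t | j), and returns the tag sequence achieving the best final score. The program below works in log space to avoid underflow and substitutes a small floor (1e-6) for unseen events in place of proper smoothing.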
# Simple POS Tagger using HMM + Viterbi
from collections import defaultdict
import math

# Training data: sentences as lists of (word, tag) pairs
training_data = [
    [('the', 'DET'), ('cat', 'NOUN'), ('sat', 'VERB')],
    [('the', 'DET'), ('dog', 'NOUN'), ('barked', 'VERB')],
    [('a', 'DET'), ('cat', 'NOUN'), ('meowed', 'VERB')],
]
# Step 1: Count frequencies
tags = set()
start_counts = defaultdict(int)
transition_counts = defaultdict(lambda: defaultdict(int))
emission_counts = defaultdict(lambda: defaultdict(int))
for sentence in training_data:
    prev_tag = None
    for i, (word, tag) in enumerate(sentence):
        tags.add(tag)                    # record the tag set
        emission_counts[tag][word] += 1  # count word given tag
        if i == 0:
            start_counts[tag] += 1       # first tag of the sentence
        if prev_tag:
            transition_counts[prev_tag][tag] += 1  # tag bigram
        prev_tag = tag
# Convert counts to probabilities
def normalize(counts):
    total = sum(counts.values())
    return {k: v / total for k, v in counts.items()}
start_probs = normalize(start_counts)
transition_probs = {t: normalize(transition_counts[t]) for t in tags}
emission_probs = {t: normalize(emission_counts[t]) for t in tags}
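# Sanity check: with this toy corpus the learned tables are fully
# determined, e.g.
#   start_probs             == {'DET': 1.0}
#   transition_probs['DET'] == {'NOUN': 1.0}
#   emission_probs['NOUN']  == {'cat': 2/3, 'dog': 1/3}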
# Step 2: Viterbi algorithm
def viterbi(sentence):
    V = [{}]   # V[t][tag] = best log-probability of a path ending in tag at position t
    path = {}  # best tag sequence ending in each tag
    # Initialization: start probability plus emission of the first word
    for tag in tags:
        V[0][tag] = math.log(start_probs.get(tag, 1e-6)) + \
                    math.log(emission_probs[tag].get(sentence[0], 1e-6))
        path[tag] = [tag]
    # Recursion: extend the best previous path to each current tag
    for t in range(1, len(sentence)):
        V.append({})
        new_path = {}
        for curr_tag in tags:
            (prob, best_prev) = max(
                (V[t-1][prev_tag] + math.log(transition_probs[prev_tag].get(curr_tag, 1e-6)) +
                 math.log(emission_probs[curr_tag].get(sentence[t], 1e-6)), prev_tag)
                for prev_tag in tags
            )
            V[t][curr_tag] = prob
            new_path[curr_tag] = path[best_prev] + [curr_tag]
        path = new_path
    # Termination: pick the highest-scoring final tag and return its path
    (prob, final_tag) = max((V[-1][tag], tag) for tag in tags)
    return path[final_tag]
# Test
test_sentence = ['the', 'cat', 'barked']
print("Sentence:", test_sentence)
print("Predicted Tags:", viterbi(test_sentence))