0% found this document useful (0 votes)
7 views4 pages

IR Assignment5

The document is an assignment from Walchand Institute of Technology focusing on Information Retrieval, authored by Ayush Pande. It includes a Python program that implements various querying patterns for searching through documents, utilizing libraries like NLTK for text processing. The program features a graphical user interface for users to select query methods such as prefix, suffix, query match, and edit distance.

Uploaded by

vinayostwal707
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
7 views4 pages

IR Assignment5

The document is an assignment from Walchand Institute of Technology focusing on Information Retrieval, authored by Ayush Pande. It includes a Python program that implements various querying patterns for searching through documents, utilizing libraries like NLTK for text processing. The program features a graphical user interface for users to select query methods such as prefix, suffix, query match, and edit distance.

Uploaded by

vinayostwal707
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

WALCHAND INSTITUTE OF TECHNOLOGY, SOLAPUR

INFORMATION TECHNOLOGY
2021-22 SEMESTER - II
ASSIGNMENT - 5
Subject: Information Retrieval

Name: Ayush Pande Roll no: 74 Class: Final Year B.Tech IT

Title: Searching using various Querying Patterns

Program Code:

import os
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk
# Fetch the NLTK data this script relies on. Besides the stop-word list and
# WordNet, word_tokenize() needs the Punkt sentence-tokenizer models; newer
# NLTK releases look them up as 'punkt_tab', older ones as 'punkt', so both
# are requested.
for nltk_resource in ('stopwords', 'wordnet', 'punkt', 'punkt_tab'):
    nltk.download(nltk_resource)
def get_docs(dataset_path):
    """Return the paths of the document files directly inside dataset_path.

    Only regular files are returned: sub-directories are skipped because
    the paths are later passed to open(). The list is sorted so document
    order is the same on every run (os.listdir gives no ordering guarantee).

    Args:
        dataset_path: Directory that holds the documents.

    Returns:
        A sorted list of paths, each one dataset_path joined with a file name.

    Raises:
        OSError: If dataset_path cannot be listed (e.g. it does not exist).
    """
    candidates = (
        os.path.join(dataset_path, entry) for entry in os.listdir(dataset_path)
    )
    return sorted(path for path in candidates if os.path.isfile(path))

# Build the in-memory index: corpus[i] holds the distinct words kept from
# docs[i], so the two lists are always index-aligned.
dataset_path = "docs"
print(dataset_path)
docs = get_docs(dataset_path)
print(docs)

stop_words = set(stopwords.words('english'))
# Punctuation, digits and spacing characters. A token made up solely of
# these is not a searchable word. The check is per character: testing the
# whole token with `in` against one long string is a substring test, which
# drops "12" but keeps "13" or "2021".
non_word_chars = set(".,/!@#$%^&*()_+={}[]|:0123456789 ;'")

corpus = []
for doc in docs:
    # Explicit encoding so the result does not depend on the platform
    # default; undecodable bytes are replaced rather than aborting the run.
    with open(doc, mode='r', encoding='utf-8', errors='replace') as f:
        text = f.read()
    # word_tokenize needs NLTK's Punkt tokenizer data to be installed.
    word_tokens = word_tokenize(text)
    filtered_sentence = [
        w for w in word_tokens
        if len(w) >= 2
        and w not in stop_words
        and not all(ch in non_word_chars for ch in w)
    ]
    # De-duplicate: only the presence of a word in a document matters.
    corpus.append(list(set(filtered_sentence)))
print(corpus)
from tkinter import *
# Main application window.
root = Tk()
root.title('Pattern Matching')
root.geometry("500x550")

# Tk variables backing the form controls.
var = IntVar(value=1)  # number of the selected query method; starts at 1
variable1 = StringVar()  # text typed into the query entry
variable2 = StringVar()  # text typed into the edit-distance entry
def _docs_with_matching_word(word_matches):
    """Return the sorted paths of documents containing an accepted word.

    word_matches is called with each word of a document's entry in the
    module-level corpus; a document is reported once no matter how many of
    its words are accepted. Sorting keeps the displayed order stable.
    """
    return sorted(
        doc_path
        for doc_path, words in zip(docs, corpus)
        if any(word_matches(word) for word in words)
    )


def _fill_listbox(listbox, lines):
    """Replace whatever listbox currently shows with lines."""
    listbox.delete(0, END)
    for line in lines:
        listbox.insert(END, line)


# Widgets created by the previous "Show Results" click. They are destroyed on
# the next click so result lists and edit-distance controls do not pile up on
# top of each other.
_result_widgets = []


def quit_loop():
    """Run the selected query method and list the matching documents.

    Reads the chosen method from var and the query text from variable1,
    publishes them as the module-level names selection and query, and shows
    the matching document paths in a list box:

    * 1 - some word starts with the query
    * 2 - some word ends with the query
    * 3 - some word equals the query
    * 4 - adds an entry and button; pressing the button lists documents with
          a word whose edit distance to the query is strictly below the
          number entered

    Any other selection prints "Select Proper Button".
    """
    global selection
    global query
    print("Selection:", var.get())
    query = variable1.get()
    selection = var.get()

    # Remove what the previous click left behind before building new widgets.
    while _result_widgets:
        _result_widgets.pop().destroy()

    results_box = Listbox(root, width=80, height=7)
    results_box.grid(column=0, row=12)
    _result_widgets.append(results_box)

    if selection == 1:
        _fill_listbox(
            results_box,
            _docs_with_matching_word(lambda word: word.startswith(query)),
        )
    elif selection == 2:
        _fill_listbox(
            results_box,
            _docs_with_matching_word(lambda word: word.endswith(query)),
        )
    elif selection == 3:
        _fill_listbox(
            results_box,
            _docs_with_matching_word(lambda word: word == query),
        )
    elif selection == 4:
        def show():
            """List documents having a word within the entered edit distance."""
            from nltk.metrics import edit_distance

            # Parse the threshold once and reject non-numeric input instead
            # of raising ValueError inside the Tk callback.
            try:
                threshold = int(variable2.get())
            except ValueError:
                _fill_listbox(
                    results_box, ["Edit distance must be a whole number"]
                )
                return
            _fill_listbox(
                results_box,
                _docs_with_matching_word(
                    lambda word: edit_distance(query, word) < threshold
                ),
            )

        distance_label = Label(root, text="Enter Min, Edit Distance : ")
        distance_label.grid(row=9, sticky=W)
        distance_entry = Entry(root, width=30, textvariable=variable2)
        distance_entry.place(y=160, x=150)
        distance_button = Button(root, text="Show Results", command=show)
        distance_button.place(y=160, x=350)
        _result_widgets.extend(
            [distance_label, distance_entry, distance_button]
        )
    else:
        print("Select Proper Button")

2
# Query-method selector: each radio button stores its number in `var`, and
# the same number is used as its grid row (rows 1-4 under the heading).
Label(root, text="Select Query Processing Method: ").grid(row=0, sticky=W)
query_methods = ("Prefix", "Suffix", "Query Match", "Edit Distance")
for method_number, method_name in enumerate(query_methods, start=1):
    Radiobutton(
        root, text=method_name, variable=var, value=method_number
    ).grid(row=method_number, sticky=W)

# Query text box and the button that runs the selected search.
Label(root, text="Enter Query").grid(row=6, sticky=W)
Entry(root, width=30, textvariable=variable1).place(y=125, x=90)
Button(root, text="Show Results", command=quit_loop).grid(row=8, sticky=W)

# Hand control to Tk; returns only when the window is closed.
root.mainloop()

Screenshots/Output:

3
4

You might also like