0% found this document useful (0 votes)
24 views32 pages

QnA Corpus Tool

The document contains the implementation of a Q&A tool using a trie and a min-heap to manage and retrieve paragraphs from books based on user queries. It defines several classes including Node, minheap, and QNA_tool, which handle data structures for storing book information and processing queries. The tool allows for inserting sentences, retrieving top paragraphs related to a question, and interacting with a language model API for generating responses.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views32 pages

QnA Corpus Tool

The document contains the implementation of a Q&A tool using a trie and a min-heap to manage and retrieve paragraphs from books based on user queries. It defines several classes including Node, minheap, and QNA_tool, which handle data structures for storing book information and processing queries. The tool allows for inserting sentences, retrieving top paragraphs related to a question, and interacting with a language model API for generating responses.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 32

Node.cpp
#include "Node.h"
#include <assert.h>

// Default constructor: builds an unlinked node with every location field set
// to zero, so that no member is left with an indeterminate value.
Node::Node()
    : left(nullptr),
      right(nullptr),
      book_code(0),
      page(0),
      paragraph(0),
      sentence_no(0),
      offset(0) {
}

// Builds an unlinked node (both neighbours null) describing the location
// given by book code, page, paragraph, sentence number and offset.
Node::Node(int b_code, int pg, int para, int s_no, int off)
    : left(nullptr),
      right(nullptr),
      book_code(b_code),
      page(pg),
      paragraph(para),
      sentence_no(s_no),
      offset(off) {
}

Node.h
#pragma once
#include <string>
using namespace std;

// A list node that identifies one location in the corpus (book, page,
// paragraph, sentence, offset) and can be chained to neighbours through
// `left` and `right`.
class Node {
public:
// Neighbouring nodes; the five-argument constructor sets both to nullptr.
Node* left;
Node* right;

// Location fields; the five-argument constructor copies them from its arguments.
int book_code;
int page;
int paragraph;
int sentence_no;
int offset;

// Default constructor (defined in Node.cpp).
Node();
// Builds an unlinked node for the given location.
Node(int b_code, int pg, int para, int s_no, int off);
};

qna_tool.cpp
#include "qna_tool.h"

#include <assert.h>

#include <cstdio>
#include <cstdlib>
#include <sstream>

// One heap entry: the location of a paragraph (book code, page number,
// paragraph number) together with the score `frac` accumulated for it.
class minheapnode {
public:
    int bookcode;
    int pagno;
    int parno;
    long double frac;

    // Default entry: location (0, 0, 0) with a score of zero.
    minheapnode() : bookcode(0), pagno(0), parno(0), frac(0) {}

    // Entry for paragraph `pr` on page `p` of book `b`, carrying score `f`.
    minheapnode(int b, int p, int pr, long double f)
        : bookcode(b), pagno(p), parno(pr), frac(f) {}
};

class minheap{
public:
// vector<minheapnode> arr;
vector<minheapnode> heap;
int k;
minheap(int k){
heap.resize(k);
this->k=k;
}
minheap(){}

void swap(minheapnode &a, minheapnode &b) {


minheapnode temp = a;
a = b;
b = temp;
}

int getParentIndex(int i) {
return (i - 1) / 2;
}

int getLeftChildIndex(int i) {
return 2 * i + 1;
}

int getRightChildIndex(int i) {
return 2 * i + 2;
}

void heapifyUp(int index) {


if (index == 0) return; // base condition for termination of a
recursive invocation of the fnc

int parentIndex = getParentIndex(index);

if (heap[parentIndex].frac > heap[index].frac) {


swap(heap[parentIndex], heap[index]);
heapifyUp(parentIndex);
}
}
void heapifyDown(int index) {
int leftChild = getLeftChildIndex(index);
int rightChild = getRightChildIndex(index);

if (leftChild >= heap.size()) return; // No children

int minindex = index;

if (heap[minindex].frac > heap[leftChild].frac) {


minindex = leftChild;
}

if (rightChild < heap.size() && heap[minindex].frac >


heap[rightChild].frac) {
minindex = rightChild;
}

if (minindex != index) {
swap(heap[minindex], heap[index]);
heapifyDown(minindex);
}
}
minheapnode minElem(){
return heap[0];
}

minheapnode deleteMin() {
// if (heap.empty()) {
// cout << "Heap is empty!" << endl;
// return 0;
// }
minheapnode temp=heap[0];
heap[0] = heap.back();
heap.pop_back();
heapifyDown(0);
return temp;

void insert(minheapnode val) {


// if (heap.size()>k){
if (heap[0].frac<val.frac){
heap[0]=val;
heapifyDown(0);
}
// }
// else{
// heap.push_back(val); //satisfies the structural prop
// heapifyUp(heap.size() - 1);
// }
}

// void printHeap (){


// for (const auto &elem : heap) {
// cout << elem.frac << " ";
// }
// cout << endl;
// }

void makeheapempty(){
heap.clear();
}
// void insertt(minheapnode val){
// if (heap[0].frac<val.frac){
// heap[0]=val;
// heapifyDown(0);
// }
// }

};

using namespace std;


// Score accumulator for a single paragraph; starts at zero.
class paranode {
public:
    long double frac;

    paranode() : frac(0) {}
};

// Per-page table of paragraph accumulators, indexed by paragraph number.
// All `paramax` slots start out null.
class pagenode {
public:
    std::vector<paranode*> vec;

    pagenode(int paramax) : vec() {
        vec.resize(paramax, nullptr);
    }
};

// Per-book table of pages, indexed by page number.
// All `pagamax` slots start out null.
class booknode {
public:
    std::vector<pagenode*> vec;

    booknode(int pagamax) : vec() {
        vec.resize(pagamax, nullptr);
    }
};

// Top-level table of books, indexed by book code.
// All `bookmax` slots start out null.
class hashnode {
public:
    std::vector<booknode*> vec;

    hashnode(int bookmax) : vec() {
        vec.resize(bookmax, nullptr);
    }
};

class triehash{
public:
hashnode* root;
minheap minhp;
int bkmax;
int pgmax;
int prmax;
triehash(int bookmax,int pagemax,int paramax,int k){
root=new hashnode(bookmax);
bkmax=bookmax;
pgmax=pagemax;
prmax=paramax;
minhp=minheap(k);
}
void insert(word* wordlist){
if (wordlist==nullptr) return;
if (wordlist->head==nullptr) return;
wordnode* temp=wordlist->head->next;
long double frac=(static_cast<long
double>(wordlist->corpuscount+1))/(wordlist->csvcount+1);
while (temp!=wordlist->tail){
int bkcode=temp->bookcode;
int pgno=temp->pageno;
int parno=temp->parano;
if (!root->vec[bkcode]) root->vec[bkcode]=new booknode(pgmax);
if (!root->vec[bkcode]->vec[pgno])
root->vec[bkcode]->vec[pgno]=new pagenode(prmax);
if (!root->vec[bkcode]->vec[pgno]->vec[parno])
root->vec[bkcode]->vec[pgno]->vec[parno]=new paranode();
root->vec[bkcode]->vec[pgno]->vec[parno]->frac+=frac;
temp=temp->next;
}
}

void heapinsert(){
for (int i=0;i<bkmax;i++){
if (root->vec[i]){
for (int j=0;j<pgmax;j++){
if (root->vec[i]->vec[j]){
for (int k=0;k<prmax;k++){
if (root->vec[i]->vec[j]->vec[k]){

minhp.insert(minheapnode(i,j,k,root->vec[i]->vec[j]->vec[k]->frac));
}
}
}
}
}
}
}
Node* givelargestk(int k){
Node* temp=nullptr;
// Node*head=nullptr;
for (int i=0;i<k;i++){
minheapnode m=minhp.deleteMin();
if (temp==nullptr){
temp=new Node(m.bookcode,m.pagno,m.parno,0,0);
// temp=head;
}
else {
Node* n=new Node(m.bookcode,m.pagno,m.parno,0,0);
temp->left=n;
n->right=temp;
temp=n;
}
}
// temp->right=nullptr;
// head->left=nullptr;
return temp;
}

};

// Returns a copy of `s` in which every ASCII capital letter 'A'..'Z' has been
// replaced by its lower-case counterpart; all other characters are unchanged.
std::string tolower(std::string s) {
    for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
        const char ch = s[pos];
        if (ch >= 'A' && ch <= 'Z') {
            s[pos] = static_cast<char>(ch + 32);
        }
    }
    return s;
}

// Splits `sentence` into words and lower-cases them (ASCII 'A'..'Z' only).
//
// Any of the characters in ` .,-:!"'()?[];@` ends the current word; runs of
// separators never produce empty words. Returns the words in order of
// appearance.
std::vector<std::string> getmyword(std::string sentence) {
    const std::string separators = " .,-:!\"\'()?[];@";

    std::vector<std::string> words;
    std::string current;
    for (char ch : sentence) {
        if (separators.find(ch) != std::string::npos) {
            // A separator closes the word collected so far, if any.
            if (!current.empty()) {
                words.push_back(current);
                current.clear();
            }
        } else {
            // Lower-case while collecting; same mapping as tolower(string).
            current += (ch >= 'A' && ch <= 'Z') ? static_cast<char>(ch + 32) : ch;
        }
    }
    if (!current.empty()) {
        words.push_back(current);
    }
    return words;
}

// Starts with an empty corpus: the largest book code, page number and
// paragraph number seen so far are all zero. The `dict` member is
// default-constructed; no separate local dictionary is created here.
QNA_tool::QNA_tool() : bmax(0), pgmaxx(0), prmaxx(0) {
}

// Nothing to release by hand; members clean up after themselves.
QNA_tool::~QNA_tool() = default;

// Adds one corpus sentence to the dictionary and records the largest book
// code, page number and paragraph number seen so far; get_top_k_para() uses
// those maxima to size its score table.
void QNA_tool::insert_sentence(int book_code, int page, int paragraph, int sentence_no, string sentence){
    dict.insert_sentence(book_code, page, paragraph, sentence_no, sentence);

    if (page > pgmaxx) pgmaxx = page;
    if (book_code > bmax) bmax = book_code;
    if (paragraph > prmaxx) prmaxx = paragraph;

    // searchengine.insert_sentence(book_code,page,paragraph,sentence_no,sentence);
    return;
}

// Scores every paragraph against the words of `question` and returns the
// head of a linked list (chained through `right`) of up to k nodes describing
// the best-scoring paragraphs. Returns nullptr when k is not positive.
// The caller owns the returned nodes.
Node* QNA_tool::get_top_k_para(string question, int k) {
    if (k <= 0) return nullptr;

    // The table lives on the stack so it is released when the function returns.
    // NOTE(review): the +3 looks like head-room above the largest index seen
    // in insert_sentence() - confirm the intended margin.
    triehash scores(bmax + 3, pgmaxx + 3, prmaxx + 3, k);

    const vector<string> qvec = getmyword(question);
    for (const string& token : qvec) {
        scores.insert(dict.get_word_count(token));
    }
    scores.heapinsert();
    return scores.givelargestk(k);
}

// Same ranking as get_top_k_para(), except that question words for which
// dict.is_unwanted() returns true contribute nothing to the scores.
// Returns nullptr when k is not positive. The caller owns the returned nodes.
Node* QNA_tool::get_top_k_modified_para(string question,int k){
    if (k <= 0) return nullptr;

    // The table lives on the stack so it is released when the function returns.
    // NOTE(review): the +3 looks like head-room above the largest index seen
    // in insert_sentence() - confirm the intended margin.
    triehash scores(bmax + 3, pgmaxx + 3, prmaxx + 3, k);

    const vector<string> qvec = getmyword(question);
    for (const string& token : qvec) {
        if (dict.is_unwanted(token)) continue;
        scores.insert(dict.get_word_count(token));
    }
    scores.heapinsert();
    return scores.givelargestk(k);
}

void QNA_tool::query(string question, string filename){


// Implement your function here
std::cout << "Q: " << question << std::endl;
std::cout << "A: " << "Studying COL106 :)" << std::endl;
Node* root=get_top_k_modified_para(question,5);

query_llm(filename,root,5,"sk-5DcOuSVBYXluGzkJUutRT3BlbkFJNKtruXeIUJbE2UP2
fUlb",question);
return;
}

// Returns the text of one paragraph by re-reading the corpus file
// "mahatma-gandhi-collected-works-volume-<book_code>.txt" and concatenating
// the sentence of every record whose first three metadata fields equal
// (book_code, page, paragraph).
//
// Each record is read as "<tuple>)" followed by the rest of the line as the
// sentence; tuple fields are comma separated and may be wrapped in single
// quotes. Prints the requested location to stdout. Terminates the program
// with exit(1) when the file cannot be opened. Records with fewer than three
// fields never match; fields beyond the fifth are parsed but not stored.
std::string QNA_tool::get_paragraph(int book_code, int page, int paragraph){

    cout << "Book_code: " << book_code << " Page: " << page << " Paragraph: " << paragraph << endl;

    std::string filename = "mahatma-gandhi-collected-works-volume-";
    filename += to_string(book_code);
    filename += ".txt";

    std::ifstream inputFile(filename);
    if (!inputFile.is_open()) {
        std::cerr << "Error: Unable to open the input file " << filename << "." << std::endl;
        exit(1);
    }

    std::string tuple;
    std::string sentence;
    std::string res = "";

    while (std::getline(inputFile, tuple, ')') && std::getline(inputFile, sentence)) {
        tuple += ')';

        const int kFields = 5;
        int metadata[kFields] = {0, 0, 0, 0, 0};
        std::istringstream iss(tuple);
        std::string token;

        // Ignore the first character (the opening parenthesis)
        iss.ignore(1);

        // Parse and convert the elements to integers
        int idx = 0;
        while (std::getline(iss, token, ',')) {
            // Trim leading and trailing white spaces
            size_t start = token.find_first_not_of(" ");
            size_t end = token.find_last_not_of(" ");
            if (start != std::string::npos && end != std::string::npos) {
                token = token.substr(start, end - start + 1);
            }

            int num;
            if (token[0] == '\'') {
                // Remove the single quotes and convert to integer
                num = std::stoi(token.substr(1, token.length() - 2));
            } else {
                num = std::stoi(token);
            }

            // Never write past the end of the fixed-size array.
            if (idx < kFields) {
                metadata[idx] = num;
            }
            idx++;
        }

        if (
            (idx >= 3) &&
            (metadata[0] == book_code) &&
            (metadata[1] == page) &&
            (metadata[2] == paragraph)
        ){
            res += sentence;
        }
    }

    inputFile.close();
    return res;
}

// Writes the selected paragraphs and the prompt to disk, then runs the Python
// helper script.
//
//   filename  Python script to run
//   root      head of the paragraph list (followed through `right`)
//   k         maximum number of paragraphs to write
//   API_KEY   key passed to the script on its command line
//   question  user question appended to the prompt
//
// Paragraph i is written to "paragraph_<i>.txt" and the prompt to "query.txt".
// If the list holds fewer than k nodes, only the available ones are written
// and that smaller count is passed to the script.
void QNA_tool::query_llm(string filename, Node* root, int k, string API_KEY, string question){

    // first write the k paragraphs into different files
    Node* traverse = root;
    int num_paragraph = 0;

    while (num_paragraph < k && traverse != nullptr) {
        string p_file = "paragraph_";
        p_file += to_string(num_paragraph);
        p_file += ".txt";
        // delete the file if it exists
        remove(p_file.c_str());
        ofstream outfile(p_file);
        string paragraph = get_paragraph(traverse->book_code, traverse->page, traverse->paragraph);
        assert(paragraph != "$I$N$V$A$L$I$D$");
        outfile << paragraph;
        outfile.close();
        traverse = traverse->right;
        num_paragraph++;
    }

    // write the query to query.txt
    ofstream outfile("query.txt");
    outfile << "These are the excerpts from Mahatma Gandhi's books.\nOn the basis of this, ";
    outfile << question;
    // You can add anything here - show all your creativity and skills of using ChatGPT
    outfile.close();

    // you do not need to necessarily provide k paragraphs - can configure yourself

    // python3 <filename> API_KEY num_paragraphs query.txt
    // NOTE(review): filename and API_KEY are pasted into a shell command
    // unquoted; they must not come from untrusted input.
    string command = "python ";
    command += filename;
    command += " ";
    command += API_KEY;
    command += " ";
    command += to_string(num_paragraph);
    command += " ";
    command += "query.txt";

    system(command.c_str());
    return;
}
// #undef int

qna_tool.h
#pragma once
#include <iostream>
#include <fstream>
#include "Node.h"
#include "dict.h"

using namespace std;

class QNA_tool {

private:

// You are free to change the implementation of this function


void query_llm(string filename, Node* root, int k, string API_KEY,
string question);
// filename is the python file which will call ChatGPT API
// root is the head of the linked list of paragraphs
// k is the number of paragraphs (or the number of nodes in linked
list)
// API_KEY is the API key for ChatGPT
// question is the question asked by the user

// You can add attributes/helper functions here


Dict dict;
int bmax;
int pgmaxx;
int prmaxx;

public:

/* Please do not touch the attributes and


functions within the guard lines placed below */
/* ------------------------------------------- */

QNA_tool(); // Constructor
~QNA_tool(); // Destructor

void insert_sentence(int book_code, int page, int paragraph, int


sentence_no, string sentence);
// This function is similar to the functions in dict and search
// The corpus will be provided to you via this function
// It will be called for each sentence in the corpus

Node* get_top_k_para(string question, int k);


// This function takes in a question, preprocess it
// And returns a list of paragraphs which contain the question
// In each Node, you must set: book_code, page, paragraph (other
parameters won't be checked)

std::string get_paragraph(int book_code, int page, int paragraph);


// Given the book_code, page number, and the paragraph number, returns
the string paragraph.
// Searches through the corpus.

void query(string question, string filename);


// This function takes in a question and a filename.
// It should write the final answer to the specified filename.
/* -----------------------------------------*/
/* Please do not touch the code above this line */
Node* get_top_k_modified_para(string question, int k);
// You can add attributes/helper functions here
};

Search.cpp
// Do NOT add any other includes
#include "search.h"

// Placeholder constructor: the search engine holds no state yet.
SearchEngine::SearchEngine() = default;

// Placeholder destructor: there is nothing to release.
SearchEngine::~SearchEngine() = default;

// Unimplemented stub: accepts a sentence and its location, stores nothing.
void SearchEngine::insert_sentence(int book_code, int page, int paragraph, int sentence_no, string sentence) {
    (void)book_code;
    (void)page;
    (void)paragraph;
    (void)sentence_no;
    (void)sentence;
}

// Unimplemented stub: reports that `pattern` occurs nowhere.
// Sets `n_matches` to 0, so the caller never reads a stale count, and returns
// nullptr (an empty result list).
Node* SearchEngine::search(string pattern, int& n_matches){
    (void)pattern;
    n_matches = 0;
    return nullptr;
}

Search.h
// Do NOT add any other includes
#include <string>
#include <vector>
#include <iostream>
#include "Node.h"
using namespace std;

// Pattern-search interface over the corpus. The member functions are defined
// in the accompanying .cpp file.
class SearchEngine {
private:
// You can add attributes/helper functions here

public:
/* Please do not touch the attributes and
functions within the guard lines placed below */
/* ------------------------------------------- */
SearchEngine();

~SearchEngine();

// Receives one sentence together with its location in the corpus.
void insert_sentence(int book_code, int page, int paragraph, int


sentence_no, string sentence);

// Looks up `pattern`; `n_matches` is an output parameter and the result is
// a pointer to a Node.
Node* search(string pattern, int& n_matches);

/* -----------------------------------------*/
};

Tester.cpp
#include<bits/stdc++.h>
#include "Node.h"
#include "qna_tool.h"

using namespace std;

// Driver: loads the 98 corpus volumes into a QNA_tool, then answers every
// question read from standard input (one per line) by calling query().
//
// Each corpus record is read as "<tuple>)" followed by the rest of the line as
// the sentence; tuple fields are comma separated and may be wrapped in single
// quotes. Returns 1 as soon as a volume cannot be opened. Records with fewer
// than four metadata fields are skipped.
int main(){

    QNA_tool qna_tool;

    int num_books = 98;

    for(int i = 1; i <= num_books; i++){

        std::cout << "Inserting book " << i << std::endl;

        std::string filename = "mahatma-gandhi-collected-works-volume-";
        filename += to_string(i);
        filename += ".txt";
        std::ifstream inputFile(filename);

        if (!inputFile.is_open()) {
            std::cerr << "Error: Unable to open the input file mahatma-gandhi." << std::endl;
            return 1;
        }

        std::string tuple;
        std::string sentence;

        while (std::getline(inputFile, tuple, ')') && std::getline(inputFile, sentence)) {
            // Get a line in the sentence
            tuple += ')';

            std::vector<int> metadata;
            std::istringstream iss(tuple);

            // Temporary variables for parsing
            std::string token;

            // Ignore the first character (the opening parenthesis)
            iss.ignore(1);

            // Parse and convert the elements to integers
            while (std::getline(iss, token, ',')) {
                // Trim leading and trailing white spaces
                size_t start = token.find_first_not_of(" ");
                size_t end = token.find_last_not_of(" ");
                if (start != std::string::npos && end != std::string::npos) {
                    token = token.substr(start, end - start + 1);
                }

                // Check if the element is a number or a string
                if (token[0] == '\'') {
                    // Remove the single quotes and convert to integer
                    int num = std::stoi(token.substr(1, token.length() - 2));
                    metadata.push_back(num);
                } else {
                    // Convert the element to integer
                    int num = std::stoi(token);
                    metadata.push_back(num);
                }
            }

            // Now we have the string in sentence
            // And the other info in metadata

            // A record needs book, page, paragraph and sentence number.
            if (metadata.size() < 4) {
                continue;
            }

            // Insert in the qna_tool
            qna_tool.insert_sentence(metadata[0], metadata[1], metadata[2], metadata[3], sentence);
        }

        inputFile.close();
    }

    string question;

    /*
    Part-1 : Get the top-k paragraphs
    Checking Correctness
    */

    // Let's try to ask a simple question to the qna_tool
    // Node* head = qna_tool.get_top_k_modified_para(question, 5);

    // vector<string> paras;
    // while(head != nullptr){
    //     string res = qna_tool.get_paragraph(head->book_code, head->page, head->paragraph);
    //     paras.push_back(res);
    //     head = head->right;
    // }
    // for(int i = 0; i < (int)paras.size(); i++){
    //     cout << paras[i] << endl << endl << endl;
    // }

    /*
    Part-2 : Query the LLM
    */
    while (getline(cin, question)) {
        qna_tool.query(question, "api_call.py");
    }

    return 0;
}

Dict.cpp
// Do NOT add any other includes
#include "dict.h"

// Returns a copy of `s` in which every ASCII capital letter 'A'..'Z' has been
// replaced by its lower-case counterpart; all other characters are unchanged.
std::string tolowerr(std::string s) {
    for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
        const char ch = s[pos];
        if (ch >= 'A' && ch <= 'Z') {
            s[pos] = static_cast<char>(ch + 32);
        }
    }
    return s;
}

// Splits `sentence` into words without changing their case.
//
// Any of the characters in ` .,-:!"'()?[];@` ends the current word; runs of
// separators never produce empty words. Returns the words in order of
// appearance.
std::vector<std::string> getword(std::string sentence) {
    const std::string separators = " .,-:!\"\'()?[];@";

    std::vector<std::string> words;
    std::string current;
    for (char ch : sentence) {
        if (separators.find(ch) != std::string::npos) {
            // A separator closes the word collected so far, if any.
            if (!current.empty()) {
                words.push_back(current);
                current.clear();
            }
        } else {
            current += ch;
        }
    }
    if (!current.empty()) {
        words.push_back(current);
    }
    return words;
}

// The trie member is default-constructed; nothing else to set up.
Dict::Dict() = default;

// The `sr` member is destroyed automatically after this body runs, so it
// must not be destroyed by hand here as well (that would run its destructor
// twice).
Dict::~Dict(){
}

// Tokenises `sentence` with getword() and records the lower-cased form of
// every token in the trie under (book_code, page, paragraph).
// `sentence_no` is accepted but not stored.
void Dict::insert_sentence(int book_code, int page, int paragraph, int sentence_no, string sentence) {
    const vector<string> tokens = getword(sentence);
    for (size_t idx = 0; idx < tokens.size(); ++idx) {
        sr.insert(book_code, page, paragraph, tolowerr(tokens[idx]));
    }
}

// int Dict::get_word_count(string wd){


// word* temp=sr.search(wd);
// return temp->corpuscount;
// }

// Looks `wd` up in the trie and returns whatever the trie's search() yields
// for it: the stored word record, or a null pointer when there is none.
word* Dict::get_word_count(string wd) {
    word* record = sr.search(wd);
    return record;
}
// Currently a no-op: the call that would dump the trie is disabled.
void Dict::dump_dictionary(string filename) {
    // sr.dump(filename);
    (void)filename;
}
bool Dict::is_unwanted(string i){
return sr.uwords->search(i);
}

Dict.h
// Do NOT add any other includes
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
using namespace std;
/*.................................*/
// Node of the unwanted-words trie: an end-of-word flag plus 53 child slots.
// Destroying a node destroys every child still attached to it.
class uwnode {
public:
    bool endofword;
    uwnode* ary[53];

    uwnode() : endofword(false) {
        for (uwnode*& slot : ary) {
            slot = nullptr;
        }
    }

    ~uwnode() {
        for (uwnode* child : ary) {
            delete child;  // deleting a null pointer is a no-op
        }
    }
};

class uwtrie{
public:
uwnode* root;
uwtrie(){
root=new uwnode();
}
int asciii(char c){
int k=c;
if (k>=35 && k<=38) return k-35;
else if (k==42 || k==43) return k-38;
else if (k>=47 && k<=57) return k-41;
else if (k>=60 && k<=63) return k-43;
else if (k==92) return 21;
else if (k>=94 && k<=122) return k-72;
else if (k==124) return 51;
else return 52;
}
int num(int k){
if (k>=0 && k<=3) return k+35;
else if (k==4 || k==5) return k+38;
else if (k>=6 && k<=16) return k+41;
else if (k>=17 && k<=20) return k+43;
else if (k==21) return 92;
else if (k>=22 && k<=50) return k+72;
else if (k==51) return 124;
else return 126;

}
void insert(string s) {
uwnode* temp = root;
for (int c=0;c<s.size();c++) {
int k = asciii(s[c]);
if (!temp->ary[k]) {
temp->ary[k] = new uwnode();
}
temp = temp->ary[k];
}
temp->endofword = true;
}
bool search(string s) {
uwnode* temp = root;
for (char c : s) {
int k = asciii(c);
if (temp==nullptr || !temp->ary[k]) return false;
temp = temp->ary[k];
}
return temp->endofword;
}

// long long stringToInt(const string& str) {


// long long result = 0;
// size_t i = 0;
// while (i < str.size() && isdigit(str[i])) {
// result = result * 10 + (str[i] - '0');
// ++i;
// }
// return result;
// }

void read_file(string file_name){


ifstream file(file_name);
string word;
// string f;
// getline(file,f);
while (file>>word) {
// istringstream iss(line);
// string word, countStr;
// file>>f;
// Tokenize the line using comma as a delimiter
// getline(iss, word);
// getline(iss, countStr, ',');

// Convert count from string to integer


// long long count = stringToInt(countStr);
insert(word);
}
file.close();
}
uwnode* khtm(uwnode* &root){
if (!root) return nullptr;
for (int i=0;i<53;i++){
root->ary[i]=khtm(root->ary[i]);
}
delete root;
root=nullptr;
return nullptr;
}

~uwtrie(){
root=khtm(root);
}
};

// Node of the frequency trie: an end-of-word flag, the count stored for the
// word ending here, and 53 child slots. Destroying a node destroys every
// child still attached to it.
class csvtrienode {
public:
    bool endofword;
    long long count;
    csvtrienode* arr[53];

    csvtrienode() : endofword(false), count(0) {
        for (csvtrienode*& slot : arr) {
            slot = nullptr;
        }
    }

    ~csvtrienode() {
        for (csvtrienode* child : arr) {
            delete child;  // deleting a null pointer is a no-op
        }
    }
};
class csvtrie {
public:
csvtrienode* root;
csvtrie() : root(new csvtrienode()) {}

int asciii(char c){


int k=c;
if (k>=35 && k<=38) return k-35;
else if (k==42 || k==43) return k-38;
else if (k>=47 && k<=57) return k-41;
else if (k>=60 && k<=63) return k-43;
else if (k==92) return 21;
else if (k>=94 && k<=122) return k-72;
else if (k==124) return 51;
else return 52;
}

int num(int k){


if (k>=0 && k<=3) return k+35;
else if (k==4 || k==5) return k+38;
else if (k>=6 && k<=16) return k+41;
else if (k>=17 && k<=20) return k+43;
else if (k==21) return 92;
else if (k>=22 && k<=50) return k+72;
else if (k==51) return 124;
else return 126;

void insert(string s,long long count) {


csvtrienode* temp = root;
for (int c=0;c<s.size();c++) {
int k = asciii(s[c]);
if (!temp->arr[k]) {
temp->arr[k] = new csvtrienode();
}
temp = temp->arr[k];
}
temp->endofword = true;
temp->count = count;
}
long long search(string s) {
csvtrienode* temp = root;
for (char c : s) {
int k = asciii(c);
if (temp==nullptr || !temp->arr[k]) return 0;
temp = temp->arr[k];
}
if (!temp->endofword) return 0;
return temp->count;
}

long long stringToInt(const string& str) {


long long result = 0;
size_t i = 0;
while (i < str.size() && isdigit(str[i])) {
result = result * 10 + (str[i] - '0');
++i;
}
return result;
}

void read_file(string file_name){


ifstream file(file_name);
string line;
string f;
getline(file,f);
while (getline(file,line)) {
istringstream iss(line);
string word, countStr;

// Tokenize the line using comma as a delimiter


getline(iss, word, ',');
getline(iss, countStr, ',');

// Convert count from string to integer


long long count = stringToInt(countStr);
insert(word,count);
}
file.close();
}
csvtrienode* khtm(csvtrienode* &root){
if (!root) return nullptr;
for (int i=0;i<53;i++){
root->arr[i]=khtm(root->arr[i]);
}
delete root;
root=nullptr;
return nullptr;
}

~csvtrie(){
root=khtm(root);
}
};

/*.....................................*/
/*..........................................*/
// One occurrence of a word: the book, page and paragraph it appeared in,
// linked to its neighbours in the word's occurrence list.
class wordnode {
public:
    int bookcode;
    int pageno;
    int parano;
    wordnode* next;
    wordnode* prev;

    // Unlinked occurrence at the given location.
    wordnode(int book, int page, int par)
        : bookcode(book), pageno(page), parano(par), next(nullptr), prev(nullptr) {}

    // Unlinked placeholder whose location fields are all -1.
    wordnode()
        : bookcode(-1), pageno(-1), parano(-1), next(nullptr), prev(nullptr) {}
};

class word{
public:
string myword;
long long corpuscount;
long long csvcount;
wordnode* head;
wordnode* tail;
word(string mword,long long csvcou){
corpuscount=0;
csvcount=csvcou;
myword=mword;
head=new wordnode();
tail=new wordnode();
head->next=tail;
tail->prev=head;
}
void insert(int book,int pg,int par){
wordnode* n=new wordnode(book,pg,par);
tail->prev->next=n;
n->prev=tail->prev;
tail->prev=n;
n->next=tail;
corpuscount++;
}
~word(){
wordnode* temp=head->next;
while (temp!=tail){
wordnode* newtemp=temp;
temp=temp->next;
delete newtemp;
}
delete tail;
delete head;
}

};

// Node of the corpus trie: an end-of-word flag, the word record stored for
// the word ending here (null when none), and 53 child slots.
//
// A node owns its word record and every child still attached to it. The
// record is released exactly once, regardless of how many children the node
// has.
class trienode{
public:
    bool endofword;
    word* wordlist;
    trienode* arr[53];

    trienode() : endofword(false) {
        wordlist=nullptr;
        for (int i=0;i<53;i++){
            arr[i]=nullptr;
        }
    }

    ~trienode(){
        delete wordlist;  // deleting a null pointer is a no-op
        wordlist=nullptr;
        for (int i=0;i<53;i++){
            delete arr[i];
            arr[i]=nullptr;
        }
    }
};

// Corpus trie: maps each word to its `word` record. It also owns two helper
// tries, `csv` (counts loaded from "unigram_freq.csv") and `uwords` (words
// loaded from "unwanted_words.txt").
//
// Characters are mapped onto 53 child slots by asciii(); every character
// without a slot of its own shares slot 52. Copying is disabled because a
// copy would free the same nodes twice.
class trie{
public:
    trienode* root;
    csvtrie* csv;
    uwtrie* uwords;

    // Builds an empty trie and loads both helper files from the current
    // working directory.
    trie(){
        root=new trienode();
        csv=new csvtrie();
        uwords=new uwtrie();
        csv->read_file("unigram_freq.csv");
        uwords->read_file("unwanted_words.txt");
    }

    trie(const trie&) = delete;
    trie& operator=(const trie&) = delete;

    // Maps a character code to its child slot (0..52).
    int asciii(char c){
        int k=c;
        if (k>=35 && k<=38) return k-35;
        else if (k==42 || k==43) return k-38;
        else if (k>=47 && k<=57) return k-41;
        else if (k>=60 && k<=63) return k-43;
        else if (k==92) return 21;
        else if (k>=94 && k<=122) return k-72;
        else if (k==124) return 51;
        else return 52;
    }

    // Inverse of asciii() for slots 0..51; any other slot maps to code 126.
    int num(int k){
        if (k>=0 && k<=3) return k+35;
        else if (k==4 || k==5) return k+38;
        else if (k>=6 && k<=16) return k+41;
        else if (k>=17 && k<=20) return k+43;
        else if (k==21) return 92;
        else if (k>=22 && k<=50) return k+72;
        else if (k==51) return 124;
        else return 126;
    }

    // Records one occurrence of `s` at (book_code, page, paragraph). The
    // word record is created on first sight, seeded with the count that
    // `csv` holds for `s`.
    void insert(int book_code, int page, int paragraph,string s){
        trienode* temp=root;
        for (string::size_type c = 0; c < s.size(); c++) {
            int k = asciii(s[c]);
            if (!temp->arr[k]) {
                temp->arr[k] = new trienode();
            }
            temp = temp->arr[k];
        }
        if (!temp->endofword){
            temp->endofword=true;
            temp->wordlist=new word(s,csv->search(s));
        }
        temp->wordlist->insert(book_code,page,paragraph);
    }

    // Returns the record stored for `s`, or nullptr when `s` was never
    // inserted as a complete word.
    word* search(string s){
        trienode* temp = root;
        for (char c : s) {
            int k = asciii(c);
            if (temp==nullptr || !temp->arr[k]) return nullptr;
            temp = temp->arr[k];
        }
        if (!temp->endofword) return nullptr;
        return temp->wordlist;
    }

    // Post-order teardown: empties and deletes the subtree at `root`, nulls
    // the caller's pointer and returns nullptr.
    trienode* khtm(trienode* &root){
        if (!root) return nullptr;
        for (int i=0;i<53;i++){
            root->arr[i]=khtm(root->arr[i]);
        }
        delete root;
        root=nullptr;
        return nullptr;
    }

    // Releases the main trie and both helper tries. Every pointer is nulled
    // afterwards so that running this body a second time has nothing left to
    // free.
    ~trie(){
        root=khtm(root);
        delete csv;
        csv=nullptr;
        delete uwords;
        uwords=nullptr;
    }
};

/*............................................*/

// Dictionary over the corpus; every operation is delegated to the `sr` trie.
// The member functions are defined in the accompanying .cpp file.
class Dict {
private:
// You can add attributes/helper functions here
// Trie that stores the words passed to insert_sentence.
trie sr;

public:
/* Please do not touch the attributes and
functions within the guard lines placed below */
/* ------------------------------------------- */
Dict();

~Dict();

// Receives one sentence together with its location in the corpus.
void insert_sentence(int book_code, int page, int paragraph, int


sentence_no, string sentence);

// Returns a pointer to the word record for `wd` (not a plain count).
word* get_word_count(string wd);

void dump_dictionary(string filename);

/* -----------------------------------------*/
// Reports whether `i` is one of the unwanted words.
bool is_unwanted(string i);
};

The code uses only 4 source files:


1. Node.cpp
2. dict.cpp
3. qna_tool.cpp
4. tester.cpp
For part 1, comment out the part 2 code in tester.cpp, which includes the
last line, i.e. qna_tool.query(question, "api_call.py");

Now set the question along with the total number of paragraphs needed.


You will receive the top-k paragraph list along with .txt files of the paragraphs.

For part 2, comment out the whole part 1 code in tester.cpp, set
the question, and directly run these files; you will find the paragraphs along
with the ChatGPT answer on the console.

To run either part, compile with g++ Node.cpp dict.cpp qna_tool.cpp tester.cpp -o f
and then run ./f

You might also like