import pandas as pd
import re
import os
import google.generativeai as genai
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from chromadb import Client
# from dotenv import load_dotenv # No longer needed if using Colab secrets
from google.colab import userdata
# Load environment variables (using Colab secrets now)
# Read the Gemini API key from Colab's secret store.
API_KEY = userdata.get("GOOGLE_API_KEY")
if not API_KEY:
    # Fail fast with a clear message instead of a cryptic auth error later.
    raise ValueError("GOOGLE_API_KEY not found. Please set it in Colab secrets.")
genai.configure(api_key=API_KEY)
# Use the working model
model = genai.GenerativeModel('gemini-1.5-flash-latest')
# Initialize components (assuming these are already initialized from previous steps)
# Initialize the text splitter
# Small chunks (100 chars, 20 overlap) — sized for short email bodies.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
# Initialize the embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Initialize an in-memory ChromaDB client and get the collection
# NOTE(review): Client() is in-memory — "email_chunks" is empty unless a
# previous cell populated it, in which case retrieval returns nothing. Confirm.
client = Client()
collection = client.get_or_create_collection(name="email_chunks")
# Define cleaning, extraction, and append functions (re-defining for clarity in this block)
def clean_email_content(email_content):
    """
    Strip HTML tags and unusual characters from a raw email body.

    Removes anything that looks like an HTML tag, drops characters outside
    letters/digits/whitespace/basic punctuation (.,!?;:), and collapses all
    whitespace runs into single spaces.

    Args:
        email_content: Raw (possibly HTML) email body text.

    Returns:
        The cleaned, single-spaced plain-text body.
    """
    without_tags = re.sub(r'<.*?>', '', email_content)
    allowed_only = re.sub(r'[^a-zA-Z0-9\s.,!?;:]', '', without_tags)
    return re.sub(r'\s+', ' ', allowed_only).strip()
def extract_data_for_excel(email_content):
    """
    Extract summary and contact-number fields from cleaned email content.

    Sender and Subject are placeholders ('N/A'); callers overwrite them with
    the known values from the email envelope (see process_new_email).

    Args:
        email_content: Cleaned plain-text email body.

    Returns:
        dict with keys 'Sender', 'Subject', 'Summary', 'Contact'.
    """
    # FIX: the original dict literal was missing its closing brace, and the
    # conditional expression below was broken across two lines (SyntaxError).
    extracted_data = {
        'Sender': 'N/A',
        'Subject': 'N/A',
        'Summary': 'N/A',
        'Contact': 'N/A',
    }
    # Truncate long bodies to the first 100 characters for the summary column.
    extracted_data['Summary'] = (
        email_content[:100] + '...' if len(email_content) > 100 else email_content
    )
    # Loose phone-number heuristic: any run of 7+ digits/spaces/dashes.
    contact_match = re.search(r'[\d\s-]{7,}', email_content)
    if contact_match:
        extracted_data['Contact'] = contact_match.group(0).strip()
    return extracted_data
def append_to_excel(data_dict, file_path):
    """
    Append one row of extracted email data to an Excel workbook.

    Creates the file (with a header row) if it does not exist; otherwise
    appends the row below the existing data without rewriting the header.

    Args:
        data_dict: Mapping of column name -> value for a single row.
        file_path: Destination .xlsx path. Appending requires the
            openpyxl engine.
    """
    # FIX: only create parent directories when the path actually has one.
    # os.makedirs('') raises FileNotFoundError, and the default path used by
    # process_new_email ('extracted_email_data.xlsx') is a bare filename.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df = pd.DataFrame([data_dict])
    if not os.path.exists(file_path):
        df.to_excel(file_path, index=False)
    else:
        # Overlay mode lets us write into the existing sheet; start below the
        # last populated row and skip the header so rows accumulate.
        with pd.ExcelWriter(file_path, mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
            df.to_excel(writer, index=False, header=False, startrow=writer.book.active.max_row)
def search_email_chunks(query_string, n_results=3):
    """
    Retrieve the stored email chunks most similar to a query.

    Embeds the query with the module-level SentenceTransformer and runs a
    nearest-neighbour search against the "email_chunks" Chroma collection.

    Args:
        query_string: Free-text query.
        n_results: Maximum number of chunks to return (default 3).

    Returns:
        Chroma query-result dict containing 'documents', 'distances'
        and 'metadatas'.
    """
    query_vector = embedding_model.encode(query_string).tolist()
    return collection.query(
        query_embeddings=[query_vector],
        n_results=n_results,
        include=['documents', 'distances', 'metadatas'],
    )
def generate_reply(original_email_content, retrieved_chunks):
    """
    Generate an email reply with the Gemini model, grounded in retrieved context.

    Builds a prompt containing the original email plus each retrieved chunk,
    then asks the module-level `model` for a reply.

    Args:
        original_email_content: Cleaned body of the email being answered.
        retrieved_chunks: Iterable of context strings from the vector store.

    Returns:
        The generated reply text, or a fixed fallback message if generation
        raises for any reason.
    """
    prompt = f"""Original Email:
{original_email_content}
Relevant Context from past emails:
"""
    for i, chunk in enumerate(retrieved_chunks):
        prompt += f"Chunk {i+1}: {chunk}\n"
    # FIX: this literal was broken across two source lines (SyntaxError);
    # re-joined into a single instruction string.
    prompt += ("\nBased on the original email and the relevant context, "
               "generate a concise and helpful reply.")
    try:
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Best-effort: report the failure and return a safe fallback rather
        # than crashing the email-processing pipeline.
        print(f"Error generating content: {e}")
        return "Could not generate a reply at this time."
# Integrated function to process a new email
def process_new_email(new_email_content_raw, sender, subject,
                      excel_file_path='extracted_email_data.xlsx'):
    """
    Run the full RAG pipeline for one incoming email.

    Cleans the raw body, retrieves related chunks from the vector store,
    generates a reply with the LLM, and appends the extracted fields to an
    Excel workbook.

    Args:
        new_email_content_raw: Raw (possibly HTML) email body.
        sender: Display name / address of the sender.
        subject: Email subject line.
        excel_file_path: Workbook the extracted row is appended to.

    Returns:
        The generated reply text, or a fixed error message on any failure.
    """
    print(f"Processing new email from {sender} with subject: {subject}")
    try:
        # Step 1: normalise the raw body.
        cleaned = clean_email_content(new_email_content_raw)
        print("Email content cleaned.")
        # Step 2 (chunking) is intentionally skipped: the whole cleaned body
        # serves as the retrieval query and nothing is added to the store.
        # Step 3: vector search for related past-email chunks, flattening the
        # per-query nested lists Chroma returns.
        retrieval = search_email_chunks(cleaned)
        context_chunks = [
            doc
            for doc_list in retrieval.get('documents', [])
            for doc in doc_list
        ]
        print(f"Retrieved {len(context_chunks)} relevant chunks.")
        # Step 4: LLM reply grounded in the retrieved context.
        reply = generate_reply(cleaned, context_chunks)
        print("\nGenerated Reply:")
        print(reply)
        # Step 5: persist extracted fields for reporting.
        row = extract_data_for_excel(cleaned)
        row['Sender'] = sender  # Add sender from input
        row['Subject'] = subject  # Add subject from input
        append_to_excel(row, excel_file_path)
        print(f"\nExtracted data appended to {excel_file_path}.")
        return reply
    except Exception as e:
        print(f"\nAn error occurred during email processing: {e}")
        return "An error occurred during processing."
# 3. Create a sample "new" email string
sample_new_email_content = """
Hello team,
Just following up on the project update meeting. Could someone confirm the deadline for phase 2
implementation?
Also, please let me know if you need anything from my side. You can reach me at 555-1234.
Thanks,
Alice
"""
sample_sender = "Alice"
sample_subject = "Follow up on Project Update"
# 4. Call the integrated function with the sample new email
print("--- Starting New Email Processing ---")
process_new_email(sample_new_email_content, sample_sender, sample_subject)
print("--- New Email Processing Finished ---")
# 5. Optionally, read and display the updated Excel file
# Path matches process_new_email's default excel_file_path.
try:
    updated_extracted_df = pd.read_excel('extracted_email_data.xlsx')
    print("\nUpdated Excel File Content:")
    # NOTE(review): display() is an IPython/Colab builtin; this raises
    # NameError outside a notebook — confirm the intended runtime.
    display(updated_extracted_df)
except FileNotFoundError:
    print("\nExcel file not found after processing.")