I want to parse a data file

Does ChatGPT have the ability to parse a file?
By that I mean: can I tell it what to do, and have it read the file and do it?

Why do you need to include all job postings in the same context? :thinking:

I did something like that with davinci back in the day, here’s the approximate workflow:

  1. tear the CV apart to determine capabilities (c)
  2. for each job posting, slice it into requirements (r)
  3. use either embeddings or the model to set up the matrix $M_{ij} = f(r_i, c_j)$, i.e. whether requirement $r_i$ is approximately matched by capability $c_j$ ($r_i \approx c_j$)
  4. gather $v_i = \displaystyle \bigvee_{j} M_{ij}$ (it’s just $v_i = [r_i \in \mathbf{c}]$), take the sum of $\mathbf{v}$, and normalize it by $|\mathbf{r}|$. That’s your acceptance likelihood score. Transpose $M$ for your personal skill match score. Take the top 30 of your favorites and Bob’s your uncle. You can use some TF-IDF or something similar to merge them if you like, but I didn’t explore that too much; I just took the acceptance likelihood.
Surprisingly, I found some prompt code for step 2:
import { Configuration, OpenAIApi } from 'openai';
import { defineSecret } from 'firebase-functions/v2/params'
//import { OpenAIApiAxiosParamCreator } from 'openai/dist/api';
//import axios from 'axios';

// Prompt suffix (version 4) that parseJobDescription2 appends after the cleaned job description.
// It asks the model to fill in the JSON skeleton below; the Postamblev4 type mirrors this shape.
const postamblev4 = `


## Given the information above, fill the following JSON object. It's ok to have empty arrays. For key concepts, if there is a mention of 'something similar', include a word that describes all concepts that are meant. Don't include whitespaces in JSON output.
{
"Job Title": string,
"Company": string,
"Job Post Date": string,
"Pay Range": string,
"Location": string ,
"Remote Job": "yes" | "no" | "unknown",
"Job Responsibilities": {"description": string, "key concepts": string[]}[],
"Required Experience": {"description": string, "key concepts": string[], "years"?: number}[],
"Preferred Experience": {"description": string, "key concepts": string[], "years"?: number}[],
"Required Education": {"description": string, "key concepts": string[]}[],
"Preferred Education": {"description": string, "key concepts": string[]}[],
"Required Certifications": {"description": string, "key concepts": string[]}[],
"Preferred Certifications":  {"description": string, "key concepts": string[]}[]
}

`;
/** One extracted entry: a free-text description plus its key concepts. */
type Postamblev4Item = {
    "description": string;
    "key concepts": string[];
};

/** An experience entry; same as a plain item with an optional number of years. */
type Postamblev4Experience = Postamblev4Item & {
    "years"?: number;
};

/**
 * Shape of the JSON object the postamblev4 prompt asks the model to produce.
 * Property names deliberately match the keys written in that prompt.
 */
type Postamblev4 = {
    "Job Title": string;
    "Company": string;
    "Job Post Date": string;
    "Pay Range": string;
    "Location": string;
    "Remote Job": "yes" | "no" | "unknown";
    "Job Responsibilities": Postamblev4Item[];
    "Required Experience": Postamblev4Experience[];
    "Preferred Experience": Postamblev4Experience[];
    "Required Education": Postamblev4Item[];
    "Preferred Education": Postamblev4Item[];
    "Required Certifications": Postamblev4Item[];
    "Preferred Certifications": Postamblev4Item[];
};


// Secret parameter (declared via firebase-functions' defineSecret) holding the OpenAI API key.
// The functions in this file read it with .value() and return an error result when it is empty.
const OPENAI_PARSEJOBDESCRIPTION_API_KEY = defineSecret('OPENAI_PARSEJOBDESCRIPTION_API_KEY');

/**
 * Asks text-davinci-003 for an itemized, past-tense list of the responsibilities
 * mentioned in a scraped job description.
 *
 * @param jobDescription Text of the job posting; embedded verbatim in the prompt.
 * @param status Optional progress callback, invoked once before and once after the API call.
 * @returns `{responsibilities, cost}` on success, where `responsibilities` is the raw
 *          completion response body and `cost` is the reported total token count
 *          (4000 when the response carries no usable count). When the API key secret
 *          is empty, `{cost: 1, error}` is returned and no request is made.
 */
export const parseJobDescription1 = async (jobDescription: string, status: (update: string)=>void = ()=>{}): Promise<object> => {
    // Guard: without a key there is nothing to call.
    const apiKey = OPENAI_PARSEJOBDESCRIPTION_API_KEY.value();
    if (!apiKey) {
        return {cost: 1, error: "failed to load api key"};
    }

    const client = new OpenAIApi(new Configuration({apiKey: apiKey}));

    status("parsing responsibilities");

    const prompt = `# This is a web scrape of job description\n\n${jobDescription}\n\n\n# In an itemized list, what are the job responsibilities? write them in past tense\n\n`;

    const completion = await client.createCompletion({
        model: "text-davinci-003",
        prompt,
        temperature: 0.7,
        max_tokens: 1024,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
    });

    status("finished parsing responsibilities");

    // Fall back to a pessimistic 4000 tokens when usage is not reported.
    const tokensUsed = completion.data.usage?.total_tokens || 4000;

    return {
        responsibilities: completion.data,
        cost: tokensUsed,
    };
}

/**
 * Extracts structured fields from a scraped job description by prompting
 * text-davinci-003 with the postamblev4 JSON skeleton and parsing the completion as JSON.
 *
 * @param jobDescription Text of the job posting. Markdown-style `#` markers are
 *        rewritten before it is embedded, so they do not clash with the `##` prompt headings.
 * @param status Optional progress callback, invoked once before and once after the API call.
 * @returns On success: `parsedData` (the parsed JSON object), `data` (raw completion
 *          response body), `cost` (reported total tokens, or 4000 when not reported) and
 *          `model`. On failure: `{cost: 1, error}`, plus `data` when the failure happened
 *          after the API call. Rejections from the API call itself are not caught here.
 */
export const parseJobDescription2 = async (jobDescription: string, status: (update: string)=>void = ()=>{}): Promise<{
    cost: number,
    error?: string,
    parsedData?: Postamblev4,
    data?: object,
    model?: string
}> => {
    // if failed to load api key, return an error result without calling the API
    const apiKey = OPENAI_PARSEJOBDESCRIPTION_API_KEY.value();
    if(!apiKey){
        return {cost: 1, error: "failed to load api key"};
    }

    const configuration = new Configuration({apiKey: apiKey});
    const openai = new OpenAIApi(configuration);

    status("parsing job description 2");

    // Heading line followed by two whitespace-only lines, exactly as in the original prompt.
    const preamble = "## This is a web scrape of job description\n    \n    ";

    const cleaner = (text: string) : string => {
        // strip a run of #'s when they are the first non-whitespace characters on a line
        text = text.replace(/^(\s*)#*/gm, "$1");

        // replace a run of #'s that follows two or more whitespace characters with "--"
        text = text.replace(/(\s{2,})##*/gm, "$1--");

        return text;
    }

    // Prompt history: earlier variants (v1-v3) used "keywords" / underscore-separated keys.
    // Underscores dramatically increased the token count in the output; postamblev4
    // (used below) has space-separated keys.
    const response = await openai.createCompletion({
        model: "text-davinci-003",
        prompt: preamble + cleaner(jobDescription) + postamblev4,
        temperature: 0.7,
        max_tokens: 2000,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
    });

    status("finished parsing jobdescription 2.4");

    const cost = response.data.usage?.total_tokens || 4000;

    // Read the completion text once, tolerating an empty `choices` array so that
    // neither the parse nor the failure logging below can throw on a missing choice.
    const rawText = response.data.choices[0]?.text;

    // parse the data for downstream ops
    let parsed: unknown;
    try{
        parsed = JSON.parse(rawText ?? "");
    }catch(e){
        console.warn("failed to re-parse job description 2.4", e, rawText);
        return {cost: 1, error: "failed to parse output", data: response.data};
    }

    // Valid JSON that is not a plain object (null, number, array, ...) cannot be a Postamblev4.
    if(typeof parsed !== "object" || parsed === null || Array.isArray(parsed)){
        console.warn("failed to re-parse job description 2.4", "output is not a JSON object", rawText);
        return {cost: 1, error: "failed to parse output", data: response.data};
    }

    return {
        // Only the top-level shape is verified at runtime; individual fields are trusted.
        parsedData: parsed as Postamblev4,
        data: response.data,
        cost: cost,
        model: "parseJobDescription2.4"
    };

}

/**
 * Rewrites a list of job-description phrases as past-tense sentences using text-davinci-003.
 *
 * The prompt ends with an opening `["`, so the completion is expected to be the
 * remainder of a JSON string array; that prefix is re-attached before parsing.
 *
 * @param phraseList Phrases to rewrite; sent to the model as a JSON array.
 * @param status Optional progress callback, invoked once before and once after the API call.
 * @returns On success: `phrases` (the rewritten strings), `data` (raw completion response
 *          body), `cost` (reported total tokens, or 4000 when not reported) and `model`.
 *          On failure: `{cost: 1, error, phrases: []}`, plus `data` when the failure
 *          happened after the API call. Rejections from the API call itself are not caught here.
 */
export const pastTense1 = async (phraseList: string[], status: (update: string)=>void = ()=>{}): Promise<{phrases: string[], data?: object, cost: number, error?: string, model?: string}> => {
    // if failed to load api key, return an error result without calling the API
    const apiKey = OPENAI_PARSEJOBDESCRIPTION_API_KEY.value();
    if(!apiKey){
        return {cost: 1, error: "failed to load api key", phrases: []};
    }

    const configuration = new Configuration({apiKey: apiKey});
    const openai = new OpenAIApi(configuration);

    status("past tense 1");

    const preamble = "## Here's a list of strings found in a job description.\n\n ";

    const cleaner = (text: string) : string => {
        // strip a run of #'s when they are the first non-whitespace characters on a line
        text = text.replace(/^(\s*)#*/gm, "$1");

        // replace a run of #'s that follows two or more whitespace characters with "--"
        text = text.replace(/(\s{2,})##*/gm, "$1--");

        return text;
    }

    // Ends with `["` so the model continues a JSON array of strings.
    const pasttensifyPostamblev1 = `\n\n## Rewrite them as past tense sentences, so it appears that something was done as part of a job. example:  "2+ years experience with Big Data, Cloud Native Platforms" => "Worked with Big Data and Cloud Native Platforms"\n\n["`;

    const isStringArray = (value: unknown): value is string[] =>
        Array.isArray(value) && value.every((item) => typeof item === "string");

    const response = await openai.createCompletion({
        model: "text-davinci-003",
        prompt: preamble + cleaner(JSON.stringify(phraseList)) + pasttensifyPostamblev1,
        temperature: 0.7,
        max_tokens: 2000,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
    });

    status("finished past tense 1");

    const cost = response.data.usage?.total_tokens || 4000;

    // Read the completion text once, tolerating an empty `choices` array so that
    // neither the parse nor the failure logging below can throw on a missing choice.
    const rawText = response.data.choices[0]?.text;

    // parse the data for downstream ops, re-attaching the `["` the prompt ended with
    let parsed: unknown;
    try{
        parsed = JSON.parse('["' + (rawText ?? ""));
    }catch(e){
        console.warn("failed to parse past tense 1 output", e, rawText);
        return {cost: 1, error: "failed to parse output", data: response.data, phrases: []};
    }

    // The declared result is string[]; reject arrays that contain anything else.
    if(!isStringArray(parsed)){
        console.warn("failed to parse past tense 1 output", "output is not an array of strings", rawText);
        return {cost: 1, error: "failed to parse output", data: response.data, phrases: []};
    }

    return {
        phrases: parsed,
        data: response.data,
        cost: cost,
        model: "pasttensify1"
    };

}

HTH, gl!

1 Like