0% found this document useful (0 votes)

62 views10 pages

Conflation Algorithm - IsR Experiments

Uploaded by

mypratik24

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

62 views10 pages

Conflation Algorithm - IsR Experiments

Uploaded by

mypratik24

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 10

ISR experiments

Write a program in the Go language to implement the conflation algorithm. Accept as input 10
random lines of any text. Define stop words as words of high frequency appearing in regular text,
plus all punctuation symbols. Remove stop words, then apply stemming to the words, then replace
synonyms with the most common word. (Write the code and simulate the output.)

package main

import (
"fmt"
"regexp"
"strings"
)

// ConflationProcessor holds the lookup tables and the compiled pattern used
// by the conflation pipeline (punctuation removal, stop-word filtering,
// stemming and synonym replacement).
type ConflationProcessor struct {
	// stopWords is the set of lowercase words that RemoveStopWords drops.
	stopWords map[string]bool
	// stemRules maps a suffix to its replacement text.
	// NOTE(review): StemWord in this file works from its own suffix list and
	// does not read this table.
	stemRules map[string]string
	// synonyms maps a lowercase word to the canonical word that replaces it.
	synonyms map[string]string
	// punctuation matches every character that RemovePunctuation deletes.
	punctuation *regexp.Regexp
}

// NewConflationProcessor creates a new processor with predefined rules

func NewConflationProcessor() *ConflationProcessor {
// Define common stop words
stopWords := map[string]bool{
"the": true, "a": true, "an": true, "and": true, "or": true, "but": true,
"in": true, "on": true, "at": true, "to": true, "for": true, "of": true,
"with": true, "by": true, "is": true, "are": true, "was": true, "were": true,
"be": true, "been": true, "being": true, "have": true, "has": true, "had":
true,
"do": true, "does": true, "did": true, "will": true, "would": true, "could":
true,
"should": true, "may": true, "might": true, "must": true, "can": true,
"this": true, "that": true, "these": true, "those": true, "i": true, "you": true,
"he": true, "she": true, "it": true, "we": true, "they": true, "me": true,
"him": true, "her": true, "us": true, "them": true, "my": true, "your": true,
"his": true, "its": true, "our": true, "their": true, "not": true, "no": true,
"so": true, "if": true, "when": true, "where": true, "how": true, "what": true,
"who": true, "which": true, "why": true, "as": true, "than": true, "then":
true,
"now": true, "here": true, "there": true, "up": true, "down": true, "out":
true,
"off": true, "over": true, "under": true, "again": true, "further": true,
"once": true, "more": true, "most": true, "other": true, "some": true,
"such": true,
"own": true, "same": true, "very": true, "just": true, "about": true, "into":
true,
"through": true, "during": true, "before": true, "after": true, "above": true,
"below": true, "between": true, "both": true, "each": true, "few": true,
"only": true,
"too": true, "any": true, "because": true, "until": true, "while": true, "from":
true,
}

// Simple stemming rules (suffix removal)

stemRules := map[string]string{
"ing": "",
"ed": "",
"er": "",
"est": "",
"ly": "",
"tion": "",
"sion": "",
"ness": "",
"ment": "",
"able": "",
"ible": "",
"ful": "",
"less": "",
"ous": "",
"ive": "",
"ize": "",
"ise": "",
"s": "", // simple plural
}

// Synonym mapping (map synonyms to most common word)

synonyms := map[string]string{
"big": "large",
"huge": "large",
"enormous": "large",
"gigantic": "large",
"massive": "large",
"small": "little",
"tiny": "little",
"minute": "little",
"miniature": "little",
"good": "great",
"excellent": "great",
"fantastic": "great",
"wonderful": "great",
"amazing": "great",
"bad": "terrible",
"awful": "terrible",
"horrible": "terrible",
"dreadful": "terrible",
"fast": "quick",
"rapid": "quick",
"swift": "quick",
"speedy": "quick",
"slow": "sluggish",
"lazy": "sluggish",
"happy": "joyful",
"glad": "joyful",
"cheerful": "joyful",
"delighted": "joyful",
"sad": "unhappy",
"miserable": "unhappy",
"depressed": "unhappy",
"gloomy": "unhappy",
"start": "begin",
"commence": "begin",
"initiate": "begin",
"end": "finish",
"complete": "finish",
"conclude": "finish",
"terminate": "finish",
"show": "display",
"exhibit": "display",
"demonstrate": "display",
"reveal": "display",
"hide": "conceal",
"bury": "conceal",
"cover": "conceal",
"mask": "conceal",
}

// Regex for punctuation removal

punctuation := [Link](`[^\w\s]`)

return &ConflationProcessor{
stopWords: stopWords,
stemRules: stemRules,
synonyms: synonyms,
punctuation: punctuation,
}
}

// RemovePunctuation removes all punctuation from text

func (cp *ConflationProcessor) RemovePunctuation(text string) string {
return [Link](text, "")
}

// RemoveStopWords removes stop words from a slice of words

func (cp *ConflationProcessor) RemoveStopWords(words []string) []string {
var result []string
for _, word := range words {
if ![Link][[Link](word)] && word != "" {
result = append(result, word)
}
}
return result
}

// StemWord applies simple stemming rules to a word

func (cp *ConflationProcessor) StemWord(word string) string {
word = [Link](word)

// Try to apply stemming rules in order of length (longest first)

suffixes := []string{"tion", "sion", "ment", "ness", "able", "ible", "ful", "less", "ous",
"ive", "ize", "ise", "ing", "ed", "er", "est", "ly", "s"}

for _, suffix := range suffixes {

if [Link](word, suffix) && len(word) > len(suffix)+2 {
return word[:len(word)-len(suffix)]
}
}

return word
}

// ReplaceSynonyms replaces words with their most common synonyms

func (cp *ConflationProcessor) ReplaceSynonyms(words []string) []string {
var result []string
for _, word := range words {
lowerWord := [Link](word)
if synonym, exists := [Link][lowerWord]; exists {
result = append(result, synonym)
} else {
result = append(result, lowerWord)
}
}
return result
}

// ProcessText applies the complete conflation algorithm

func (cp *ConflationProcessor) ProcessText(text string) ([]string, map[string][]string) {
// Track transformations for demonstration
transformations := make(map[string][]string)
[Link]("Original text:")
[Link](text)
[Link]()

// Step 1: Remove punctuation

noPunct := [Link](text)
[Link]("After removing punctuation:")
[Link](noPunct)
[Link]()

// Step 2: Split into words and convert to lowercase

words := [Link]([Link](noPunct))
[Link]("Words after splitting and lowercasing:")
[Link](words)
[Link]()

// Step 3: Remove stop words

filteredWords := [Link](words)
[Link]("After removing stop words:")
[Link](filteredWords)
[Link]()

// Step 4: Apply stemming

stemmedWords := make([]string, len(filteredWords))
for i, word := range filteredWords {
stemmed := [Link](word)
stemmedWords[i] = stemmed
if stemmed != word {
transformations["stemming"] =
append(transformations["stemming"], [Link]("%s -> %s", word, stemmed))
}
}
[Link]("After stemming:")
[Link](stemmedWords)
[Link]()

// Step 5: Replace synonyms

finalWords := [Link](stemmedWords)
for i, word := range stemmedWords {
if finalWords[i] != word {
transformations["synonyms"] =
append(transformations["synonyms"], [Link]("%s -> %s", word, finalWords[i]))
}
}
[Link]("After synonym replacement:")
[Link](finalWords)
[Link]()
return finalWords, transformations
}

func main() {
// Sample input text (10 lines)
inputText := `The quick brown fox jumps over the lazy dog in the beautiful
garden.
Programming is an amazing skill that requires dedication and practice.
Machine learning algorithms are becoming increasingly powerful and efficient.
The weather today is absolutely wonderful with clear blue skies.
Students should study hard to achieve excellent results in their examinations.
Technology has revolutionized the way we communicate and work together.
Reading books is a fantastic way to expand your knowledge and imagination.
The enormous mountain stood majestically against the bright morning sky.
Scientists are working tirelessly to develop new solutions for global challenges.
Music brings joy and happiness to people all around the world.`

// Create processor and process the text

processor := NewConflationProcessor()
finalWords, transformations := [Link](inputText)

// Display transformations
[Link]("=== TRANSFORMATIONS APPLIED ===")

if stems, exists := transformations["stemming"]; exists {

[Link]("Stemming transformations:")
for _, stem := range stems {
[Link](" %s\n", stem)
}
[Link]()
}

if syns, exists := transformations["synonyms"]; exists {

[Link]("Synonym replacements:")
for _, syn := range syns {
[Link](" %s\n", syn)
}
[Link]()
}

// Display final result

[Link]("=== FINAL CONFLATED TEXT ===")
[Link]([Link](finalWords, " "))

// Display statistics
[Link]("\nOriginal word count (after splitting): %d\n",
len([Link](inputText)))
[Link]("Final word count (after conflation): %d\n", len(finalWords))
[Link]("Reduction: %.1f%%\n", (1.0-
float64(len(finalWords))/float64(len([Link](inputText))))*100)
}

Output:
Original text:
The quick brown fox jumps over the lazy dog in the beautiful garden.
Programming is an amazing skill that requires dedication and practice.
Machine learning algorithms are becoming increasingly powerful and efficient.
The weather today is absolutely wonderful with clear blue skies.
Students should study hard to achieve excellent results in their examinations.
Technology has revolutionized the way we communicate and work together.
Reading books is a fantastic way to expand your knowledge and imagination.
The enormous mountain stood majestically against the bright morning sky.
Scientists are working tirelessly to develop new solutions for global challenges.
Music brings joy and happiness to people all around the world.

After removing punctuation:

The quick brown fox jumps over the lazy dog in the beautiful garden
Programming is an amazing skill that requires dedication and practice
Machine learning algorithms are becoming increasingly powerful and efficient
The weather today is absolutely wonderful with clear blue skies
Students should study hard to achieve excellent results in their examinations
Technology has revolutionized the way we communicate and work together
Reading books is a fantastic way to expand your knowledge and imagination
The enormous mountain stood majestically against the bright morning sky
Scientists are working tirelessly to develop new solutions for global challenges
Music brings joy and happiness to people all around the world

Words after splitting and lowercasing:

[the quick brown fox jumps over the lazy dog in the beautiful garden programming is an
amazing skill that requires dedication and practice machine learning algorithms are
becoming increasingly powerful and efficient the weather today is absolutely wonderful
with clear blue skies students should study hard to achieve excellent results in their
examinations technology has revolutionized the way we communicate and work
together reading books is a fantastic way to expand your knowledge and imagination
the enormous mountain stood majestically against the bright morning sky scientists are
working tirelessly to develop new solutions for global challenges music brings joy and
happiness to people all around the world]

After removing stop words:

[quick brown fox jumps lazy dog beautiful garden programming amazing skill requires
dedication practice machine learning algorithms becoming increasingly powerful
efficient weather today absolutely wonderful clear blue skies students study hard
achieve excellent results examinations technology revolutionized way communicate
work together reading books fantastic way expand knowledge imagination enormous
mountain stood majestically against bright morning sky scientists working tirelessly
develop new solutions global challenges music brings joy happiness people all around
world]

After stemming:
[quick brown fox jump lazy dog beauti garden programm amaz skill require dedica
practice machine learn algorithm becom increasing power efficient weath today
absolute wonder clear blue skie student study hard achieve excellent result
examination technology revolutioniz way communicate work togeth read book fantastic
way expand knowledge imagina enorm mountain stood majestical against bright morn
sky scientist work tireless develop new solution global challenge music bring joy happi
people all around world]

After synonym replacement:

[quick brown fox jump sluggish dog beauti garden programm amaz skill require dedica
practice machine learn algorithm becom increasing power efficient weath today
absolute wonder clear blue skie student study hard achieve great result examination
technology revolutioniz way communicate work togeth read book great way expand
knowledge imagina enorm mountain stood majestical against bright morn sky scientist
work tireless develop new solution global challenge music bring joy happi people all
around world]

=== TRANSFORMATIONS APPLIED ===

Stemming transformations:
jumps -> jump
beautiful -> beauti
programming -> programm
amazing -> amaz
requires -> require
dedication -> dedica
learning -> learn
algorithms -> algorithm
becoming -> becom
increasingly -> increasing
powerful -> power
weather -> weath
absolutely -> absolute
wonderful -> wonder
skies -> skie
students -> student
results -> result
examinations -> examination
revolutionized -> revolutioniz
together -> togeth
reading -> read
books -> book
imagination -> imagina
enormous -> enorm
majestically -> majestical
morning -> morn
scientists -> scientist
working -> work
tirelessly -> tireless
solutions -> solution
challenges -> challenge
brings -> bring
happiness -> happi

Synonym replacements:
lazy -> sluggish
excellent -> great
fantastic -> great

=== FINAL CONFLATED TEXT ===

quick brown fox jump sluggish dog beauti garden programm amaz skill require dedica
practice machine learn algorithm becom increasing power efficient weath today
absolute wonder clear blue skie student study hard achieve great result examination
technology revolutioniz way communicate work togeth read book great way expand
knowledge imagina enorm mountain stood majestical against bright morn sky scientist
work tireless develop new solution global challenge music bring joy happi people all
around world

Original word count (after splitting): 107

Final word count (after conflation): 72
Reduction: 32.7%

Note :
Use Online Go Compiler - Programiz for execution

Key Features:
1. Stop Word Removal: Removes 70+ common English stop words (the, a, an, and,
etc.) and all punctuation symbols
2. Stemming: Applies suffix removal rules for common endings like -ing, -ed, -tion, -ly,
etc.
3. Synonym Replacement: Maps synonyms to their most common equivalents (e.g.,
"huge" → "large", "excellent" → "great")

Algorithm Steps:
1. Punctuation Removal: Strips all punctuation using regex
2. Tokenization: Splits text into words and converts to lowercase
3. Stop Word Filtering: Removes high-frequency words
4. Stemming: Reduces words to their root forms
5. Synonym Normalization: Replaces synonyms with canonical forms

Sample Output:
When you run this code, it will process 10 lines of sample text and show:

• Step-by-step transformations
• Which words were stemmed (e.g., "jumping" → "jump")
• Which synonyms were replaced (e.g., "excellent" → "great")
• Final conflated text
• Statistics showing text reduction percentage

In the sample run above, conflation reduces the word count from 107 to 72 (a 32.7% reduction)
while keeping the content-bearing terms. This is useful for:

• Document similarity comparison

• Search indexing
• Text mining
• Information retrieval systems

Python Text Processing Techniques
No ratings yet
Python Text Processing Techniques
13 pages
Conflation
No ratings yet
Conflation
4 pages
Код
No ratings yet
Код
2 pages
Web Scraping
No ratings yet
Web Scraping
9 pages
04 Word Normalization and Stemming 11-47
No ratings yet
04 Word Normalization and Stemming 11-47
5 pages
CT Algorithm Project
No ratings yet
CT Algorithm Project
3 pages
03 Word Tokenization 14-26
No ratings yet
03 Word Tokenization 14-26
6 pages
x0 Process
No ratings yet
x0 Process
4 pages
Unit Iii Data Structure
No ratings yet
Unit Iii Data Structure
43 pages
Italian and English IPA Transcription Guide
No ratings yet
Italian and English IPA Transcription Guide
64 pages
Nelson Spelling Pupil Book 2
80% (10)
Nelson Spelling Pupil Book 2
69 pages
CT Project
No ratings yet
CT Project
3 pages
Unit 5
No ratings yet
Unit 5
14 pages
Natural Langauge Processsing Unit 2
No ratings yet
Natural Langauge Processsing Unit 2
16 pages
3.word Level Analysis-Tokenization Stemming
No ratings yet
3.word Level Analysis-Tokenization Stemming
8 pages
Vocabulary
No ratings yet
Vocabulary
5 pages
Welcome To The 2 Half Training
No ratings yet
Welcome To The 2 Half Training
121 pages
Linguistics: Morphology Basics
No ratings yet
Linguistics: Morphology Basics
37 pages
Go Text Editing Tool Project
No ratings yet
Go Text Editing Tool Project
2 pages
02 Textprocessingboth
No ratings yet
02 Textprocessingboth
46 pages
NLPNOTES
No ratings yet
NLPNOTES
26 pages
NLP Notes-1
No ratings yet
NLP Notes-1
11 pages
NLP Python Code Examples and Techniques
No ratings yet
NLP Python Code Examples and Techniques
16 pages
NLP Manual
No ratings yet
NLP Manual
9 pages
Word Search Tion
No ratings yet
Word Search Tion
2 pages
A Stop List For General Text
No ratings yet
A Stop List For General Text
17 pages
Untitled6 Output
No ratings yet
Untitled6 Output
3 pages
AP19110010110 Lab Assignment-2 - Jupyter Notebook
No ratings yet
AP19110010110 Lab Assignment-2 - Jupyter Notebook
18 pages
Document
No ratings yet
Document
5 pages
Morphology Concepts
No ratings yet
Morphology Concepts
46 pages
NLP - 15129145,15129128,15129129 - Experiment 3
No ratings yet
NLP - 15129145,15129128,15129129 - Experiment 3
262 pages
Sentence Similarity Checker Code
No ratings yet
Sentence Similarity Checker Code
3 pages
Basic English
No ratings yet
Basic English
2 pages
English Sound System Vowel Sounds and Phonetic Symbols 1) Vowels and Vowel Combinations
No ratings yet
English Sound System Vowel Sounds and Phonetic Symbols 1) Vowels and Vowel Combinations
8 pages
7 Exp
No ratings yet
7 Exp
6 pages
Us 6260008
No ratings yet
Us 6260008
15 pages
Tanvi Chiman 10 BE3 EXP3 A SMA
No ratings yet
Tanvi Chiman 10 BE3 EXP3 A SMA
3 pages
Conflation
No ratings yet
Conflation
3 pages
NLP Record 1-6 Exp
No ratings yet
NLP Record 1-6 Exp
27 pages
Building Words
No ratings yet
Building Words
7 pages
Ogden's Basic English Word List
No ratings yet
Ogden's Basic English Word List
2 pages
Complete Book of Reading PDF
No ratings yet
Complete Book of Reading PDF
329 pages
NLP Answers
No ratings yet
NLP Answers
14 pages
Chapter 8 Online Reference Tools - Hockly N & Dudeney G (2008) - How To Teach English With Technology
No ratings yet
Chapter 8 Online Reference Tools - Hockly N & Dudeney G (2008) - How To Teach English With Technology
10 pages
Word Analysis
No ratings yet
Word Analysis
30 pages
Ing Spelling
100% (3)
Ing Spelling
1 page
NLP Techniques for Text Processing
No ratings yet
NLP Techniques for Text Processing
41 pages
Irs Ii
No ratings yet
Irs Ii
39 pages
NLP Lab Manual
No ratings yet
NLP Lab Manual
16 pages
Spelling Pack - Cleaned
No ratings yet
Spelling Pack - Cleaned
122 pages
Dictionary Words
No ratings yet
Dictionary Words
60 pages
English Homophone Dictionary Guide
No ratings yet
English Homophone Dictionary Guide
44 pages
Understanding Prefixes "Un" & "Dis"
No ratings yet
Understanding Prefixes "Un" & "Dis"
9 pages
20BCP112 - NLP Lab - LAB - Manual
No ratings yet
20BCP112 - NLP Lab - LAB - Manual
65 pages
CourseinIsaacPitmanShorthand 10250681 PDF
100% (1)
CourseinIsaacPitmanShorthand 10250681 PDF
265 pages
All Practicals
No ratings yet
All Practicals
33 pages
Understanding Suffixes and Their Rules
No ratings yet
Understanding Suffixes and Their Rules
4 pages
Roland VP 550 Service Manual
No ratings yet
Roland VP 550 Service Manual
39 pages
IGCSE Triple Biology Core Practical Booklet Bella Coombes
No ratings yet
IGCSE Triple Biology Core Practical Booklet Bella Coombes
47 pages
5 Precast in Seismic Areas
100% (1)
5 Precast in Seismic Areas
50 pages
CFD Approach To Firearms Sound Suppressor Design PDF
100% (1)
CFD Approach To Firearms Sound Suppressor Design PDF
13 pages
303-06 Starting System - Removal and Installation - Starter Motor
No ratings yet
303-06 Starting System - Removal and Installation - Starter Motor
4 pages
Geodynamic Evolution of Northeastern Tunisia
No ratings yet
Geodynamic Evolution of Northeastern Tunisia
33 pages
PWM Regenerative Rectifiers
No ratings yet
PWM Regenerative Rectifiers
15 pages
Department of History Presidency University Post Graduate Syllabus
No ratings yet
Department of History Presidency University Post Graduate Syllabus
64 pages
Seven Barriers To Great Communication
100% (6)
Seven Barriers To Great Communication
4 pages
Transparent LED Display Solutions
No ratings yet
Transparent LED Display Solutions
28 pages
L
No ratings yet
L
31 pages
Bhakti and Sankirtan by Sri Swami Sivananda Saraswati
100% (9)
Bhakti and Sankirtan by Sri Swami Sivananda Saraswati
236 pages
Materials at Equilibrium. G. Ceder Fall 2002
No ratings yet
Materials at Equilibrium. G. Ceder Fall 2002
6 pages
Molecules: Cholinesterase Inhibitors From An Endophytic Fungus Fv-Er401: Metabolomics, Isolation and Molecular Docking
No ratings yet
Molecules: Cholinesterase Inhibitors From An Endophytic Fungus Fv-Er401: Metabolomics, Isolation and Molecular Docking
18 pages
Claroty CTD v4.0.1 Syslog Spec Rev2
No ratings yet
Claroty CTD v4.0.1 Syslog Spec Rev2
24 pages
8th Class Science Summer Vacation Homework
No ratings yet
8th Class Science Summer Vacation Homework
2 pages
Industrial Training Report
No ratings yet
Industrial Training Report
11 pages
Monster Synthesis Activity
No ratings yet
Monster Synthesis Activity
5 pages
Mop Up Round - 32
No ratings yet
Mop Up Round - 32
1 page
CHAPTER 18 PIPING SYSTEMS - Marine-Engineering-Roy-l-Harrington-1971 PDF
No ratings yet
CHAPTER 18 PIPING SYSTEMS - Marine-Engineering-Roy-l-Harrington-1971 PDF
19 pages
Engaging Similes for Kids
No ratings yet
Engaging Similes for Kids
6 pages
Class-11 English Notes - The Portrait of A Lady & The Photograph
No ratings yet
Class-11 English Notes - The Portrait of A Lady & The Photograph
5 pages
NURS FPX 4905 Assessment 1 BSN Practicum Conference Call Worksheet
No ratings yet
NURS FPX 4905 Assessment 1 BSN Practicum Conference Call Worksheet
7 pages
Fathers and Daughters
No ratings yet
Fathers and Daughters
20 pages
Babul PDF
No ratings yet
Babul PDF
1 page
SAP Billing Process Overview
100% (2)
SAP Billing Process Overview
37 pages
Demand and Supply Fundamentals Explained
No ratings yet
Demand and Supply Fundamentals Explained
36 pages
5th Sem Syllabus
No ratings yet
5th Sem Syllabus
12 pages
Geography Exam for Senior Students
No ratings yet
Geography Exam for Senior Students
1 page
How and Why Intuition Works
No ratings yet
How and Why Intuition Works
18 pages

Conflation Algorithm - IsR Experiments

Uploaded by

Conflation Algorithm - IsR Experiments

Uploaded by

ISR experiments

write a code in go language to implement conflation algorithm. Accept input as a random 10

// ConflationProcessor handles text conflation operations

// NewConflationProcessor creates a new processor with predefined rules

// Simple stemming rules (suffix removal)

// Synonym mapping (map synonyms to most common word)

// Regex for punctuation removal

// RemovePunctuation removes all punctuation from text

// RemoveStopWords removes stop words from a slice of words

// StemWord applies simple stemming rules to a word

// Try to apply stemming rules in order of length (longest first)

for _, suffix := range suffixes {

// ReplaceSynonyms replaces words with their most common synonyms

// ProcessText applies the complete conflation algorithm

// Step 1: Remove punctuation

// Step 2: Split into words and convert to lowercase

// Step 3: Remove stop words

// Step 4: Apply stemming

// Step 5: Replace synonyms

// Create processor and process the text

if stems, exists := transformations["stemming"]; exists {

if syns, exists := transformations["synonyms"]; exists {

// Display final result

After removing punctuation:

Words after splitting and lowercasing:

After removing stop words:

After synonym replacement:

=== TRANSFORMATIONS APPLIED ===

=== FINAL CONFLATED TEXT ===

Original word count (after splitting): 107

• Document similarity comparison

You might also like