Assignment No. 2
Question:- Design a lexical analyzer for a given language. The lexical analyzer should ignore
redundant spaces, tabs and new lines. It should also ignore comments. Although the syntax
specification states that identifiers can be arbitrarily long, you may restrict the length to some
reasonable value. Simulate the same in C language. After lexical analysis, a separate file will be
created for each type of token.
Code:-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_IDENTIFIER_LENGTH 100
#define MAX_BUFFER_SIZE 1024
// Report whether `word` is one of the keywords this lexer recognises.
//
// word: NUL-terminated token text; compared case-sensitively with strcmp().
// Returns 1 when `word` exactly matches an entry of the table below,
// 0 otherwise.
int isKeyword(const char *word) {
    static const char *const reserved[] = {
        "int", "float", "double", "char", "if", "else",
        "while", "for", "return", "break", "continue"
    };
    const size_t count = sizeof reserved / sizeof reserved[0];
    size_t idx = 0;

    while (idx < count) {
        if (strcmp(word, reserved[idx]) == 0) {
            return 1;
        }
        idx++;
    }
    return 0;
}
// Append one token, followed by a newline, to the file named by tokenType.
//
// token:     NUL-terminated token text to record.
// tokenType: path of the output file. It is opened in append mode, so
//            earlier contents are kept.
//
// If the file cannot be opened, a message is printed to stdout and the
// token is dropped.
void writeTokenToFile(const char *token, const char *tokenType) {
    FILE *out = fopen(tokenType, "a");

    if (out != NULL) {
        fprintf(out, "%s\n", token);
        fclose(out);
    } else {
        printf("Error opening file for writing: %s\n", tokenType);
    }
}
// Classify a word token and record it: words accepted by isKeyword() are
// appended to keywords.txt, everything else to identifiers.txt.
//
// word: NUL-terminated token text.
void handleIdentifierOrKeyword(char *word) {
    const char *destination = isKeyword(word) ? "keywords.txt" : "identifiers.txt";

    writeTokenToFile(word, destination);
}
// Record a numeric token by appending it to numbers.txt.
//
// word: NUL-terminated token text.
void handleNumber(char *word) {
    const char *destination = "numbers.txt";

    writeTokenToFile(word, destination);
}
// Record a single-character operator or punctuation token by appending it
// to operators_and_punctuation.txt.
//
// ch: the character to record; it is turned into a one-character string
//     before being written.
void handleOperatorOrPunctuation(char ch) {
    char symbol[2];

    symbol[0] = ch;
    symbol[1] = '\0';
    writeTokenToFile(symbol, "operators_and_punctuation.txt");
}
// Consume the remainder of a block comment.
//
// Expects the opening "/*" to have been read already. Reads characters from
// sourceFile up to and including the closing "*/". If end of file is reached
// first, the function returns with the stream at EOF.
void skipComments(FILE *sourceFile) {
    // int, not char: fgetc() returns an int so that EOF stays distinct from
    // every byte value regardless of whether plain char is signed.
    int previous = 0;
    int current;

    while ((current = fgetc(sourceFile)) != EOF) {
        // Remembering the previous character (rather than reading one ahead
        // and throwing it away) lets a run such as "**/" end the comment.
        if (previous == '*' && current == '/') {
            return;
        }
        previous = current;
    }
}
// Tokenise sourceFile and hand each token to its handler.
//
// Reads the stream one character at a time until EOF and:
//   - skips whitespace (anything isspace() accepts);
//   - skips "//" line comments and "/* ... */" block comments;
//   - collects a letter or '_' followed by letters, digits and '_' and
//     passes the word to handleIdentifierOrKeyword();
//   - collects a run of decimal digits and passes it to handleNumber();
//   - passes each single character from the operator/punctuation set to
//     handleOperatorOrPunctuation().
// Any other character is silently dropped.
//
// Words and digit runs longer than MAX_IDENTIFIER_LENGTH - 1 characters are
// truncated to that length; the excess characters are consumed and discarded
// so they can never overflow the token buffer.
void processSourceCode(FILE *sourceFile) {
    // int, not char: keeps EOF distinct from every byte value and keeps the
    // <ctype.h> arguments within the range those functions accept.
    int ch;
    char word[MAX_IDENTIFIER_LENGTH];

    while ((ch = fgetc(sourceFile)) != EOF) {
        if (isspace(ch)) {
            continue;
        }

        // A '/' starts a line comment, a block comment, or is the division
        // operator. Read the following character once into its own variable
        // so all three cases can be told apart without losing input.
        if (ch == '/') {
            int next = fgetc(sourceFile);

            if (next == '/') {
                do {
                    ch = fgetc(sourceFile);
                } while (ch != '\n' && ch != EOF);
                continue;
            }
            if (next == '*') {
                skipComments(sourceFile);
                continue;
            }
            // Plain '/': give the look-ahead character back to the stream.
            if (next != EOF) {
                ungetc(next, sourceFile);
            }
            handleOperatorOrPunctuation('/');
            continue;
        }

        if (isalpha(ch) || ch == '_') {
            // Identifier or keyword.
            size_t len = 0;

            do {
                if (len < sizeof word - 1) {
                    word[len++] = (char)ch;
                }
                ch = fgetc(sourceFile);
            } while (isalnum(ch) || ch == '_');
            word[len] = '\0';
            // The character that ended the word belongs to the next token.
            if (ch != EOF) {
                ungetc(ch, sourceFile);
            }
            handleIdentifierOrKeyword(word);
        } else if (isdigit(ch)) {
            // Integer literal: digits only, so "20.5" yields 20, '.', 5.
            size_t len = 0;

            do {
                if (len < sizeof word - 1) {
                    word[len++] = (char)ch;
                }
                ch = fgetc(sourceFile);
            } while (isdigit(ch));
            word[len] = '\0';
            if (ch != EOF) {
                ungetc(ch, sourceFile);
            }
            handleNumber(word);
        } else if (ch != '\0' && strchr("+-*/=<>!%&|^;,.(){}", ch) != NULL) {
            // The explicit NUL test matters: strchr() would otherwise match
            // the set's terminator and emit an empty token for a NUL byte.
            handleOperatorOrPunctuation((char)ch);
        }
    }
}
// Program entry point.
//
// Opens source_code.txt, truncates (or creates) the four token output files,
// runs the lexer over the source, and prints a completion message.
//
// Returns 0 on success, or 1 if the source file cannot be opened or any
// output file cannot be created.
int main(void) {
    static const char *const outputFiles[] = {
        "keywords.txt",
        "identifiers.txt",
        "numbers.txt",
        "operators_and_punctuation.txt"
    };
    FILE *sourceFile = fopen("source_code.txt", "r");

    if (sourceFile == NULL) {
        printf("Error opening source code file.\n");
        return 1;
    }

    // Create and clear output files before writing tokens. Each fopen() is
    // checked because passing a null pointer to fclose() is undefined.
    for (size_t i = 0; i < sizeof outputFiles / sizeof outputFiles[0]; i++) {
        FILE *out = fopen(outputFiles[i], "w");

        if (out == NULL) {
            printf("Error opening file for writing: %s\n", outputFiles[i]);
            fclose(sourceFile);
            return 1;
        }
        fclose(out);
    }

    // Process the source code
    processSourceCode(sourceFile);

    // Close the source file
    fclose(sourceFile);
    printf("Lexical analysis completed. Tokens written to corresponding files.\n");
    return 0;
}
output:-
source_code.txt (Input File):
int main() {
int x = 10;
float y = 20.5;
if (x < y) {
x = x + 1;
}
return 0;
}
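Generated Files (abridged; the program appends one line per token occurrence, so the real files repeat entries such as int and x):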
1.keywords.txt:
int
float
if
return
2.identifiers.txt:
main
x
y
3.numbers.txt:
10
20
5
1
0
4. operators_and_punctuation.txt:
(
)
=
<
+
;
{
}
;