Created
October 31, 2018 10:18
-
-
Save AmalJossy/09d0dc881811bd6eb5a36783ffd654a7 to your computer and use it in GitHub Desktop.
Program to simulate the lexical analysis of a C program
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Scanner state shared by the whole file: append() fills the buffer,
   tokenize() prints it and rewinds `i`. */
char buff[255];   /* text of the token being assembled */
FILE *fpout;      /* token listing stream; opened and closed by main() */
int i;            /* number of characters currently stored in buff */

/*
 * Append one character to the token buffer and keep it NUL-terminated.
 *
 * ch: character to store at position `i`.
 *
 * Characters that would not fit (the buffer holds at most
 * sizeof buff - 1 characters plus the terminator) are silently dropped, so
 * an over-long token is truncated instead of writing past the end of buff.
 */
void append(char ch){
    if (i < 0 || i >= (int)sizeof buff - 1) {
        return;   /* full (or corrupted index): refuse to write out of bounds */
    }
    buff[i++] = ch;
    buff[i] = '\0';
}
int identify(){ | |
char keywords[32][8] = {"auto","break","case","char","const","continue","default", | |
"do","double","else","enum","extern","float","for","goto", | |
"if","int","long","register","return","short","signed", | |
"sizeof","static","struct","switch","typedef","union", | |
"unsigned","void","volatile","while"}; | |
//char op[][3]={"+","-","%","*","/","<","<=",">",">=","==","!=","&&","||"}; | |
//complete this | |
int j; | |
for(j=0;j<32;j++){ | |
if(strcmp(buff,keywords[j])==0){ | |
return 0; | |
} | |
} | |
return 1; | |
} | |
/*
 * Classify a character by the kind of token it can start.
 *
 * ch: the first character of the upcoming token.
 *
 * Returns one of:
 *    1  letter               - keyword or identifier
 *    2  underscore           - identifier
 *    3  operator character   - = + < > * / - % & ! | ^ ~
 *    4  any other punctuation (delimiter)
 *    5  digit, '"' or '\''   - numeric, string or character literal
 *    6  whitespace
 *    7  '#'                  - preprocessor directive
 *   16  anything else (control characters, bytes outside the C locale's
 *       printable set)
 *
 * '/' is reported as an operator (3); telling a division apart from the
 * start of a comment is left to the caller.
 */
int categorizeFirst(char ch){
    /* <ctype.h> functions require a value representable as unsigned char
       (or EOF); a negative plain char would be undefined behaviour. */
    const unsigned char uc = (unsigned char)ch;

    if (isalpha(uc)) return 1;
    if (isdigit(uc)) return 5;
    if (ch == '_') return 2;                 // must precede the ispunct() test
    if (ch == '"' || ch == '\'') return 5;
    if (isspace(uc)) return 6;

    switch (ch) {
    case '=': case '+': case '<': case '>': case '*':
    case '/': case '-': case '%': case '&':
    case '!': case '|': case '^': case '~':
        return 3;
    case '#':
        return 7;
    default:
        break;
    }

    if (ispunct(uc)) return 4;
    return 16;
}
int tokenize(int type){ | |
if(i==0){ return 1; } | |
i=0; | |
char types[5][3]={"KW","ID","OP","DL","LT"}; | |
if(type==1) type=identify(); | |
printf("<%s,%s>\n",buff,types[type]); | |
fprintf(fpout,"<%s,%s>\n",buff,types[type]); | |
return 0; | |
} | |
int main(){ | |
FILE *fp; | |
char ch,head; | |
int count=0; | |
int curr,prev; | |
fp=fopen("code.txt","r"); | |
fpout=fopen("output.txt","w"); | |
if(fp==NULL){ | |
printf("file doesn't exist\n"); | |
return 0; | |
} | |
ch=fgetc(fp); | |
curr=categorizeFirst(ch); | |
while(ch!=EOF){ | |
curr=categorizeFirst(ch); | |
switch(curr){ | |
case 1:append(ch); | |
while((ch=fgetc(fp))!=EOF && (isalnum(ch) || ch=='_')) { // for words that can be KW or ID | |
append(ch); | |
} | |
tokenize(1);break; | |
case 2:append(ch); | |
while((ch=fgetc(fp))!=EOF && (isalnum(ch) || ch=='_')) { // for words that can be ID | |
append(ch); | |
} | |
tokenize(1);break; | |
case 3:append(ch); // Char is OP (XD | |
ch=fgetc(fp); | |
curr=categorizeFirst(ch); | |
if(ch=='*'){ //monkey patch sorry | |
while((ch=fgetc(fp))!='*'){} // skip comments | |
ch=getc(fp); // skip end of comment | |
ch=getc(fp); | |
i=0; | |
break; | |
} | |
if(ch=='/'){ | |
while((ch=fgetc(fp))!='\n'){} | |
ch=getc(fp); | |
i=0; | |
break; | |
} | |
if(curr==3){ | |
append(ch); | |
ch=fgetc(fp); | |
} | |
tokenize(2);break; | |
case 4:append(ch); // Char is DL | |
ch=fgetc(fp); | |
tokenize(3);break; | |
case 5:if(ch=='"'){ // LITERAL IN DOUBLE quotes | |
append(ch); | |
while((ch=fgetc(fp))!='"'){ | |
if(ch=='\\'){ | |
append(ch); | |
ch=fgetc(fp); | |
} | |
append(ch); | |
} | |
append(ch); | |
ch=fgetc(fp); | |
} | |
if(ch=='\''){ // LITERAL IN single quotes | |
append(ch); | |
while((ch=fgetc(fp))!='\''){ | |
if(ch=='\\'){ | |
append(ch); | |
ch=fgetc(fp); | |
} | |
append(ch); | |
} | |
append(ch); | |
ch=fgetc(fp); | |
} | |
if(isdigit(ch)){ // numeric literals | |
append(ch); | |
while(isdigit((ch=fgetc(fp)))){ | |
append(ch); | |
} | |
if(ch=='.' || ch=='e' || ch=='E'){ | |
append(ch); | |
while(isdigit((ch=fgetc(fp)))){ | |
append(ch); | |
} | |
} | |
} | |
tokenize(4); | |
break; | |
case 7:while((ch=fgetc(fp))!='\n'){} | |
ch=getc(fp); | |
break; | |
/*case 8:ch=getc(fp); | |
if(ch=='*'){ | |
while((ch=fgetc(fp))!='*'){} // skip comments | |
ch=getc(fp); // skip end of comment | |
} | |
if(ch=='/'){ | |
while((ch=fgetc(fp))!='\n'){} | |
} | |
ch=getc(fp); | |
break;*/ | |
default:tokenize(0);ch=fgetc(fp); | |
} | |
} | |
fclose(fp); | |
fclose(fpout); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment