Created
June 11, 2010 17:49
-
-
Save justjkk/434807 to your computer and use it in GitHub Desktop.
Convert Data from TSV to JSON for use in Django Models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%{
/*
 * TSV to JSON converter: declarations shared by the scanner rules and the
 * C code at the end of this file.
 *
 * The first input line is read as tab separated column names.  Every later
 * line is printed as one JSON object with "pk", "model" and "fields" keys,
 * the column names being used as the keys inside "fields".
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Number of rows in the column_name table. */
#define MAX_COLUMN_COUNT 256
/* Primary key printed for the first data record. */
#define STARTING_PK 1
/* Value printed for the "model" key of every record. */
#define MODEL_NAME "sample.model"

/* Index of the header column being read; one tab in the header advances it. */
int noc=0;
/* Primary key of the current record; it is advanced when a line ends, so it
   starts one below STARTING_PK. */
int pk=STARTING_PK-1;
/* Index into column_name of the next key to print inside a record. */
int ci=0;

/* True until the end of the header line has been seen. */
bool is_header=true;
/* True until the first record has been opened; later records are preceded
   by a comma. */
bool is_first_record=true;
/* True while the next text token has to be stored as a column name. */
bool read_column_name=true;
/* True while a field value is still awaited. */
bool read_column_value=false;
/* Set when a line ends; cleared once the opening of the next record has
   been printed. */
bool write_new_record=false;

/* Column names taken from the header line, in input order. */
char column_name[MAX_COLUMN_COUNT][256];

char *trim(char *str);
%}
DELIMITER [\t] | |
LINE_DELIMITER [\n] | |
TEXT [^\n\t]* | |
%% | |
{LINE_DELIMITER} {
    /*
     * End of a line.  The header line switches the scanner over to data
     * records; a data line closes the record it opened.  Blank lines
     * before the header or between records are skipped, so they neither
     * print a closing bracket pair nor consume a primary key.
     */
    if(is_header) {
        if(noc!=0 || !read_column_name) {
            /* At least one header cell was seen: the header is complete. */
            is_header=false;
            write_new_record=true;
            read_column_value=true;
            pk++;
            ci=1;
        }
    }
    else if(!write_new_record) {
        /* A record is open.  A value that is still awaited means the
           line ended with an empty field. */
        if(read_column_value) {
            printf("null");
        }
        printf("}}");
        write_new_record=true;
        read_column_value=true;
        pk++;
        ci=1;
    }
}
{DELIMITER} {
    /*
     * A tab.  In the header it starts the next column name; in a data
     * line it ends the current field and prints the key of the next one.
     */
    if(is_header) {
        read_column_name=true;
        noc++;
    }
    else {
        if(write_new_record) {
            /* The line starts with a tab: its first field is empty, but
               the record still has to be opened. */
            if(is_first_record) {
                is_first_record=false;
            }
            else {
                printf(",");
            }
            printf("{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
            write_new_record=false;
        }
        if(read_column_value) {
            /* No text came since the previous key: the field is empty. */
            printf("null");
        }
        /* Fields beyond the table size get an empty key instead of a read
           past the end of column_name. */
        printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
        read_column_value=true;
        ci++;
    }
}
{TEXT} {
    /*
     * Field text.  In the header it is stored, trimmed, as a column name;
     * in a data line it is printed as a JSON string.
     * NOTE(review): the text is printed as is, so a double quote or a
     * backslash inside a field still yields invalid JSON.
     */
    if(is_header) {
        if(read_column_name) {
            if(noc<MAX_COLUMN_COUNT) {
                /* Bounded copy: names longer than a table row are cut. */
                snprintf(column_name[noc],sizeof column_name[noc],"%s",trim(yytext));
            }
            read_column_name=false;
        }
    }
    else {
        if(write_new_record) {
            if(is_first_record) {
                is_first_record=false;
            }
            else {
                printf(",");
            }
            printf("{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
            write_new_record=false;
        }
        if(read_column_value) {
            if(*yytext!='\0') {
                printf("\"%s\"",yytext);
            }
            else {
                printf("null");
            }
            read_column_value=false;
        }
    }
}
<<EOF>> {
    /* The input ended without a final newline: close the open record. */
    if(!is_header && !write_new_record) {
        if(read_column_value) {
            printf("null");
        }
        printf("}}");
        write_new_record=true;
    }
    yyterminate();
}
%%
int main(int argc, char **argv) | |
{ | |
int i; | |
if(argc>1) | |
{ | |
FILE *file; | |
file = fopen(argv[1],"r"); | |
if(!file) | |
{ | |
printf("Could not open %s\n",argv[1]); | |
exit(0); | |
} | |
yyin=file; | |
} | |
for(i=0;i<MAX_COLUMN_COUNT;i++) | |
{ | |
*column_name[i]='\0'; | |
} | |
printf("["); | |
yylex(); | |
printf("]\n"); | |
return 0; | |
} | |
/*
 * End-of-input hook of the scanner.  Always returns 1, which tells the
 * scanner that there is no further input to switch to.
 */
int yywrap(void)
{
    return 1;
}
/*
 * Strips leading and trailing space characters from str.
 *
 * The buffer is modified in place: a terminator is written after the last
 * character that is kept.  The returned pointer points into str, past any
 * leading spaces.  Only ' ' is stripped; other whitespace is kept.
 * Returns NULL when str is NULL.
 */
char *trim(char *str)
{
    size_t len;

    if(str==NULL)
    {
        return NULL;
    }
    while(*str==' ')
    {
        str++;
    }
    len = strlen(str);
    while(len>0 && str[len-1]==' ')
    {
        len--;
    }
    str[len]='\0';
    return str;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%{
/*
 * TSV to JSON converter: declarations shared by the scanner rules and the
 * C code at the end of this file.
 *
 * The first non-empty input line is read as tab separated column names.
 * Every later line is printed as one JSON object with "pk", "model" and
 * "fields" keys, the column names being used as the keys inside "fields".
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Number of rows in the column_name table. */
#define MAX_COLUMN_COUNT 256
/* Primary key printed for the first data record. */
#define STARTING_PK 1

/* Value printed for the "model" key of every record. */
char MODEL_NAME[100]= "sample.modelname";

/* Number of header columns seen so far; the name of the current column is
   stored at index noc-1. */
int noc=0;
/* Primary key of the next record to be printed. */
int pk=STARTING_PK;
/* Index into column_name of the next key to print inside a record. */
int ci=0;

/* NOTE(review): none of the flags below is referenced by the rules or the
   C code visible in this file, which tracks its position with start
   conditions instead.  They are kept in case other code relies on them. */
bool is_header=true;
bool is_first_record=true;
bool read_column_name=true;
bool read_column_value=false;
bool write_new_record=false;

/* Column names taken from the header line, in input order. */
char column_name[MAX_COLUMN_COUNT][256];

char *trim(char *str);
%}
DELIMITER [\t] | |
LINE_DELIMITER [\n] | |
CHAR [^\n\t] | |
TEXT {CHAR}* | |
%s READ_HEADER | |
%s READ_COLUMN_NAME | |
%s READ_COLUMN_VALUE | |
%s READ_NEW_RECORD | |
%s READ_BODY | |
%s DEAD_STATE | |
%% | |
<INITIAL>{LINE_DELIMITER} {
    /* Eat up initial empty lines */
}
<INITIAL>{TEXT} {
    /*
     * First cell of the header line: store it as the name of column 0 and
     * start the JSON array.  The opening of a record is printed only once
     * a data line is really there, so a header without data gives an
     * empty array instead of an unfinished object.
     */
    noc=1;
    snprintf(column_name[0],sizeof column_name[0],"%s",trim(yytext));
    printf("[");
    BEGIN READ_HEADER;
}
<INITIAL>{DELIMITER} {
    /* The header line starts with a tab: column 0 has an empty name and
       the name of column 1 comes next. */
    noc=2;
    printf("[");
    BEGIN READ_COLUMN_NAME;
}
<READ_HEADER>{DELIMITER} {
    /* A tab after a column name: one more column follows. */
    noc++;
    BEGIN READ_COLUMN_NAME;
}
<READ_COLUMN_NAME>{TEXT} {
    /* Bounded store: columns beyond the table are dropped and names longer
       than a table row are cut. */
    if(noc<=MAX_COLUMN_COUNT) {
        snprintf(column_name[noc-1],sizeof column_name[noc-1],"%s",trim(yytext));
    }
    BEGIN READ_HEADER;
}
<READ_COLUMN_NAME>{DELIMITER} {
    /* Two tabs in a row: the column in between keeps an empty name. */
    noc++;
}
<READ_HEADER,READ_COLUMN_NAME>{LINE_DELIMITER} {
    /* The header line is complete: data lines follow. */
    BEGIN READ_NEW_RECORD;
}
<READ_NEW_RECORD>{CHAR}|{DELIMITER} {
    /*
     * First character of a data line.  Open the record, then push the
     * character back so that the field rules see the whole line; a
     * leading tab is thereby reported as an empty first field.
     */
    if(pk!=STARTING_PK) {
        printf(",");
    }
    printf("{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
    pk++;
    ci=1;
    yyless(0);
    BEGIN READ_COLUMN_VALUE;
}
<READ_NEW_RECORD>{LINE_DELIMITER} {
    /* An empty line ends the table: close the array, ignore the rest. */
    printf("]");
    BEGIN DEAD_STATE;
}
<READ_COLUMN_VALUE>{TEXT} {
    /* NOTE(review): the text is printed as is, so a double quote or a
       backslash inside a field still yields invalid JSON. */
    printf("\"%s\"",yytext);
    BEGIN READ_BODY;
}
<READ_COLUMN_VALUE>{DELIMITER} {
    /* A tab while a value is awaited: the field is empty. */
    printf("null");
    printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
    ci++;
}
<READ_COLUMN_VALUE>{LINE_DELIMITER} {
    /* The line ends while a value is awaited: the last field is empty. */
    printf("null");
    printf("}}");
    BEGIN READ_NEW_RECORD;
}
<READ_BODY>{DELIMITER} {
    /* Fields beyond the table size get an empty key instead of a read
       past the end of column_name. */
    printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
    ci++;
    BEGIN READ_COLUMN_VALUE;
}
<READ_BODY>{LINE_DELIMITER} {
    printf("}}");
    BEGIN READ_NEW_RECORD;
}
<DEAD_STATE>{CHAR}+|{DELIMITER}|{LINE_DELIMITER} {
    /* Drop everything after the end of the table instead of letting the
       default rule copy it to the output. */
}
<READ_HEADER,READ_COLUMN_NAME,READ_NEW_RECORD><<EOF>> {
    /* End of input between records, or right after the header. */
    printf("]");
    yyterminate();
}
<READ_COLUMN_VALUE><<EOF>> {
    /* End of input while a value is awaited: the last field is empty. */
    printf("null");
    printf("}}]");
    yyterminate();
}
<READ_BODY><<EOF>> {
    /* The last line has no newline: close its record and the array. */
    printf("}}]");
    yyterminate();
}
%%
int main(int argc, char **argv) | |
{ | |
int i; | |
if(argc>1) | |
{ | |
FILE *file; | |
file = fopen(argv[1],"r"); | |
if(!file) | |
{ | |
printf("Could not open %s\n",argv[1]); | |
exit(0); | |
} | |
yyin=file; | |
} | |
for(i=0;i<MAX_COLUMN_COUNT;i++) | |
{ | |
*column_name[i]='\0'; | |
} | |
yylex(); | |
return 0; | |
} | |
/*
 * End-of-input hook of the scanner.  Always returns 1, which tells the
 * scanner that there is no further input to switch to.
 */
int yywrap(void)
{
    return 1;
}
/*
 * Strips leading and trailing space characters from str.
 *
 * The buffer is modified in place: a terminator is written after the last
 * character that is kept.  The returned pointer points into str, past any
 * leading spaces.  Only ' ' is stripped; other whitespace is kept.
 * Returns NULL when str is NULL.
 */
char *trim(char *str)
{
    size_t len;

    if(str==NULL)
    {
        return NULL;
    }
    while(*str==' ')
    {
        str++;
    }
    len = strlen(str);
    while(len>0 && str[len-1]==' ')
    {
        len--;
    }
    str[len]='\0';
    return str;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment