Skip to content

Instantly share code, notes, and snippets.

@justjkk
Created June 11, 2010 17:49
Show Gist options
  • Save justjkk/434807 to your computer and use it in GitHub Desktop.
Save justjkk/434807 to your computer and use it in GitHub Desktop.
Convert Data from TSV to JSON for use in Django Models
%{
/*
 * C declarations shared by the scanner rules and the helper code of this
 * TSV-to-JSON converter (output uses pk / model / fields objects).
 */
#include<stdio.h>
#include<string.h>
#define MAX_COLUMN_COUNT 256
#define STARTING_PK 1
#define MODEL_NAME "sample.model"
typedef enum {false,true} bool;

/* Zero-based index of the header column currently being read. */
int noc=0;
/* Primary key printed for each record; the newline ending the header line
 * bumps it to STARTING_PK before the first record is written. */
int pk=STARTING_PK-1;
/* Index into column_name of the next key the delimiter rule will print. */
int ci=0;

/* True until the first newline has been seen (first line = column names). */
bool is_header=true;
/* True until the first record has been opened; later records get a leading comma. */
bool is_first_record=true;
/* True when the next text token is a column name that should be stored. */
bool read_column_name=true;
/* True when the next text token is a field value that should be printed. */
bool read_column_value=false;
/* True when the next text token must first open a new record object. */
bool write_new_record=false;

/* Column names collected from the header line, one fixed-size slot per column. */
char column_name[MAX_COLUMN_COUNT][256];

char *trim(char *str);
%}
/* Fields within a line are separated by a single tab. */
DELIMITER \t
/* Lines (the header and each record) end with a newline. */
LINE_DELIMITER \n
/* A run of characters containing neither a tab nor a newline. */
TEXT [^\t\n]*
%%
{LINE_DELIMITER} {
    /*
     * End of an input line.
     *
     * Header line: switch to record mode, but only once at least one column
     * has been seen, so leading blank lines are skipped instead of being
     * taken for an empty header.
     *
     * Record line: close the record object. If a key was printed but its
     * value never arrived (trailing empty field), emit null first so the
     * output stays valid JSON.
     *
     * Blank line between records (no record was opened on it): ignored, so
     * no stray "}}" is printed and no primary key is consumed.
     */
    if(is_header) {
        if(noc>0 || !read_column_name) {
            is_header=false;
            write_new_record=true;
            read_column_value=true;
            pk++;
            ci=1;
        }
    }
    else if(!write_new_record) {
        if(read_column_value) {
            printf("null");
        }
        printf("}}");
        write_new_record=true;
        read_column_value=true;
        pk++;
        ci=1;
    }
};
{DELIMITER} {
    /*
     * A tab between two fields.
     *
     * Header line: move on to the next column name.
     *
     * Record line: print the key of the field that follows. An empty field
     * produces no text token, so two cases are handled here:
     *   - the line starts with a tab (first field empty): the record object
     *     has not been opened yet, so open it here;
     *   - the previous key is still waiting for its value: emit null for it.
     */
    if(is_header) {
        read_column_name=true;
        noc++;
    }
    else {
        if(write_new_record) {
            if(is_first_record) {
                is_first_record=false;
            }
            else {
                printf(",");
            }
            printf("{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
            write_new_record=false;
        }
        if(read_column_value) {
            printf("null");
        }
        /* Rows with more fields than the table can hold get an empty key
         * instead of reading past the end of column_name. */
        printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
        read_column_value=true;
        ci++;
    }
};
{TEXT} {
    /*
     * A field's text: a column name on the header line, a value otherwise.
     */
    if(write_new_record) {
        /* First field of a record: open the object, comma-separated from
         * the previous record if there was one. */
        if(is_first_record) {
            is_first_record=false;
        }
        else {
            printf(",");
        }
        printf("{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
        write_new_record=false;
    }
    if(is_header && read_column_name) {
        /* Bounded copy: over-long names are truncated and columns beyond
         * MAX_COLUMN_COUNT are dropped rather than overflowing the table. */
        if(noc<MAX_COLUMN_COUNT) {
            snprintf(column_name[noc],sizeof column_name[noc],"%s",trim(yytext));
        }
        read_column_name=false;
    }
    else if(!is_header && read_column_value) {
        if(*yytext!=0) {
            /* Print the value as a JSON string, escaping the characters
             * JSON does not allow unescaped inside a string. */
            const char *p;
            putchar('"');
            for(p=yytext;*p!='\0';p++) {
                unsigned char c=(unsigned char)*p;
                if(c=='"' || c=='\\') {
                    putchar('\\');
                    putchar(c);
                }
                else if(c<0x20) {
                    printf("\\u%04x",(unsigned)c);
                }
                else {
                    putchar(c);
                }
            }
            putchar('"');
        }
        else {
            printf("null");
        }
        read_column_value=false;
    }
};
%%
/*
 * Entry point: converts TSV input to a JSON array on stdout.
 *
 * argv[1], when given, names the input file; otherwise the scanner's
 * default input stream is used.
 *
 * Returns 0 on success and 1 if the input file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *file = NULL;
    int i;

    if (argc > 1) {
        file = fopen(argv[1], "r");
        if (!file) {
            /* Report on stderr so the message never mixes with JSON output,
             * and fail with a non-zero status so callers can detect it. */
            fprintf(stderr, "Could not open %s\n", argv[1]);
            return 1;
        }
        yyin = file;
    }

    /* Start with every column name empty. */
    for (i = 0; i < MAX_COLUMN_COUNT; i++) {
        *column_name[i] = '\0';
    }

    printf("[");
    yylex();
    printf("]\n");

    if (file) {
        fclose(file);
    }
    return 0;
}
int yywrap()
{
    /* Always reports 1 to the caller. */
    const int wrap_result = 1;

    return wrap_result;
}
/*
 * Strips leading and trailing space characters (' ' only) from str.
 *
 * The buffer is modified in place: a terminator is written right after the
 * last non-space character. The returned pointer points into the same
 * buffer, at the first non-space character.
 */
char *trim(char *str)
{
    char *end;

    /* Skip leading spaces by advancing the start pointer. */
    for (; *str == ' '; ++str) {
    }

    /* Walk back from the terminator over any trailing spaces. */
    end = str + strlen(str);
    while (end != str && end[-1] == ' ') {
        --end;
    }

    *end = '\0';
    return str;
}
%{
/*
 * C declarations shared by the scanner rules and the helper code of this
 * TSV-to-JSON converter (output uses pk / model / fields objects).
 */
#include<stdio.h>
#include<string.h>
#define MAX_COLUMN_COUNT 256
#define STARTING_PK 1
/* Value printed in every record's "model" field. */
char MODEL_NAME[100]= "sample.modelname";
typedef enum {false,true} bool;
/* noc: number of header columns read so far.
 * pk:  primary key printed for the next record.
 * ci:  index into column_name of the next key to print. */
int noc=0,pk=STARTING_PK,ci=0;
/* NOTE(review): these flags do not appear to be referenced by the
 * start-condition based rules below - confirm before removing. */
bool is_header=true,is_first_record=true,read_column_name=true,read_column_value=false,write_new_record=false;
/* Column names collected from the header line, one fixed-size slot per column. */
char column_name[MAX_COLUMN_COUNT][256];
char *trim(char *str);
%}
/* Fields within a line are separated by a single tab. */
DELIMITER \t
/* Lines (the header and each record) end with a newline. */
LINE_DELIMITER \n
/* Any single character that is neither a tab nor a newline. */
CHAR [^\t\n]
/* A possibly empty run of such characters. */
TEXT {CHAR}*
/* Scanner states, declared in the same order as before.
 * READ_NEW_RECORD is declared but no rule below refers to it. */
%s READ_HEADER READ_COLUMN_NAME READ_COLUMN_VALUE
%s READ_NEW_RECORD READ_BODY DEAD_STATE
%%
<INITIAL>{TEXT} {
    /*
     * First header cell: store it as column 0, then print the opening of
     * the JSON array and of the first record, whose first key is this
     * column name.
     */
    noc++;
    /* Bounded copy: an over-long name is truncated instead of overflowing
     * its slot in column_name. */
    snprintf(column_name[noc-1],sizeof column_name[noc-1],"%s",trim(yytext));
    printf("[{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
    ci=1;
    pk++;
    BEGIN READ_HEADER;
};
<INITIAL>{LINE_DELIMITER} {
/* Eat up initial empty lines: a newline seen before any header text is
   matched and discarded, and the scanner stays in the INITIAL state. */
};
<READ_HEADER>{DELIMITER} {
    /* A tab on the header line: count one more column and expect its name next. */
    ++noc;
    BEGIN(READ_COLUMN_NAME);
};
<READ_HEADER>{LINE_DELIMITER}/{TEXT} {
    /* End of the header line: the next token is the first value of the first record. */
    BEGIN(READ_COLUMN_VALUE);
};
<READ_COLUMN_NAME>{TEXT} {
    /*
     * Name of header column number noc (stored at index noc-1).
     * The copy is bounded: over-long names are truncated, and columns
     * beyond MAX_COLUMN_COUNT are dropped rather than written past the end
     * of column_name.
     */
    if(noc>=1 && noc<=MAX_COLUMN_COUNT) {
        snprintf(column_name[noc-1],sizeof column_name[noc-1],"%s",trim(yytext));
    }
    BEGIN READ_HEADER;
};
<READ_COLUMN_VALUE>{TEXT} {
    /*
     * A field value: print it as a JSON string. Quotes, backslashes and
     * control characters are escaped so the output remains valid JSON
     * whatever the field contains.
     */
    const char *p;
    putchar('"');
    for(p=yytext;*p!='\0';p++) {
        unsigned char c=(unsigned char)*p;
        if(c=='"' || c=='\\') {
            putchar('\\');
            putchar(c);
        }
        else if(c<0x20) {
            printf("\\u%04x",(unsigned)c);
        }
        else {
            putchar(c);
        }
    }
    putchar('"');
    BEGIN READ_BODY;
};
<READ_COLUMN_VALUE>{DELIMITER} {
    /*
     * A tab where a value was expected: the field is empty, so print null
     * for it and move on to the next key. Rows with more fields than the
     * table can hold get an empty key instead of reading past the end of
     * column_name.
     */
    printf("null");
    printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
    ci++;
    BEGIN READ_COLUMN_VALUE;
};
<READ_COLUMN_VALUE>{LINE_DELIMITER} {
    /*
     * A newline where a value was expected.
     *
     * ci>1 means at least one tab was seen on this line, so the last field
     * of the record is empty: print null for it, then push the newline back
     * and let the READ_BODY rules close the record as usual.
     *
     * Otherwise no field of the record has been read yet (blank line right
     * after the header): pass the newline through unchanged and keep
     * waiting for the first value.
     */
    if(ci>1) {
        printf("null");
        yyless(0);
        BEGIN READ_BODY;
    }
    else {
        ECHO;
    }
};
<READ_BODY>{DELIMITER} {
    /*
     * A tab after a value: print the key of the next field. Rows with more
     * fields than the table can hold get an empty key instead of reading
     * past the end of column_name.
     */
    printf(",\"%s\": ",ci<MAX_COLUMN_COUNT ? column_name[ci] : "");
    ci++;
    BEGIN READ_COLUMN_VALUE;
};
<READ_BODY>{LINE_DELIMITER}/[^\n] {
    /*
     * End of a record that is followed by another non-empty line: close the
     * current object and open the next one.
     *
     * The lookahead accepts any character except a newline, so a following
     * line that starts with a tab (empty first field) is still treated as a
     * record instead of ending the output early.
     */
    printf("}},{\"pk\": %d, \"model\": \"%s\", \"fields\": {\"%s\": ",pk,MODEL_NAME,column_name[0]);
    pk++;
    ci=1;
    BEGIN READ_COLUMN_VALUE;
};
<READ_BODY>{LINE_DELIMITER} {
    /* Newline that no other READ_BODY rule claimed: close the last record
     * and the array, then stop producing output. */
    printf("}}]");
    BEGIN DEAD_STATE;
};
<READ_BODY><<EOF>> {
    /* Input ended without a final newline: still close the last record and
     * the array so the JSON is complete. */
    printf("}}]");
    yyterminate();
};
<DEAD_STATE>(.|\n) {
    /* The array is already closed; discard any remaining input rather than
     * letting the default rule echo it after the JSON. */
};
%%
/*
 * Entry point: runs the scanner, which writes the JSON output to stdout.
 *
 * argv[1], when given, names the input file; otherwise the scanner's
 * default input stream is used.
 *
 * Returns 0 on success and 1 if the input file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *file = NULL;
    int i;

    if (argc > 1) {
        file = fopen(argv[1], "r");
        if (!file) {
            /* Report on stderr so the message never mixes with JSON output,
             * and fail with a non-zero status so callers can detect it. */
            fprintf(stderr, "Could not open %s\n", argv[1]);
            return 1;
        }
        yyin = file;
    }

    /* Start with every column name empty. */
    for (i = 0; i < MAX_COLUMN_COUNT; i++) {
        *column_name[i] = '\0';
    }

    yylex();

    if (file) {
        fclose(file);
    }
    return 0;
}
int yywrap()
{
    /* Always reports 1 to the caller. */
    const int wrap_result = 1;

    return wrap_result;
}
/*
 * Strips leading and trailing space characters (' ' only) from str.
 *
 * The buffer is modified in place: a terminator is written right after the
 * last non-space character. The returned pointer points into the same
 * buffer, at the first non-space character.
 */
char *trim(char *str)
{
    char *end;

    /* Skip leading spaces by advancing the start pointer. */
    for (; *str == ' '; ++str) {
    }

    /* Walk back from the terminator over any trailing spaces. */
    end = str + strlen(str);
    while (end != str && end[-1] == ' ') {
        --end;
    }

    *end = '\0';
    return str;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment