Created
March 9, 2012 16:43
-
-
Save davorg/2007422 to your computer and use it in GitHub Desktop.
Create MySQL Table Definition and Insert Statements From CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Data::Dumper; | |
use Text::ParseWords; | |
use List::Util 'max'; | |
my $file = shift || die "Need a csv file to process\n"; | |
my $table_name = 'Addresses'; | |
my $engine = 'InnoDB'; | |
my $charset = 'latin1'; | |
# Read and process header line | |
open my $data, '<', $file or die "Cannot open $file for reading\n"; | |
my $header = <$data>; | |
chomp $header; | |
# @cols will be an array of hashes. Each has will contain details | |
# of one of the columns | |
my @cols = map { { name => $_ } } parse_line ',', 0, $header; | |
for (@cols) { | |
$_->{name} =~ s/\s+/_/g; | |
$_->{name} =~ s/'/\\'/g; | |
} | |
# Read the rest of the data | |
while (<$data>) { | |
chomp; | |
my @row = parse_line ',', 0, $_ ; | |
for my $v (@row) { | |
s/'/\\'/g; | |
} | |
foreach my $col_no (0 .. $#cols) { | |
push @{$cols[$col_no]{values}}, $row[$col_no]; | |
} | |
} | |
# Analyse the data | |
foreach (@cols) { | |
guess_type($_); | |
} | |
# Output the table | |
open my $table, '>', 'mysql_create_table2.sql' or | |
die "Can't open file for table: $!"; | |
print $table table_def(@cols); | |
# Output the data | |
open my $values, '>', 'mysql_data_values2.sql' or | |
die "Can't open file for values: $!"; | |
for my $row (0 .. $#{$cols[0]{values}}) { | |
print $values insert($row, @cols); | |
} | |
die Dumper \@cols; | |
# Analyse an array of column hashes and fill in various information | |
# about the columns by looking at the data values in each column. | |
sub guess_type { | |
my $column = shift; | |
$column->{type} = 'varchar'; | |
foreach my $val (@{$column->{values}}) { | |
if ($val !~ /^-?\d+(\.\d+)?$/) { | |
$column->{type} = 'varchar'; | |
last; | |
} | |
if ($val =~ /^-?\d+\.\d+$/) { | |
$column->{type} = 'decimal'; | |
} else { | |
$column->{type} = 'int'; | |
} | |
} | |
if ($column->{type} eq 'decimal') { | |
$column->{dec1} = max map { length +(split /\./)[0] } @{$column->{values}}; | |
$column->{dec2} = max map { length +(split /\./)[1] } @{$column->{values}}; | |
} else { | |
$column->{length} = max map { length $_ } @{$column->{values}}; | |
} | |
} | |
# Return a table definition string given an array of column hashes. | |
sub table_def { | |
my @columns = @_; | |
return "\n\nCREATE TABLE `$table_name` (\n" . | |
join(",\n", map { column_def($_) } @columns) . | |
"\n) ENGINE=$engine DEFAULT CHARSET=$charset\n" . | |
"\n\n"; | |
} | |
# Given a column hash, return a string containing the SQL column definition. | |
sub column_def { | |
my $column = shift; | |
my $def = " `$column->{name}` $column->{type} "; | |
if ($column->{type} eq 'decimal') { | |
my $dec_length = $column->{dec1} + $column->{dec2}; | |
$def .= "($dec_length,$column->{dec2})"; | |
} else { | |
$def .= "($column->{length})"; | |
} | |
return $def; | |
} | |
# Given a row number and an array of column hashes, return a string | |
# containing an SQL insert statement for the given row. | |
sub insert { | |
my ($row, @columns) = @_; | |
return "insert into $table_name (" . | |
join(', ', map { $_->{name} } @columns) . | |
") \nvalues (" . | |
join(', ', map { "'$_->{values}[$row]'" } @cols) . | |
");\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See http://scriptingmysql.wordpress.com/2011/09/02/convert-csv-file-to-mysql-database-via-perl/ for the description of the problem I'm trying to solve and more discussion.