Last active
August 29, 2015 14:26
-
-
Save nihilismus/48b120014982bebf784f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# Fixes CSV files from DENUE (INEGI), in which some records (rows) are
# split across two lines.
# | |
# Copyright © 2015 Antonio Hernández Blas <[email protected]> | |
# This program is free software. It comes without any warranty, to | |
# the extent permitted by applicable law. You can redistribute it | |
# and/or modify it under the terms of the Do What The Fuck You Want | |
# To Public License, Version 2, as published by Sam Hocevar. See | |
# http://www.wtfpl.net/ for more details. | |
# Usage: sh <this script> INPUT.csv
#
# Reads INPUT.csv, converts it to Unix line endings with fromdos, and
# writes FIXED_<name> next to the input. A line is considered complete
# when it ends with a double quote; any other line is written without
# its newline so that the following line is appended to it.
# Progress and "Error #N" notices are printed to stdout.
# Exits 1 when no readable input file is given, when the temporary file
# cannot be created, or when fromdos fails.

# Name of this script, shown in the usage example below.
me=${0##*/}
input_file=$1

if [ ! -f "${input_file}" ]; then
  echo
  echo "Error, you must indicate which CSV file to proccess"
  echo "Example:"
  printf ' [%s@%s %s]$ sh %s DENUE_INEGI_x.csv\n' \
    "$(whoami)" "$(hostname)" "$(basename "$(pwd)")" "${me}"
  echo
  exit 1
fi

# Put the FIXED_ prefix on the file name only, so that an input given
# with a directory part (dir/x.csv) yields dir/FIXED_x.csv instead of
# the non-existent FIXED_dir/x.csv.
case ${input_file} in
  */*) output_file="${input_file%/*}/FIXED_${input_file##*/}" ;;
  *) output_file="FIXED_${input_file}" ;;
esac

# Unpredictable temporary file, removed on every exit path.
tmp_file=$(mktemp "${TMPDIR:-/tmp}/${me}.XXXXXX") || {
  echo "Error, could not create a temporary file" >&2
  exit 1
}
cleanup() { rm -f -- "${tmp_file}"; }
trap cleanup EXIT
# Turn common termination signals into a normal exit so cleanup runs.
trap 'exit 1' HUP INT TERM

echo
echo "> Input file: ${input_file}"

# Abort instead of silently producing an empty output file when the
# conversion fails (for example, fromdos is not installed).
if ! fromdos < "${input_file}" > "${tmp_file}"; then
  echo "Error, could not convert ${input_file} with fromdos" >&2
  exit 1
fi

# Arithmetic expansion discards the padding some wc implementations
# print before the number.
total_lines=$(($(wc -l < "${tmp_file}")))
current_line=1
current_error=1
echo "> Number of lines to process: ${total_lines} ..."
echo

# IFS= and -r keep every line byte-for-byte (no whitespace trimming, no
# backslash processing). The "|| [ -n ... ]" part also processes a final
# line that lacks a trailing newline. The output file is opened once, on
# descriptor 3, and truncated at that point.
while IFS= read -r line || [ -n "${line}" ]; do
  # Ignore trailing whitespace when deciding whether the line is
  # complete; the line itself is still written unchanged.
  trailing_ws=${line##*[![:space:]]}
  case ${line%"${trailing_ws}"} in
    *\")
      printf 'Current line: %s\r\rCurrent line: %s' \
        "${current_line}" "${current_line}"
      printf '%s\n' "${line}" >&3
      ;;
    *)
      echo
      echo " Error #${current_error} in line: ${current_line}"
      # No newline: the next input line continues this record.
      printf '%s' "${line}" >&3
      current_error=$((current_error + 1))
      ;;
  esac
  current_line=$((current_line + 1))
done < "${tmp_file}" 3> "${output_file}"

echo
echo
echo "> Done"
echo "> Output file: ${output_file}"
echo
#EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment