#!/bin/bash
# CSV to JSON converter using BASH
# original script from http://blog.secaserver.com/2013/12/convert-csv-json-bash/
# thanks SecaGuy!
# Usage ./csv2json.sh input.csv > output.json
input=$1
[ -z $1 ] && echo "No CSV input file specified" && exit 1
[ ! -e $input ] && echo "Unable to locate $1" && exit 1
read first_line < $input
a=0
headings=`echo $first_line | awk -F, {'print NF'}`
lines=`cat $input | wc -l`
while [ $a -lt $headings ]
do
    head_array[$a]=$(echo $first_line | awk -v x=$(($a + 1)) -F"," '{print $x}')
    a=$(($a+1))
done
c=0
echo "{"
while [ $c -lt $lines ]
do
    read each_line
    if [ $c -ne 0 ]; then
        d=0
        echo -n "{"
        while [ $d -lt $headings ]
        do
            each_element=$(echo $each_line | awk -v y=$(($d + 1)) -F"," '{print $y}')
            if [ $d -ne $(($headings-1)) ]; then
                echo -n ${head_array[$d]}":"$each_element","
            else
                echo -n ${head_array[$d]}":"$each_element
            fi
            d=$(($d+1))
        done
        if [ $c -eq $(($lines-1)) ]; then
            echo "}"
        else
            echo "},"
        fi
    fi
    c=$(($c+1))
done < $input
echo "}"
Okay, I didn't like that we couldn't use commas, so I updated it to work with them. This could probably be done in a more elegant way by someone who is better with awk or sed than I am, but here is a quick implementation that works for at least all the CSVs I have thrown at it (an admittedly small set).
#!/bin/bash
# CSV to JSON converter using BASH
# original script from https://gist.github.com/dsliberty/3de707bc656cf757a0cb
# Usage ./csv2json.sh input.csv > output.json
#set -x
shopt -s extglob
input="${1:-}"
SEP=","
[ -z "${input}" ] && echo "No CSV input file specified" && exit 1
[ ! -e "${input}" ] && echo "Unable to locate ${input}" && exit 1
csv_nextField()
{
local line="$(echo "${1}" | sed 's/\r//g')"
local start=0
local stop=0
if [[ -z "${line}" ]]; then
return 0
fi
local offset=0
local inQuotes=0
while [[ -n "${line}" ]]; do
local char="${line:0:1}"
line="${line:1}"
if [[ "${char}" == "${SEP}" && ${inQuotes} -eq 0 ]]; then
inQuotes=0
break
elif [[ "${char}" == '"' ]]; then
if [[ ${inQuotes} -eq 1 ]]; then
inQuotes=0
else
inQuotes=1
fi
else
echo -n "${char}"
fi
offset=$(( ${offset} + 1 ))
done
echo ""
return $(( ${offset} + 1 ))
}
read first_line < "${input}"
a=0
headings=`echo ${first_line} | awk -F"${SEP}" {'print NF'}`
lines=`cat "${input}" | wc -l`
while [[ ${a} -lt ${headings} ]]; do
field="$(csv_nextField "${first_line}")"
first_line="${first_line:${?}}"
head_array[${a}]="${field}"
a=$(( ${a} + 1 ))
done
c=0
echo "["
while [ ${c} -lt ${lines} ]
do
read each_line
each_line="$(echo "${each_line}" | sed 's/\r//g')"
if [[ ${c} -eq 0 ]]; then
c=$(( ${c} + 1 ))
else
d=0
echo " {"
while [[ ${d} -lt ${headings} ]]; do
item="$(csv_nextField "${each_line}")"
each_line="${each_line:${?}}"
echo -n " \"${head_array[${d}]}\": "
case "${item}" in
"")
echo -n "null"
;;
null|true|false|\"*\"|+([0123456789]))
echo -n ${item}
;;
*)
echo -n "\"${item}\""
;;
esac
d=$(( ${d} + 1 ))
[[ ${d} -lt ${headings} ]] && echo "," || echo ""
done
echo -n " }"
c=$(( ${c} + 1 ))
[[ ${c} -lt ${lines} ]] && echo "," || echo ""
fi
done < "${input}"
echo "]"
My CSV file is tab-separated; what do I use for SEP= ?
Greetings
McMelloW
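A hedged answer: the script compares SEP one character at a time, so a plain "\t" would be treated as a backslash followed by a t. Bash ANSI-C quoting passes a literal tab instead:
# Set the separator to a literal tab character (Bash ANSI-C quoting)
SEP=$'\t'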
This is fantastic!
Using @jwkblades' latest version.
Just wanted to add that by changing
input="${1:-}"
to:
input="${1:-/dev/stdin}"
It will let you take stdin as an input instead of specifying a file, which fits my use case perfectly.
Thank you all.
-- edit That only seems to work when the piped data already exists in a file.
cat test.csv | csv2json.sh
but not
horribly_old_custom_sql_query_tool "select * from table_name limit 10" | csv2json > output.json
Working to see if I can't simplify a tool and have it skip writing to the csv in the first place.
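The underlying issue is that the script opens its input several times (the header read, the wc -l line count, and the main loop), and a pipe can only be consumed once, whereas a regular file can be reopened. An untested stopgap is to buffer the query output into a temporary file first:
# Buffer the pipeline into a temp file so the script can reopen it;
# the query tool name here is just the placeholder from the example above.
tmp="$(mktemp)"
horribly_old_custom_sql_query_tool "select * from table_name limit 10" > "$tmp"
./csv2json.sh "$tmp" > output.json
rm -f "$tmp"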
OK, this version lets me use either piped stdin or read from a file:
#!/bin/bash
# CSV to JSON converter using BASH
# original script from https://gist.github.com/dsliberty/3de707bc656cf757a0cb
# Usage ./csv2json.sh input.csv > output.json
# cat <input.csv> | csv2json > output.json
#set -x
shopt -s extglob
input="${1:-/dev/stdin}"
SEP=","
[ -z "${input}" ] && echo "No CSV input file specified" && exit 1
[ ! -e "${input}" ] && echo "Unable to locate ${input}" && exit 1
csv_nextField()
{
local line="$(echo "${1}" | sed 's/\r//g')"
local start=0
local stop=0
if [[ -z "${line}" ]]; then
return 0
fi
local offset=0
local inQuotes=0
while [[ -n "${line}" ]]; do
local char="${line:0:1}"
line="${line:1}"
if [[ "${char}" == "${SEP}" && ${inQuotes} -eq 0 ]]; then
inQuotes=0
break
elif [[ "${char}" == '"' ]]; then
if [[ ${inQuotes} -eq 1 ]]; then
inQuotes=0
else
inQuotes=1
fi
else
echo -n "${char}"
fi
offset=$(( ${offset} + 1 ))
done
echo ""
return $(( ${offset} + 1 ))
}
read -r first_line < "${input}"
a=0
headings=$(echo "${first_line}" | awk -F"${SEP}" {'print NF'})
if [ "${input}" = "/dev/stdin" ]; then
while read -r line
do
lines_str+="$line"$'\n'
c=1
done < "${input}"
else
lines_str="$(cat "${input}")"
c=0
fi
lines_num=$(echo "${lines_str}" | wc -l)
while [[ ${a} -lt ${headings} ]]; do
field="$(csv_nextField "${first_line}")"
first_line="${first_line:${?}}"
head_array[${a}]="${field}"
a=$(( ${a} + 1 ))
done
#c=0
echo "["
while [ ${c} -lt ${lines_num} ]
do
read -r each_line
each_line="$(echo "${each_line}" | sed 's/\r//g')"
if [[ ${c} -eq 0 ]]; then
c=$(( ${c} + 1 ))
else
d=0
echo " {"
while [[ ${d} -lt ${headings} ]]; do
item="$(csv_nextField "${each_line}")"
each_line="${each_line:${?}}"
echo -n " \"${head_array[${d}]}\": "
case "${item}" in
"")
echo -n "null"
;;
null|true|false|\"*\"|+([0123456789]))
echo -n ${item}
;;
*)
echo -n "\"${item}\""
;;
esac
d=$(( ${d} + 1 ))
[[ ${d} -lt ${headings} ]] && echo "," || echo ""
done
echo -n " }"
c=$(( ${c} + 1 ))
[[ ${c} -lt ${lines_num} ]] && echo "," || echo ""
fi
done <<< "${lines_str}"
echo "]"
Had some issues with this on MacOS, as every echo inserted a line break; I also had ^M at the end of the last header and of the last item in every line. My input files also had some blank lines that caused issues, so I had to find a solution for that.
Using @jwkblades' first version, as the comma issue did not impact me; if it becomes an issue I will use a different delimiter.
Refactored the code with the following changes:
- removed all blank lines in the input file
- replaced all echo calls with printf
- removed ^M from the last field
- simplified the code some more
Now works well against my files on both MacOS and Ubuntu.
EDIT
Will work with piped stdin (i.e. sqlite3 output) or read from a file, as per @outwitevil.
Added the ability to quote unquoted IP addresses.
#!/bin/bash
# CSV to JSON converter using BASH
# original script from https://gist.github.com/dsliberty/3de707bc656cf757a0cb
# Usage ./csv2json.sh input.csv > output.json
shopt -s extglob
input="${1:-/dev/stdin}"
SEP=","
[ -z "${input}" ] && printf "No CSV input file specified" && exit 1
[ ! -e "${input}" ] && printf "Unable to locate ${input}" && exit 1
data=$(sed '/^$/d' "${input}")
line_count=$(printf "${data}" | wc -l)
printf "[\n"
row=0
while IFS=$'\n\r' read -r line; do
if [[ ${row} -eq 0 ]]; then
IFS="$SEP" read -ra head_items <<< "${line}"
else
IFS="$SEP" read -ra line_items <<< "${line}"
printf "\t{\n"
col=0
for item in "${line_items[@]}"; do
printf "\t\t\"${head_items[${col}]}\": "
case ${item} in
\"\")
printf "null"
;;
\"*\")
printf "${item}"
;;
*.*.*.*)
printf "\"${item}\""
;;
null|true|false|+([0-9.]))
printf "${item}"
;;
*)
printf "\"${item}\""
;;
esac
(( col++ ))
[[ ${col} -lt ${#head_items[@]} ]] && printf ",\n" || printf "\n"
done
printf "\t}"
[[ ${row} -lt ${line_count} ]] && printf ",\n" || printf "\n"
fi
(( row++ ))
done <<< "${data}"
printf "]"
Many thanks to each of you! Just what I was looking for!
Here's a quick and dirty version that works on shells like Busybox ash that don't handle arrays
function CSV2JSON {
local inputFile="${1}" # Input csv text
local separator="${2:-,}" # Separator, defaults to ','
local lineCounter=0
local numberOfHeadings=0
local headingsCounter=0
local elementNumber=0
# Since we do not have arrays in ash, we assign elements via eval "header$number"
# variables header[0-9]* cannot be declared as local
echo -e "\t["
while IFS= read -r line; do
if [ "$line" == "" ] || [ "${line:0:1}" == "#" ]; then
continue
fi
if [ $lineCounter -eq 0 ]; then
numberOfHeadings=$(echo $line | awk -F"$separator" {'print NF'})
while [ $headingsCounter -lt $numberOfHeadings ]; do
eval "header$headingsCounter=\"$(echo $line | awk -v x=$((headingsCounter+1)) -F"$separator" '{print $x}')\""
headingsCounter=$((headingsCounter+1))
done
else
echo -e "\t\t{"
elementNumber=0
while [ $elementNumber -lt $numberOfHeadings ]; do
element="$(echo $line | awk -v y=$(($elementNumber+1)) -F"$separator" '{print $y}')"
if [ "$element" == "" ]; then
element="null"
fi
eval "echo -e \"\\t\\t\\\"\$header$elementNumber\\\" : \\\"$element\\\",\""
elementNumber=$((elementNumber+1))
done
echo -e "\t\t},"
fi
lineCounter=$((lineCounter+1))
done < "$inputFile"
echo -e "\t],"
}
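A usage sketch, assuming the function has been sourced into the shell (output goes to stdout):
# Hypothetical calls: default comma separator, then an explicit semicolon
CSV2JSON "input.csv"
CSV2JSON "input.csv" ";"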
Here's another version that works with Busybox and does not rely on eval, although using cut to find the corresponding headers has a small performance impact.
The function should be highly portable.
function CSV2JSON2 {
local inputFile="${1}" # Input csv text
local separator="${2:-,}" # Separator, defaults to ','
local firstLine
local lineCounter=0
local numberOfHeadings=0
local elementNumber=0
local element
while IFS= read -r line; do
if [ "$line" == "" ] || [ "${line:0:1}" == "#" ]; then
continue
fi
if [ $lineCounter -eq 0 ]; then
numberOfHeadings=$(($(echo $line | grep -o "$separator" | wc -l)+1))
firstLine="$line"
else
echo -e "\t\t{"
elementNumber=1
while [ $elementNumber -le $numberOfHeadings ]; do  # cut fields are 1-based, so -le keeps the last field
headerElement="$(echo $firstLine | cut -d"$separator" -f$elementNumber)"
element="$(echo $line | cut -d"$separator" -f$elementNumber)"
if [ "$element" == "" ]; then
element="null"
fi
echo -e "\t\t\"$headerElement\" : \"$element\","
elementNumber=$((elementNumber+1))
done
echo -e "\t\t},"
fi
lineCounter=$((lineCounter+1))
done < "$inputFile"
# Add [ ] if more than one item in list
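# NOTE (assumption): $outputFile is never set inside this function; the
# caller is expected to have redirected the function's output to that file
# before the wrapping below can work.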
if [ $lineCounter -gt 2 ]; then
sed -i.tmp '1s/^/\t[\n/' "$outputFile"
echo -e "\t]," >> "$outputFile"
fi
}
Use with jsonOutput="$(CSV2JSON2 "inputFile")"
@linosteenkamp's version does not work with CSVs that contain quoted commas (","), e.g.
printf "head1,head2,head3\n1,\"foo, bar, baz\",\"foo bar baz\"" | ./csv2json.sh
will result in
[
{
"head1": 1,
"head2": ""foo",
"head3": " bar"
"": " baz""
"": "foo bar baz"
}
]
Quick fix for @outwitevil's script (https://gist.github.com/dsliberty/3de707bc656cf757a0cb#gistcomment-2103308) is to replace the \r in the sed regex with $(printf '\r'). The script will still struggle with empty lines, so you have to delete them beforehand. A simple one-liner:
printf "head1,head2,head3\n\n\n1,\"foo, bar, baz\",\"foo bar baz\"\n\n" | sed '/^[[:space:]]*$/d' | ./csv2json.sh
[
{
"head1": 1,
"head2": "foo, bar, baz",
"head3": "foo bar baz"
}
]
I haven't checked if there are any side effects on Linux now.
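For clarity, the corrected line in csv_nextField needs double quotes around the sed expression so the command substitution actually expands (inside single quotes, $(printf '\r') stays literal):
# Double quotes let $(printf '\r') expand to a real carriage return
local line="$(echo "${1}" | sed "s/$(printf '\r')//g")"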
#!/bin/bash
# CSV to JSON converter using BASH
# original script from https://gist.github.com/dsliberty/3de707bc656cf757a0cb
# Usage ./csv2json.sh input.csv > output.json
# cat <input.csv> | csv2json > output.json
#set -x
shopt -s extglob
input="${1:-/dev/stdin}"
SEP=","
[ -z "${input}" ] && echo "No CSV input file specified" && exit 1
[ ! -e "${input}" ] && echo "Unable to locate ${input}" && exit 1
csv_nextField()
{
local line="$(echo "${1}" | sed 's/$(printf '\r')//g')"
local start=0
local stop=0
if [[ -z "${line}" ]]; then
return 0
fi
local offset=0
local inQuotes=0
while [[ -n "${line}" ]]; do
local char="${line:0:1}"
line="${line:1}"
if [[ "${char}" == "${SEP}" && ${inQuotes} -eq 0 ]]; then
inQuotes=0
break
elif [[ "${char}" == '"' ]]; then
if [[ ${inQuotes} -eq 1 ]]; then
inQuotes=0
else
inQuotes=1
fi
else
echo -n "${char}"
fi
offset=$(( ${offset} + 1 ))
done
echo ""
return $(( ${offset} + 1 ))
}
read -r first_line < "${input}"
a=0
headings=$(echo "${first_line}" | awk -F"${SEP}" {'print NF'})
if [ "${input}" = "/dev/stdin" ]; then
while read -r line
do
lines_str+="$line"$'\n'
c=1
done < "${input}"
else
lines_str="$(cat "${input}")"
c=0
fi
lines_num=$(echo "${lines_str}" | wc -l)
while [[ ${a} -lt ${headings} ]]; do
field="$(csv_nextField "${first_line}")"
first_line="${first_line:${?}}"
head_array[${a}]="${field}"
a=$(( ${a} + 1 ))
done
#c=0
echo "["
while [ ${c} -lt ${lines_num} ]
do
read -r each_line
each_line="$(echo "${each_line}" | sed 's/$(printf '\r')//g')"
if [[ ${c} -eq 0 ]]; then
c=$(( ${c} + 1 ))
else
d=0
echo " {"
while [[ ${d} -lt ${headings} ]]; do
item="$(csv_nextField "${each_line}")"
each_line="${each_line:${?}}"
echo -n " \"${head_array[${d}]}\": "
case "${item}" in
"")
echo -n "null"
;;
null|true|false|\"*\"|+([0123456789]))
echo -n ${item}
;;
*)
echo -n "\"${item}\""
;;
esac
d=$(( ${d} + 1 ))
[[ ${d} -lt ${headings} ]] && echo "," || echo ""
done
echo -n " }"
c=$(( ${c} + 1 ))
[[ ${c} -lt ${lines_num} ]] && echo "," || echo ""
fi
done <<< "${lines_str}"
echo "]"
#!/bin/bash
# CSV to JSON converter using BASH
# Usage ./csv2json input.csv > output.json
input=$1
[ -z $1 ] && echo "No CSV input file specified" && exit 1
[ ! -e $input ] && echo "Unable to locate $1" && exit 1
read first_line < $input
a=0
headings=`echo $first_line | awk -F, {'print NF'}`
lines=`cat $input | wc -l`
while [ $a -lt $headings ]
do
    head_array[$a]=$(echo $first_line | awk -v x=$(($a + 1)) -F"," '{print $x}')
    a=$(($a+1))
done
c=0
echo "["
while [ $c -le $lines ]
do
    read each_line
    if [ $c -ne 0 ]; then
        d=0
        echo -n "{"
        while [ $d -lt $headings ]
        do
            each_element=$(echo $each_line | awk -v y=$(($d + 1)) -F"," '{print $y}')
            if [ $d -ne $(($headings-1)) ]; then
                echo -n "\"${head_array[$d]}\":\"$each_element\","
            else
                echo -n "\"${head_array[$d]}\":\"$each_element\""
            fi
            d=$(($d+1))
        done
        if [ $c -eq $(($lines-1)) ]; then
            echo "}"
        else
            echo "},"
        fi
    fi
    c=$(($c+1))
done < $input
echo "]"
This should give quoted values and an array of JSON objects.
Question: the script runs fine, but it does not output a JSON file?
There is a problem if a field value has more than 254 characters: every field after that one will be wrong. (csv_nextField passes the consumed offset back through its return status, and shell exit statuses wrap at 256, so long fields corrupt the offset used to advance through the line.)
I have a field that has the following value:
"doc":0000000000000000000000000000000000000000000000000000000000000000,
What is interesting is that the all-zeros value fails to be parsed by JSON tools; they either want a 0 or a "0000000000000000000000000000000000000000000000000000000000000000". (JSON number literals may not have leading zeros, which is why the unquoted run of zeros is rejected.)
Is there a way we can put quotes around all values, even if they are numbers? Or is that outside the accepted formatting of JSON?
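Quoting numbers is perfectly valid JSON; they simply become strings. A hedged sketch against @jwkblades' version: collapse the case statement in the inner loop so every non-empty value is quoted:
# Sketch: force-quote all values; empty fields still become null
case "${item}" in
    "") echo -n "null" ;;
    *)  echo -n "\"${item}\"" ;;
esac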
@jwkblades beautiful piece there