Skip to content

Instantly share code, notes, and snippets.

@margusmartsepp
Last active December 14, 2015 12:58
Show Gist options
  • Select an option

  • Save margusmartsepp/5089683 to your computer and use it in GitHub Desktop.

Select an option

Save margusmartsepp/5089683 to your computer and use it in GitHub Desktop.
CSV to JSON stream converter. Usage: mono CSVtoJSON.exe "input.csv" "output.json" ["delimiter"] (the delimiter is optional and defaults to a comma)
using System;
using System.Text;
using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace CSVtoJSON
{
/// <summary>
/// Command-line tool that streams a delimited text (CSV) file into a JSON array
/// containing one object per data row, keyed by the values of the header row.
/// </summary>
class MainClass
{
    /// <summary>Console row that <see cref="WriteAt"/> offsets are measured from.</summary>
    protected static int origRow;

    /// <summary>Console column that <see cref="WriteAt"/> offsets are measured from.</summary>
    protected static int origCol;

    /// <summary>
    /// Writes <paramref name="s"/> at a position relative to the saved origin
    /// (<see cref="origCol"/>, <see cref="origRow"/>).
    /// </summary>
    /// <param name="s">Text to write.</param>
    /// <param name="x">Column offset from <see cref="origCol"/>.</param>
    /// <param name="y">Row offset from <see cref="origRow"/>.</param>
    protected static void WriteAt (string s, int x, int y)
    {
        try {
            Console.SetCursorPosition (origCol + x, origRow + y);
            Console.Write (s);
        } catch (ArgumentOutOfRangeException e) {
            // The console rejected the position: start from a clean screen and
            // show the reason instead of the requested text.
            Console.Clear ();
            Console.WriteLine (e.Message);
        }
    }

    /// <summary>
    /// Splits one line of delimited text into its field values.
    /// </summary>
    /// <remarks>
    /// A field is either wrapped in double quotes (it may then contain the delimiter;
    /// doubled quotes inside it are returned as written, not collapsed) or runs up to
    /// the next delimiter. Whitespace before a field and after a closing quote is
    /// skipped. Empty fields are preserved in place, including a leading or trailing
    /// one; an empty input yields an empty list. The delimiter is matched literally,
    /// never as a regular expression. For a multi-character delimiter an unquoted
    /// field must not contain any of the delimiter's characters.
    /// </remarks>
    /// <param name="input">The line to split.</param>
    /// <param name="delimiter">Field separator; defaults to a comma.</param>
    /// <returns>The field values in the order they appear.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    public static List<String> qLines (string input, string delimiter = ",")
    {
        if (input == null)
            throw new ArgumentNullException ("input", "Must have data");
        return SplitFields (BuildFieldPattern (delimiter), input);
    }

    /// <summary>Builds the regular expression that recognises one field plus its terminator.</summary>
    private static Regex BuildFieldPattern (string delimiter)
    {
        if (string.IsNullOrEmpty (delimiter))
            throw new ArgumentException ("Delimiter must not be empty", "delimiter");

        // Spell every delimiter character as \uXXXX so none of them can act as a
        // regex metacharacter, either inside or outside a character class.
        var literal = new StringBuilder (delimiter.Length * 6);
        foreach (char c in delimiter)
            literal.AppendFormat ("\\u{0:x4}", (int)c);
        var delim = literal.ToString ();

        // Whitespace that is not part of the delimiter; a plain \s* would swallow
        // tab or space delimiters and with them the empty fields in between.
        var blank = @"[^\S" + delim + "]*";

        var pattern =
            blank +
            "(?:" +
                // Quoted field: non-quotes, with "" allowed as an embedded quote.
                "\"(?<val>[^\"]*(?:\"\"[^\"]*)*)\"" + blank +
            "|" +
                // Unquoted field: everything up to the next delimiter character.
                "(?<val>[^" + delim + "]*)" +
            ")" +
            // The field ends at a delimiter (captured, see SplitFields) or at end of line.
            "(?:(?<delim>" + delim + ")|$)";

        return new Regex (pattern, RegexOptions.Multiline);
    }

    /// <summary>Collects the field values that <paramref name="pattern"/> finds in <paramref name="input"/>.</summary>
    private static List<String> SplitFields (Regex pattern, string input)
    {
        var result = new List<String> ();

        // After a field that ended at end-of-line the regex matches once more with
        // zero length. That match is not a field. A zero-length match that directly
        // follows a delimiter, however, is a genuine empty last field ("a,").
        var afterDelimiter = false;
        for (var match = pattern.Match (input); match.Success; match = match.NextMatch ()) {
            if (match.Length > 0 || afterDelimiter)
                result.Add (match.Groups ["val"].Value);
            afterDelimiter = match.Groups ["delim"].Success;
        }
        return result;
    }

    /// <summary>
    /// Converts the delimited file at <paramref name="pathIn"/> into a JSON array
    /// written to <paramref name="pathOut"/>, reading and writing line by line.
    /// </summary>
    /// <remarks>
    /// The first line supplies the property names. Every following line becomes one
    /// object on its own output line. A line whose field count differs from the
    /// header count is counted as a warning and still written; surplus headers or
    /// fields are dropped because names and values are paired positionally. An
    /// existing output file is overwritten. An empty input produces an empty array.
    /// Progress is reported through <see cref="progressUpdate"/>.
    /// </remarks>
    /// <param name="pathIn">Path of the delimited input file (read as UTF-8).</param>
    /// <param name="pathOut">Path of the JSON file to create (written as UTF-8).</param>
    /// <param name="delimiter">Field separator, matched literally; defaults to a comma.</param>
    /// <exception cref="Exception">
    /// Any failure, with the original exception available as InnerException.
    /// </exception>
    public static void StreamConvertCSVtoJSON (string pathIn, string pathOut, string delimiter = ",")
    {
        var warnings = 0;
        var successes = 0;
        try {
            // Writing over the file that is being read would destroy the input.
            if (string.Equals (Path.GetFullPath (pathIn), Path.GetFullPath (pathOut), StringComparison.Ordinal))
                throw new ArgumentException ("Input and output must be different files", "pathOut");

            // Built once; it does not change from line to line.
            var pattern = BuildFieldPattern (delimiter);

            // Open the input before touching the output so that an unreadable
            // input never costs the caller an existing output file.
            using (var sr = new StreamReader (new FileStream (
                pathIn, FileMode.Open, FileAccess.Read), Encoding.UTF8))
            using (var sw = new StreamWriter (new FileStream (
                pathOut, FileMode.Create, FileAccess.Write), Encoding.UTF8)) {
                var headerLine = sr.ReadLine ();
                var headers = headerLine == null
                    ? new List<String> ()
                    : SplitFields (pattern, headerLine);
                // Property names need the same escaping as values.
                var keys = headers.Select (header => fieldFix (header)).ToList ();

                sw.WriteLine ("[");
                var needsComma = false;
                string line;
                while ((line = sr.ReadLine ()) != null) {
                    var fields = SplitFields (pattern, line);
                    if (fields.Count != headers.Count)
                        warnings++;
                    else
                        successes++;

                    var members = keys.Zip (
                        fields,
                        (key, field) => string.Format (
                            "\"{0}\": \"{1}\"", key, fieldFix (field))).ToArray ();
                    var jsonObject = "{" + string.Join (",", members) + "}";
                    if (needsComma)
                        jsonObject = "," + jsonObject;
                    sw.WriteLine (jsonObject);
                    needsComma = true;

                    if ((warnings + successes) % 1000 == 0)
                        progressUpdate (warnings, successes);
                }
                sw.WriteLine ("]");
                progressUpdate (warnings, successes);
            }
        } catch (Exception e) {
            // Same exception type and message as before; the cause is now also
            // kept as InnerException instead of being flattened into text only.
            throw new Exception (string.Format ("The process failed: {0}", e.ToString ()), e);
        }
    }

    /// <summary>
    /// Makes <paramref name="vars"/> safe to place between the double quotes of a
    /// JSON string.
    /// </summary>
    /// <remarks>
    /// Double quotes are turned into apostrophes, which is the substitution this tool
    /// has always made. Backslashes and control characters are escaped so the output
    /// stays valid JSON.
    /// </remarks>
    private static string fieldFix (string vars)
    {
        var escaped = new StringBuilder (vars.Length);
        foreach (char c in vars) {
            switch (c) {
            case '"':
                escaped.Append ('\'');
                break;
            case '\\':
                escaped.Append ("\\\\");
                break;
            case '\t':
                escaped.Append ("\\t");
                break;
            case '\n':
                escaped.Append ("\\n");
                break;
            case '\r':
                escaped.Append ("\\r");
                break;
            default:
                if (c < ' ')
                    escaped.AppendFormat ("\\u{0:x4}", (int)c);
                else
                    escaped.Append (c);
                break;
            }
        }
        return escaped.ToString ();
    }

    /// <summary>
    /// Redraws the two progress lines at the saved console origin and leaves the
    /// cursor at the start of the line below them.
    /// </summary>
    /// <param name="i">Number of lines whose field count did not match the header count.</param>
    /// <param name="j">Number of lines whose field count matched the header count.</param>
    public static void progressUpdate (int i, int j)
    {
        WriteAt (j + " successful lines.", 0, 0);
        WriteAt (i + " warning(s): Field count does not match header count.", 0, 1);
        WriteAt ("", 0, 2);
    }

    /// <summary>
    /// Entry point. Expects the input path, the output path and optionally the
    /// delimiter; prints the usage line when fewer than two arguments are given.
    /// </summary>
    public static void Main (string[] args)
    {
        // Remember where the cursor is so progress lines are redrawn in place.
        origRow = Console.CursorTop;
        origCol = Console.CursorLeft;
        if (args.Length < 2) {
            Console.WriteLine ("Use: mono CSVtoJSON.exe \"input.csv\" \"output.json\" \"optional:delimiter\"");
            return;
        }
        if (args.Length == 2) {
            StreamConvertCSVtoJSON (args [0], args [1]);
        } else {
            StreamConvertCSVtoJSON (args [0], args [1], args [2]);
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment