Last active
December 14, 2015 12:58
-
-
Save margusmartsepp/5089683 to your computer and use it in GitHub Desktop.
CSV to JSON stream converter.
Usage: mono CSVtoJSON.exe "input.csv" "output.json" ["delimiter"] (the delimiter argument is optional and defaults to a comma)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Text; | |
| using System.IO; | |
| using System.Linq; | |
| using System.Collections.Generic; | |
| using System.Text.RegularExpressions; | |
| namespace CSVtoJSON | |
| { | |
/// <summary>
/// Command line tool that streams a delimited text (CSV) file into a JSON
/// array of objects, using the first line of the input as property names.
/// </summary>
class MainClass
{
    /// <summary>Console row the cursor was on when the program started.</summary>
    protected static int origRow;

    /// <summary>Console column the cursor was on when the program started.</summary>
    protected static int origCol;

    // Most recently built field pattern together with the delimiter it was
    // built for. Stored as one immutable pair so a reader never observes a
    // delimiter paired with the wrong pattern.
    private static Tuple<string, Regex> cachedPattern;

    /// <summary>
    /// Writes <paramref name="s"/> at an offset relative to the cursor
    /// position recorded in <see cref="origCol"/> / <see cref="origRow"/>.
    /// </summary>
    /// <param name="s">Text to write.</param>
    /// <param name="x">Column offset from the recorded origin.</param>
    /// <param name="y">Row offset from the recorded origin.</param>
    protected static void WriteAt (string s, int x, int y)
    {
        try {
            Console.SetCursorPosition (origCol + x, origRow + y);
            Console.Write (s);
        } catch (ArgumentOutOfRangeException e) {
            Console.Clear ();
            Console.WriteLine (e.Message);
        } catch (IOException) {
            // The cursor cannot be positioned (for example when output is
            // redirected); fall back to plain sequential output instead of
            // aborting the conversion because of a progress message.
            if (!string.IsNullOrEmpty (s))
                Console.WriteLine (s);
        }
    }

    /// <summary>
    /// Returns the regular expression that matches one field (plus its
    /// terminator) for the given delimiter, reusing the previously built
    /// pattern when the delimiter has not changed.
    /// </summary>
    private static Regex GetFieldPattern (string delimiter)
    {
        var cached = cachedPattern;
        if (cached != null && cached.Item1 == delimiter)
            return cached.Item2;

        // Escape the delimiter so regex metacharacters ("|", ".", ...) and
        // whitespace (tab, space - otherwise dropped by
        // IgnorePatternWhitespace) are matched literally.
        string d = Regex.Escape (delimiter);
        // Whitespace that is not the start of a delimiter, so whitespace
        // delimiters are never swallowed as padding.
        string ws = @"(?:(?!" + d + @")\s)*";

        var pattern = new Regex (
            ws + @"                          # Ignore leading whitespace.
            (?:                              # Group of value alternatives.
              ""                             # Either a double quoted string,
              (?<quoted>                     # capturing the contents between quotes:
                [^""]*(?:""""[^""]*)*        # non-quotes, allowing doubled quotes
              )                              # within the string.
              "" " + ws + @"                 # Ignore whitespace following the quote.
            |                                # Or...
              (?<val>(?:(?!" + d + @")[\s\S])*)   # everything up to the next delimiter.
            )                                # End value alternatives group.
            (?<end>" + d + @"|$)             # Match end is delimiter or EOS.",
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

        cachedPattern = Tuple.Create (delimiter, pattern);
        return pattern;
    }

    /// <summary>
    /// Splits one line of delimited text into its fields.
    /// </summary>
    /// <remarks>
    /// A field may be wrapped in double quotes, in which case it may contain
    /// the delimiter and doubled quotes (<c>""</c>), which are returned as a
    /// single quote. Whitespace before a field and after a closing quote is
    /// ignored. Empty fields are preserved in place, including a trailing
    /// one (<c>"a,b,"</c> yields three fields).
    /// </remarks>
    /// <param name="input">The line to split.</param>
    /// <param name="delimiter">Field separator; matched literally and may be longer than one character.</param>
    /// <returns>The fields in order; an empty list for an empty line.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    public static List<String> qLines (string input, string delimiter = ",")
    {
        if (input == null)
            throw new ArgumentNullException ("input", "Must have data");
        if (string.IsNullOrEmpty (delimiter))
            throw new ArgumentException ("Delimiter must not be empty", "delimiter");

        var result = new List<String> ();
        if (input.Length == 0)
            return result;

        var match = GetFieldPattern (delimiter).Match (input);
        while (match.Success) {
            Group quoted = match.Groups ["quoted"];
            result.Add (quoted.Success
                ? quoted.Value.Replace ("\"\"", "\"")
                : match.Groups ["val"].Value);

            // A field terminated by the end of the input (not by a
            // delimiter) is the last one. Stopping here avoids the extra
            // empty match the pattern would otherwise produce at the end,
            // without disturbing genuinely empty fields.
            bool reachedEnd = match.Index + match.Length >= input.Length;
            if (reachedEnd && match.Groups ["end"].Length == 0)
                break;

            match = match.NextMatch ();
        }
        return result;
    }

    /// <summary>
    /// Converts the delimited text file at <paramref name="pathIn"/> into a
    /// JSON array written to <paramref name="pathOut"/>, one object per
    /// input line, reading and writing line by line.
    /// </summary>
    /// <remarks>
    /// The first input line supplies the property names. Every following
    /// line becomes one object whose values are all JSON strings. A line
    /// whose field count differs from the header count is still written
    /// (headers and fields are paired up to the shorter of the two) and is
    /// counted as a warning. An input without any line produces an empty
    /// array. An existing output file is replaced, but only after the input
    /// has been opened successfully.
    /// </remarks>
    /// <param name="pathIn">Path of the file to read.</param>
    /// <param name="pathOut">Path of the JSON file to create or overwrite.</param>
    /// <param name="delimiter">Field separator used in the input.</param>
    /// <exception cref="Exception">
    /// Any failure; the original exception is available as
    /// <see cref="Exception.InnerException"/>.
    /// </exception>
    public static void StreamConvertCSVtoJSON (string pathIn, string pathOut, string delimiter = ",")
    {
        var warnings = 0;
        var converted = 0;
        try {
            // Overwriting the file that is being read would destroy the input.
            if (string.Equals (Path.GetFullPath (pathIn), Path.GetFullPath (pathOut), StringComparison.Ordinal))
                throw new ArgumentException ("Input and output must be different files.", "pathOut");

            // Open the input first so that a missing or unreadable input
            // does not wipe a previously generated output file.
            using (StreamReader sr = new StreamReader (new FileStream (
                pathIn, FileMode.Open, FileAccess.Read), Encoding.UTF8))
            using (StreamWriter sw = new StreamWriter (new FileStream (
                pathOut, FileMode.Create, FileAccess.Write), Encoding.UTF8)) {
                string headerLine = sr.ReadLine ();
                sw.WriteLine ("[");
                if (headerLine == null) {
                    // No header line at all: still emit a valid, empty array.
                    sw.WriteLine ("]");
                    return;
                }

                // Property names are JSON strings too, so escape them once up front.
                var headers = qLines (headerLine, delimiter)
                    .Select (h => fieldFix (h))
                    .ToList ();

                var isFirstObject = true;
                string line;
                while ((line = sr.ReadLine ()) != null) {
                    var fields = qLines (line, delimiter);
                    if (fields.Count != headers.Count)
                        warnings++;
                    else
                        converted++;

                    var members = headers.Zip (
                        fields,
                        (header, field) => string.Format (
                            "\"{0}\": \"{1}\"", header, fieldFix (field)));

                    // Every object after the first is preceded by a comma.
                    sw.WriteLine ((isFirstObject ? "{" : ",{") + string.Join (",", members) + "}");
                    isFirstObject = false;

                    if ((warnings + converted) % 1000 == 0)
                        progressUpdate (warnings, converted);
                }
                sw.WriteLine ("]");
                progressUpdate (warnings, converted);
            }
        } catch (Exception e) {
            // Keep the original exception (type, stack trace) reachable for callers.
            throw new Exception (string.Format ("The process failed: {0}", e.Message), e);
        }
    }

    /// <summary>
    /// Escapes <paramref name="vars"/> so it can be placed between the
    /// double quotes of a JSON string: quotes, backslashes and control
    /// characters are replaced by their JSON escape sequences.
    /// </summary>
    /// <param name="vars">Raw text; null is treated as empty.</param>
    /// <returns>The escaped text, without surrounding quotes.</returns>
    private static string fieldFix (string vars)
    {
        if (string.IsNullOrEmpty (vars))
            return string.Empty;

        var sb = new StringBuilder (vars.Length + 8);
        foreach (char c in vars) {
            switch (c) {
            case '"':
                sb.Append ("\\\"");
                break;
            case '\\':
                sb.Append ("\\\\");
                break;
            case '\b':
                sb.Append ("\\b");
                break;
            case '\f':
                sb.Append ("\\f");
                break;
            case '\n':
                sb.Append ("\\n");
                break;
            case '\r':
                sb.Append ("\\r");
                break;
            case '\t':
                sb.Append ("\\t");
                break;
            default:
                if (c < ' ') {
                    // Remaining control characters have no short escape form.
                    sb.Append ("\\u");
                    sb.Append (((int)c).ToString ("x4", System.Globalization.CultureInfo.InvariantCulture));
                } else {
                    sb.Append (c);
                }
                break;
            }
        }
        return sb.ToString ();
    }

    /// <summary>
    /// Prints the current counters on the two console lines starting at the
    /// recorded origin and leaves the cursor on the line below them.
    /// </summary>
    /// <param name="i">Number of lines whose field count did not match the header count.</param>
    /// <param name="j">Number of lines whose field count matched the header count.</param>
    public static void progressUpdate (int i, int j)
    {
        WriteAt ("" + j + " successful lines.", 0, 0);
        WriteAt ("" + i + " warning(s): Field count does not match header count.", 0, 1);
        WriteAt ("", 0, 2);
    }

    /// <summary>
    /// Entry point. Expects the input path, the output path and optionally
    /// the delimiter; prints a usage line when fewer than two arguments are
    /// given. A failed conversion is reported on standard error and sets a
    /// non-zero exit code.
    /// </summary>
    /// <param name="args">Command line arguments.</param>
    public static void Main (string[] args)
    {
        try {
            origRow = Console.CursorTop;
            origCol = Console.CursorLeft;
        } catch (IOException) {
            // Cursor position is unavailable (no interactive console).
            origRow = 0;
            origCol = 0;
        }

        if (args.Length < 2) {
            Console.WriteLine ("Use: mono CSVtoJSON.exe \"input.csv\" \"output.json\" \"optional:delimiter\"");
            return;
        }

        string delimiter = args.Length > 2 ? args [2] : ",";
        try {
            StreamConvertCSVtoJSON (args [0], args [1], delimiter);
        } catch (Exception e) {
            Console.Error.WriteLine (e.Message);
            Environment.ExitCode = 1;
        }
    }
}
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment