Last active
September 30, 2016 07:21
-
-
Save jesuslpm/07fb8121b1747bd69fb43581c4a576d9 to your computer and use it in GitHub Desktop.
StackOverflow THAT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="utf-8" ?> | |
<rows> | |
<row Id="4" | |
PostTypeId="1" | |
Body="This is the body of question 4" | |
Title="When setting a form's opacity should I use a decimal or double" | |
AnswerCount="5" | |
/> | |
<row Id="6" | |
PostTypeId="1" | |
Body="This is the body of question 6" | |
Title="Percentage width child element in absolutely positioned parent on Internet Explorer 7" | |
AnswerCount="5" | |
/> | |
<row Id="7" | |
PostTypeId="2" | |
ParentId="4" | |
Body="This is the body of answer 7 of question 4" | |
/> | |
</rows> | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Xml; | |
using System.Text; | |
using System.Xml.Linq; | |
using System.IO.Compression; | |
namespace StackOverflowThat
{
    /// <summary>
    /// Single-pass solution: streams Posts.xml, writes one XML file per question, appends every
    /// answer to the file of its parent question, and finally concatenates all question files
    /// into a gzip-compressed output file.
    /// This variant requires every answer to appear after its question in Posts.xml.
    /// </summary>
    class Program
    {
        /// <summary>Name of the input file, looked up in the application's base directory.</summary>
        const string PostsFileName = "Posts.xml";

        /// <summary>
        /// UTF-8 without a byte order mark. The question files are concatenated byte-for-byte in
        /// the output phase, so a BOM per file would end up in the middle of the output stream.
        /// </summary>
        static readonly Encoding Utf8NoBom = new UTF8Encoding(false);

        /// <summary>Entry point: builds the per-question files, then writes the compressed output.</summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            PreparationPhase();
            OutputPhase();
        }

        /// <summary>
        /// Streams Posts.xml row by row and processes each row, so the whole file never has to
        /// be held in memory.
        /// </summary>
        static void PreparationPhase()
        {
            var postsFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, PostsFileName);
            using (var textReader = new StreamReader(postsFilePath, Encoding.UTF8))
            using (var xmlReader = new XmlTextReader(textReader))
            {
                foreach (var row in EnumerateRows(xmlReader))
                {
                    ProcessRow(row);
                }
            }
        }

        /// <summary>
        /// Lazily yields every child element of the document's root element.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of the document.</param>
        /// <returns>The child elements of the root, in document order.</returns>
        static IEnumerable<XElement> EnumerateRows(XmlReader reader)
        {
            reader.MoveToContent();
            reader.Read(); // step from the root element to its first child node
            while (!reader.EOF)
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // XNode.ReadFrom leaves the reader on the node that follows the element.
                    // Calling Read() again here would skip that node, which loses every second
                    // row when rows are not separated by whitespace.
                    yield return (XElement)XNode.ReadFrom(reader);
                }
                else
                {
                    reader.Read();
                }
            }
        }

        /// <summary>
        /// Writes a row without a ParentId attribute as a new question file; appends a row with
        /// a ParentId attribute as an answer element to the existing file of that question.
        /// </summary>
        /// <param name="row">A row element read from Posts.xml. Its element name is changed.</param>
        /// <exception cref="InvalidDataException">
        /// The row is an answer whose question file does not exist yet.
        /// </exception>
        static void ProcessRow(XElement row)
        {
            var parentId = (string)row.Attribute("ParentId");
            if (parentId == null)
            {
                var filePath = GetFullFilePath((string)row.Attribute("Id"));
                // CreateDirectory does nothing when the folder already exists.
                Directory.CreateDirectory(Path.GetDirectoryName(filePath));
                row.Name = "question";
                File.WriteAllText(filePath, row.ToString(), Utf8NoBom);
            }
            else
            {
                var filePath = GetFullFilePath(parentId);
                if (!File.Exists(filePath))
                {
                    throw new InvalidDataException(
                        "Answer " + (string)row.Attribute("Id") + " refers to question " + parentId +
                        ", which has not been written yet. This program requires every answer to" +
                        " come after its question in " + PostsFileName + ".");
                }

                XElement question;
                using (var reader = new StreamReader(filePath, Encoding.UTF8))
                {
                    question = XElement.Load(reader);
                }
                row.Name = "answer";
                question.Add(row);
                File.WriteAllText(filePath, question.ToString(), Utf8NoBom);
            }
        }

        /// <summary>Gets the absolute path of the file that stores the question with the given id.</summary>
        /// <param name="id">The question id.</param>
        /// <returns>The base directory combined with <see cref="GetFilePath"/>.</returns>
        static string GetFullFilePath(string id)
        {
            return Path.Combine(AppDomain.CurrentDomain.BaseDirectory, GetFilePath(id));
        }

        /// <summary>
        /// Gets the relative file path for a row id.
        /// The id is split into groups of up to three characters, starting from the end; every
        /// group but the last becomes a folder, so each folder holds at most 1000 files and
        /// longer ids produce deeper paths.
        /// For example: id = 1234567, path = 1\234\567.xml (with the platform's directory separator).
        /// This avoids having too many files in a single folder.
        /// </summary>
        /// <param name="id">The row id; must not be null or empty.</param>
        /// <returns>The relative path, without a leading directory separator.</returns>
        /// <exception cref="ArgumentException"><paramref name="id"/> is null or empty.</exception>
        static string GetFilePath(string id)
        {
            if (string.IsNullOrEmpty(id))
            {
                throw new ArgumentException("The row id must be a non-empty string.", "id");
            }

            var segments = new List<string>();
            for (var end = id.Length; end > 0; end -= 3)
            {
                var start = Math.Max(end - 3, 0);
                segments.Insert(0, id.Substring(start, end - start));
            }
            return string.Join(Path.DirectorySeparatorChar.ToString(), segments) + ".xml";
        }

        /// <summary>
        /// Concatenates all question files found under the base directory into one
        /// gzip-compressed file in the base directory.
        /// </summary>
        static void OutputPhase()
        {
            // The file name (including its spelling) is kept as-is so existing consumers still find it.
            var outputFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Ouput.gzip");
            using (var stream = File.Open(outputFilePath, FileMode.Create, FileAccess.Write))
            using (var gzip = new GZipStream(stream, CompressionMode.Compress))
            {
                AddXmlFilesToStream(AppDomain.CurrentDomain.BaseDirectory, gzip);
            }
        }

        /// <summary>
        /// Copies the content of every *.xml file in the folder, and then recursively in its
        /// subfolders, to the output stream. The input file Posts.xml is skipped.
        /// </summary>
        /// <param name="folderPath">The folder to scan.</param>
        /// <param name="output">The stream the file contents are appended to.</param>
        static void AddXmlFilesToStream(string folderPath, Stream output)
        {
            foreach (var filePath in Directory.EnumerateFiles(folderPath, "*.xml"))
            {
                // Posts.xml lives in the base directory next to the question files with short
                // ids; without this check the raw input would be copied into the output.
                if (IsPostsFile(filePath))
                {
                    continue;
                }
                using (var file = File.OpenRead(filePath))
                {
                    file.CopyTo(output);
                }
            }
            foreach (var subDirectory in Directory.EnumerateDirectories(folderPath))
            {
                AddXmlFilesToStream(subDirectory, output);
            }
        }

        /// <summary>Tells whether the path designates the input file Posts.xml in the base directory.</summary>
        /// <param name="filePath">The path to test.</param>
        /// <returns>true when the path is the input file; otherwise false.</returns>
        static bool IsPostsFile(string filePath)
        {
            var postsFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, PostsFileName);
            return string.Equals(
                Path.GetFullPath(filePath),
                Path.GetFullPath(postsFilePath),
                StringComparison.OrdinalIgnoreCase);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Xml; | |
using System.Text; | |
using System.Xml.Linq; | |
using System.IO.Compression; | |
namespace StackOverflowThat
{
    /// <summary>
    /// Two-pass solution: the first pass over Posts.xml writes one XML file per question, the
    /// second pass appends every answer to the file of its parent question, and the output
    /// phase concatenates all question files into a gzip-compressed output file.
    /// Because questions are written before any answer is processed, answers may appear
    /// anywhere in Posts.xml relative to their question.
    /// </summary>
    class Program
    {
        /// <summary>Name of the input file, looked up in the application's base directory.</summary>
        const string PostsFileName = "Posts.xml";

        /// <summary>
        /// UTF-8 without a byte order mark. The question files are concatenated byte-for-byte in
        /// the output phase, so a BOM per file would end up in the middle of the output stream.
        /// </summary>
        static readonly Encoding Utf8NoBom = new UTF8Encoding(false);

        /// <summary>Entry point: builds the per-question files, then writes the compressed output.</summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            PreparationPhase();
            OutputPhase();
        }

        /// <summary>Runs the two passes over Posts.xml: questions first, then answers.</summary>
        static void PreparationPhase()
        {
            WriteQuestionFiles();
            AddAnswersToQuestions();
        }

        /// <summary>
        /// First pass: writes every row without a ParentId attribute to its own file as a
        /// question element. Rows with a ParentId are ignored in this pass.
        /// </summary>
        static void WriteQuestionFiles()
        {
            foreach (var row in EnumerateRows())
            {
                if ((string)row.Attribute("ParentId") != null)
                {
                    continue;
                }

                var filePath = GetFullFilePath((string)row.Attribute("Id"));
                // CreateDirectory does nothing when the folder already exists.
                Directory.CreateDirectory(Path.GetDirectoryName(filePath));
                row.Name = "question";
                File.WriteAllText(filePath, row.ToString(), Utf8NoBom);
            }
        }

        /// <summary>
        /// Second pass: appends every row that has a ParentId attribute, as an answer element,
        /// to the file of the question it refers to. Rows without a ParentId are ignored.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// An answer refers to a question for which no file was written in the first pass.
        /// </exception>
        static void AddAnswersToQuestions()
        {
            foreach (var row in EnumerateRows())
            {
                var parentId = (string)row.Attribute("ParentId");
                if (parentId == null)
                {
                    continue;
                }

                var filePath = GetFullFilePath(parentId);
                if (!File.Exists(filePath))
                {
                    throw new InvalidDataException(
                        "Answer " + (string)row.Attribute("Id") + " refers to question " + parentId +
                        ", which does not exist in " + PostsFileName + ".");
                }

                XElement question;
                using (var reader = new StreamReader(filePath, Encoding.UTF8))
                {
                    question = XElement.Load(reader);
                }
                row.Name = "answer";
                question.Add(row);
                File.WriteAllText(filePath, question.ToString(), Utf8NoBom);
            }
        }

        /// <summary>
        /// Opens Posts.xml in the base directory and lazily yields every child element of its
        /// root element, so the whole file never has to be held in memory. Each enumeration
        /// reads the file again from the start.
        /// </summary>
        /// <returns>The child elements of the root, in document order.</returns>
        static IEnumerable<XElement> EnumerateRows()
        {
            var postsFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, PostsFileName);
            using (var textReader = new StreamReader(postsFilePath, Encoding.UTF8))
            using (var reader = new XmlTextReader(textReader))
            {
                reader.MoveToContent();
                reader.Read(); // step from the root element to its first child node
                while (!reader.EOF)
                {
                    if (reader.NodeType == XmlNodeType.Element)
                    {
                        // XNode.ReadFrom leaves the reader on the node that follows the element.
                        // Calling Read() again here would skip that node, which loses every
                        // second row when rows are not separated by whitespace.
                        yield return (XElement)XNode.ReadFrom(reader);
                    }
                    else
                    {
                        reader.Read();
                    }
                }
            }
        }

        /// <summary>Gets the absolute path of the file that stores the question with the given id.</summary>
        /// <param name="id">The question id.</param>
        /// <returns>The base directory combined with <see cref="GetFilePath"/>.</returns>
        static string GetFullFilePath(string id)
        {
            return Path.Combine(AppDomain.CurrentDomain.BaseDirectory, GetFilePath(id));
        }

        /// <summary>
        /// Gets the relative file path for a row id.
        /// The id is split into groups of up to three characters, starting from the end; every
        /// group but the last becomes a folder, so each folder holds at most 1000 files and
        /// longer ids produce deeper paths.
        /// For example: id = 1234567, path = 1\234\567.xml (with the platform's directory separator).
        /// This avoids having too many files in a single folder.
        /// </summary>
        /// <param name="id">The row id; must not be null or empty.</param>
        /// <returns>The relative path, without a leading directory separator.</returns>
        /// <exception cref="ArgumentException"><paramref name="id"/> is null or empty.</exception>
        static string GetFilePath(string id)
        {
            if (string.IsNullOrEmpty(id))
            {
                throw new ArgumentException("The row id must be a non-empty string.", "id");
            }

            var segments = new List<string>();
            for (var end = id.Length; end > 0; end -= 3)
            {
                var start = Math.Max(end - 3, 0);
                segments.Insert(0, id.Substring(start, end - start));
            }
            return string.Join(Path.DirectorySeparatorChar.ToString(), segments) + ".xml";
        }

        /// <summary>
        /// Concatenates all question files found under the base directory into one
        /// gzip-compressed file in the base directory.
        /// </summary>
        static void OutputPhase()
        {
            // The file name (including its spelling) is kept as-is so existing consumers still find it.
            var outputFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Ouput.gzip");
            using (var stream = File.Open(outputFilePath, FileMode.Create, FileAccess.Write))
            using (var gzip = new GZipStream(stream, CompressionMode.Compress))
            {
                AddXmlFilesToStream(AppDomain.CurrentDomain.BaseDirectory, gzip);
            }
        }

        /// <summary>
        /// Copies the content of every *.xml file in the folder, and then recursively in its
        /// subfolders, to the output stream. The input file Posts.xml is skipped.
        /// </summary>
        /// <param name="folderPath">The folder to scan.</param>
        /// <param name="output">The stream the file contents are appended to.</param>
        static void AddXmlFilesToStream(string folderPath, Stream output)
        {
            foreach (var filePath in Directory.EnumerateFiles(folderPath, "*.xml"))
            {
                // Posts.xml lives in the base directory next to the question files with short
                // ids; without this check the raw input would be copied into the output.
                if (IsPostsFile(filePath))
                {
                    continue;
                }
                using (var file = File.OpenRead(filePath))
                {
                    file.CopyTo(output);
                }
            }
            foreach (var subDirectory in Directory.EnumerateDirectories(folderPath))
            {
                AddXmlFilesToStream(subDirectory, output);
            }
        }

        /// <summary>Tells whether the path designates the input file Posts.xml in the base directory.</summary>
        /// <param name="filePath">The path to test.</param>
        /// <returns>true when the path is the input file; otherwise false.</returns>
        static bool IsPostsFile(string filePath)
        {
            var postsFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, PostsFileName);
            return string.Equals(
                Path.GetFullPath(filePath),
                Path.GetFullPath(postsFilePath),
                StringComparison.OrdinalIgnoreCase);
        }
    }
}
Program.cs assumes that every answer appears after its question in Posts.xml. Program2.cs does not make that assumption, so it has to read Posts.xml twice: once to write the question files and once to append the answers to them.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is my answer to Ayende's interview question "StackOverflow THAT": https://ayende.com/blog/175617/interview-question-stackoverflow-that