Skip to content

Instantly share code, notes, and snippets.

@leppie
Created August 2, 2016 15:25
Show Gist options
  • Save leppie/3ce0fa0ff332d628740cf2eba0d9db8a to your computer and use it in GitHub Desktop.
Save leppie/3ce0fa0ff332d628740cf2eba0d9db8a to your computer and use it in GitHub Desktop.
Importing post tags like a boss!
using System;
using System.Data.SqlClient;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using System.Xml;
using Dapper;
namespace SOImporter
{
class Program
{
class Tag { public int Id; public string Name; }
static void Main(string[] args)
{
var xmlr = XmlTextReader.Create("Posts.xml");
xmlr.ReadToDescendant("row");
var tagre = new Regex(@"<(?<tag>[\.#+0-9a-z-]+)>", RegexOptions.Compiled);
using (var con = new SqlConnection("Data Source=.;Initial Catalog=StackOverflow;Integrated Security=True"))
{
con.Open();
con.Execute("truncate table PostTag");
var lu = con.Query<Tag>("select Id, [Name] from Tag").ToDictionary(x => x.Name, x => x.Id);
long count = 0, tc = 0;
SqlTransaction trans = con.BeginTransaction();
do
{
var e = XElement.Parse(xmlr.ReadOuterXml());
var pt = e.Attribute("PostTypeId").Value;
if (pt == "1")
{
var postId = int.Parse(e.Attribute("Id").Value);
var tags = e.Attribute("Tags").Value;
int tagId = 0;
var posttags = from Match m in tagre.Matches(tags)
let _ = lu.TryGetValue(m.Groups["tag"].Value, out tagId)
select new
{
postId,
tagId
};
var n = con.Execute(@"insert PostTag(PostId, TagId) values(@postId, @tagId)", posttags, transaction: trans);
count += n;
tc += n;
if (tc > 10000)
{
trans.Commit();
trans = con.BeginTransaction();
tc = 0;
Console.Write("o");
}
}
}
while (xmlr.ReadToNextSibling("row"));
trans.Commit();
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment