Skip to content

Instantly share code, notes, and snippets.

@phinett
Created March 15, 2012 12:47
Show Gist options
  • Save phinett/2044025 to your computer and use it in GitHub Desktop.
Save phinett/2044025 to your computer and use it in GitHub Desktop.
Import script from SQL Server to RavenDB
namespace HM.ImportFromNHibenate.ImportAudios
{
    /// <summary>
    /// One-off migration: pages AudioHit rows out of SQL Server (via NHibernate)
    /// and writes them into RavenDB as AudioStatCounter documents, translating
    /// legacy integer ids to the new document ids through in-memory caches.
    /// </summary>
    public static class ImportAudioHits
    {
        // Minimal projection of a RavenDB Audio document: only the fields needed
        // for id translation are cached, to keep memory bounded over the full run.
        private class DummyAudio
        {
            public int NHibernateId { get; set; }
            public string NewId { get; set; }
            public string NewAccountId { get; set; }
        }

        /// <summary>
        /// Runs the import, resuming from a hard-coded page offset. Reads pages of
        /// 5000 AudioHit rows, resolves the new Audio/Account document ids from
        /// RavenDB, and stores an AudioStatCounter per hit, flushing every 1024 stores.
        /// </summary>
        public static void Execute()
        {
            bool haveResults = true;
            const int pageSize = 5000;
            // resume point: pages 0..343 were imported by an earlier run
            int page = 344;
            // total number of AudioHit rows to import; integer division is close
            // enough for the progress display
            const int total = 21047154;
            int totalPages = total / pageSize;
            // running count of documents actually written to RavenDB
            int counter = 0;
            // id-translation caches: legacy NHibernate id -> new RavenDB document info,
            // kept across pages so the same document is never looked up twice
            IDictionary<int, DummyAudio> newAudios = new Dictionary<int, DummyAudio>();
            IDictionary<int, string> newAccounts = new Dictionary<int, string>();
            while (haveResults)
            {
                Stopwatch sp = Stopwatch.StartNew();
                using (var nhSession = NHibernateSession.GetSessionFactoryFor(Program.TenantKey).OpenSession())
                {
                    // fetch a paged list of AudioHits from SQL Server (NHibernate)
                    Stopwatch sp2 = Stopwatch.StartNew();
                    var hits = nhSession.QueryOver<Domain.AudioHit>()
                        .OrderBy(x => x.Audio.Id).Asc
                        .Skip(page * pageSize)
                        .Take(pageSize)
                        .List<Domain.AudioHit>();
                    // This is averaging around 400ms
                    Console.WriteLine(String.Format("\rQuery took: {0}ms", sp2.ElapsedMilliseconds));
                    // no more rows: stop, and skip processing the empty page
                    // (previously the empty page was still "processed" and counted)
                    if (!hits.Any())
                    {
                        haveResults = false;
                        continue;
                    }

                    // legacy Audio ids on this page (guarding nulls, as the store
                    // loop below does), minus any ids we have already resolved
                    var audioIds = hits
                        .Where(x => x.Audio != null)
                        .Select(x => x.Audio.Id)
                        .Distinct()
                        .Except(newAudios.Keys)
                        .ToList();
                    // partition into groups of 255 so a single RavenDB query stays
                    // under the max request length
                    var audioGroups = audioIds.Partition(255).ToList();
                    Stopwatch sp3 = Stopwatch.StartNew();
                    foreach (var audioGroup in audioGroups)
                    {
                        // resolve the new Audio document ids via the NHibernate id
                        // stored during the earlier Audio import
                        using (var session = Program.DocumentStore.OpenSession(Program.TenantKey))
                        {
                            var audios = session.Query<Core.Documents.Audio>()
                                .Where(x => x.NhibernateAudioId.In(audioGroup))
                                .Select(x => new { NHibernateAudioId = x.NhibernateAudioId, NewId = x.Id, NewAccountId = x.AccountId })
                                .Take(256)
                                .ToList();
                            // indexer assignment (not Add) so a duplicate key can never throw
                            foreach (var audio in audios)
                                newAudios[audio.NHibernateAudioId] = new DummyAudio
                                {
                                    NHibernateId = audio.NHibernateAudioId,
                                    NewId = audio.NewId,
                                    NewAccountId = audio.NewAccountId
                                };
                        }
                    }
                    Console.WriteLine(String.Format("Audio lookup took: {0}ms", sp3.ElapsedMilliseconds)); // averaging around 900ms

                    // legacy Account ids on this page, minus ones already resolved
                    var accountIds = hits
                        .Where(x => x.Account != null)
                        .Select(x => x.Account.Id)
                        .Distinct()
                        .Except(newAccounts.Keys)
                        .ToList();
                    // partition the account ids into groups of 256
                    var accountGroups = accountIds.Partition(256).ToList();
                    Stopwatch sp4 = Stopwatch.StartNew();
                    foreach (var accountGroup in accountGroups)
                    {
                        // resolve the new Account document ids via the NHibernate id
                        // stored during the earlier Account import
                        using (var session = Program.DocumentStore.OpenSession(Program.TenantKey))
                        {
                            var accounts = session.Query<Core.Documents.Account>()
                                .Where(x => x.NhibernateAccountId.In(accountGroup))
                                .Select(x => new { NhibernateAccountId = x.NhibernateAccountId, NewId = x.Id })
                                .Take(256)
                                .ToList();
                            foreach (var account in accounts)
                                newAccounts[account.NhibernateAccountId] = account.NewId;
                        }
                    }
                    Console.WriteLine(String.Format("Account lookup took: {0}ms", sp4.ElapsedMilliseconds)); // averaging around 5000ms

                    Stopwatch sp5 = Stopwatch.StartNew();
                    using (var session = Program.DocumentStore.OpenSession(Program.TenantKey))
                    {
                        // documents stored since the last SaveChanges()
                        int unflushed = 0;
                        foreach (var hit in hits)
                        {
                            // legacy system contains null Audios due to deletes; also skip
                            // hits whose Audio document could not be resolved (rare, if any)
                            if (hit.Audio == null || !newAudios.ContainsKey(hit.Audio.Id))
                                continue;
                            // translate the legacy hit-type enum to the new enum
                            var newType = hit.HitType == Common.AudioHitType.Download
                                ? Audio.AudioStatCounterType.Download
                                : Audio.AudioStatCounterType.Play;
                            // not every hit has an account; TryGetValue also guards against
                            // an account that failed to resolve above (the old indexer threw)
                            string accountId;
                            if (hit.Account == null || !newAccounts.TryGetValue(hit.Account.Id, out accountId))
                                accountId = "";
                            // create the new stat document
                            var audio = newAudios[hit.Audio.Id];
                            var newHit = new AudioStatCounter(audio.NewId, audio.NewAccountId, newType, hit.Ip,
                                accountId)
                            {
                                DateTime = hit.DateAdded
                            };
                            session.Store(newHit);
                            counter++;
                            // BUGFIX: the batch counter was previously never incremented, so
                            // the intermediate flush never fired; flush every 1024 documents
                            unflushed++;
                            if (unflushed == 1024)
                            {
                                session.SaveChanges();
                                unflushed = 0;
                            }
                        }
                        // flush the final partial batch
                        session.SaveChanges();
                    }
                    Console.WriteLine(String.Format("Insert took: {0}ms", sp5.ElapsedMilliseconds)); // averaging around 2500ms

                    // progress for this page of 5000 records (averaging ~9000ms total)
                    Console.WriteLine("Counter = " + counter);
                    Console.WriteLine(String.Format("\rPage {0} of {1} - took: {2}ms", page, totalPages,
                        sp.ElapsedMilliseconds));
                    page++;
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment