Created
March 15, 2012 12:47
-
-
Save phinett/2044025 to your computer and use it in GitHub Desktop.
Import script from sql server to RavenDb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace HM.ImportFromNHibenate.ImportAudios | |
{ | |
public static class ImportAudioHits | |
{ | |
private class DummyAudio | |
{ | |
public int NHibernateId { get; set; } | |
public string NewId { get; set; } | |
public string NewAccountId { get; set; } | |
} | |
public static void Execute() | |
{ | |
bool haveResults = true; | |
const int pageSize = 5000; | |
int page = 344; | |
int totalPages = 0; | |
// this is the total amount of records to import | |
int total = 21047154; | |
totalPages = total/pageSize; | |
// keeps track of how many we have imported | |
int counter = 0; | |
// dictionary objects to store our looked up documents from ravenDb (actually only stores the required information to save memory) | |
IDictionary<int, DummyAudio> newAudios = new Dictionary<int, DummyAudio>(); | |
IDictionary<int, string> newAccounts = new Dictionary<int, string>(); | |
while (haveResults) | |
{ | |
Stopwatch sp = Stopwatch.StartNew(); | |
using (var nhSession = NHibernateSession.GetSessionFactoryFor(Program.TenantKey).OpenSession()) | |
{ | |
// fetch a paged list of AudioHits from SQL Server (NHibernate) | |
Stopwatch sp2 = Stopwatch.StartNew(); | |
var hits = nhSession.QueryOver<Domain.AudioHit>() | |
.OrderBy(x => x.Audio.Id).Asc | |
.Skip(page * pageSize) | |
.Take(pageSize) | |
.List<Domain.AudioHit>(); | |
// This is averaging around 400ms | |
Console.WriteLine(String.Format("\rQuery took: {0}ms", sp2.ElapsedMilliseconds)); | |
// if no more results then we are done importing | |
if (!hits.Any()) | |
haveResults = false; | |
// get a list of all the NHibernate Ids for our Audios | |
var audioIds = hits.Select(x => x.Audio.Id).ToList(); | |
// filter out any IDs we have already looked up, save getting them again | |
audioIds = audioIds.Except(newAudios.Select(a => Convert.ToInt32(a.Key)).ToList()).Distinct().ToList(); | |
// partition our audioIds into groups of 256 so we don't hit the limit of max request length | |
var audioGroups = audioIds.Partition(255).ToList(); | |
Stopwatch sp3 = Stopwatch.StartNew(); | |
foreach (var audioGroup in audioGroups) | |
{ | |
// fetch all the new AudioIds from RavenDb by using the stored NHibernate Id from the previous imports | |
using (var session = Program.DocumentStore.OpenSession(Program.TenantKey)) | |
{ | |
var audios = session.Query<Core.Documents.Audio>() | |
.Where(x => x.NhibernateAudioId.In(audioGroup)) | |
.Select(x => new { NHibernateAudioId = x.NhibernateAudioId, NewId = x.Id, NewAccountId = x.AccountId}) | |
.Take(256) | |
.ToList(); | |
// ad them to our dictionary so we dont need to look the same Audio documents twice | |
foreach (var audio in audios) | |
newAudios.Add(audio.NHibernateAudioId, new DummyAudio() {NHibernateId = audio.NHibernateAudioId, NewId = audio.NewId, NewAccountId = audio.NewAccountId}); | |
} | |
} | |
Console.WriteLine(String.Format("Audio lookup took: {0}ms", sp3.ElapsedMilliseconds)); // averaging around 900ms | |
// get a list of all the NHibernate Ids for our Accounts | |
var accountIds = hits.Where(x => x.Account != null).Select(x => x.Account.Id).ToList(); | |
// filter out any Ids we have already looked up, so we don't need to lookup them twice from RavenDb | |
accountIds = accountIds.Except(newAccounts.Select(a => Convert.ToInt32(a.Key)).ToList()).Distinct().ToList(); | |
// parttion our account Ids intogroups of 256 | |
var accountGroups = accountIds.Partition(256).ToList(); | |
Stopwatch sp4 = Stopwatch.StartNew(); | |
foreach (var accountGroup in accountGroups) | |
{ | |
// fetch all the new AccountIds from RavenDb by using the NHibernetAccountId we stored from the previous import of Accounts | |
using (var session = Program.DocumentStore.OpenSession(Program.TenantKey)) | |
{ | |
var accounts = session.Query<Core.Documents.Account>() | |
.Where(x => x.NhibernateAccountId.In(accountGroup)) | |
.Select(x => new { NhibernateAccountId = x.NhibernateAccountId, NewId = x.Id }) | |
.Take(256) | |
.ToList(); | |
foreach (var account in accounts) | |
newAccounts.Add(account.NhibernateAccountId, account.NewId); | |
} | |
} | |
Console.WriteLine(String.Format("Account lookup took: {0}ms", sp4.ElapsedMilliseconds)); // averaging around 5000ms | |
Stopwatch sp5 = Stopwatch.StartNew(); | |
using (var session = Program.DocumentStore.OpenSession(Program.TenantKey)) | |
{ | |
int i = 0; | |
// for each of our 5000 hits, insert into NHibernate | |
foreach (var hit in hits) | |
{ | |
// legacy system contained null Audios due some deletes, ignore these | |
// also ignore any hit where we couldn't find the new Audio document (should be pretty rare if any at all) | |
if (hit.Audio == null || !newAudios.ContainsKey(hit.Audio.Id)) | |
continue; | |
// convert our old enum to our new enum value | |
Audio.AudioStatCounterType newType = Audio.AudioStatCounterType.Play; | |
if (hit.HitType == Common.AudioHitType.Download) | |
newType = Audio.AudioStatCounterType.Download; | |
// some hits dont have an account, but if they do then get the accountid from our dictionary | |
string accountId = ""; | |
if (hit.Account != null) | |
accountId = newAccounts[hit.Account.Id]; | |
// create our new Stat document | |
var newHit = new AudioStatCounter(newAudios[hit.Audio.Id].NewId, newAudios[hit.Audio.Id].NewAccountId, newType, hit.Ip, | |
accountId) | |
{ | |
DateTime = hit.DateAdded | |
}; | |
session.Store(newHit); | |
// increment our counter | |
counter++; | |
// foreach 1024 documents call SaveChanges() | |
if (i == 1024) | |
{ | |
session.SaveChanges(); | |
i = 0; | |
} | |
} | |
// for any remaining documents we haven't saved | |
session.SaveChanges(); | |
} | |
Console.WriteLine(String.Format("Insert took: {0}ms", sp5.ElapsedMilliseconds)); // averaging around 2500ms | |
// information on how long processing this 5000 records took | |
// Averaging: 9000ms | |
Console.WriteLine("Counter = " + counter); | |
Console.WriteLine(String.Format("\rPage {0} of {1} - took: {2}ms", page, totalPages, | |
sp.ElapsedMilliseconds)); | |
page++; | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment