Created
February 8, 2023 21:02
-
-
Save n0099/8a8cb3899aee9099192dbd192548a3cd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Text.Encodings.Web; | |
using static System.Text.Json.JsonSerializer; | |
namespace tbm.Crawler; | |
/// <summary>
/// One-shot background worker that walks every forum id found in <c>db.Forum</c>,
/// re-reads the stored reply contents of that forum and hands them in batches to
/// <c>ReplySaver.SaveReplyContentImages</c>, then terminates the process.
/// </summary>
public class MigrationWorker : BackgroundService
{
    /// <summary>Number of replies read between two flushes to the database.</summary>
    private const int BatchSize = 10000;

    private static readonly JsonSerializerOptions JsonSerializerOptions = new()
    {
        // the exceptions dictionary holds value tuples, whose elements are fields rather than properties
        IncludeFields = true,
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
    };

    private readonly ILogger<MigrationWorker> _logger;
    private readonly ILifetimeScope _scope0;

    /// <summary>Creates the worker.</summary>
    /// <param name="logger">Logger used for progress output.</param>
    /// <param name="scope0">Root lifetime scope from which per-run child scopes are created.</param>
    public MigrationWorker(ILogger<MigrationWorker> logger, ILifetimeScope scope0)
    {
        _logger = logger;
        _scope0 = scope0;
    }

    /// <summary>
    /// Converts the replies of every forum in turn and then exits the process with code 0.
    /// </summary>
    /// <param name="stoppingToken">Checked before each forum; cancellation surfaces as an
    /// <see cref="OperationCanceledException"/>.</param>
    /// <remarks>
    /// NOTE(review): all work here is synchronous, so nothing yields back to the host until the
    /// loop is done - confirm that blocking host startup for the whole migration is intended.
    /// </remarks>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        await using var scope1 = _scope0.BeginLifetimeScope();
        var db = scope1.Resolve<TbmDbContext.New>()(0);
        var fids = from f in db.Forum select f.Fid;
        foreach (var fid in fids)
        {
            stoppingToken.ThrowIfCancellationRequested();
            _logger.LogInformation("converting for fid:{} started", fid);
            Convert(fid, stoppingToken);
            _logger.LogInformation("converting for fid:{} finished", fid);
        }

        // Terminates the whole process once every forum has been handled;
        // scope1 is therefore never disposed on this path.
        Environment.Exit(0);
    }

    /// <summary>
    /// Streams the non-null reply contents of one forum, parses each one and saves the
    /// resulting posts in batches of <see cref="BatchSize"/>.
    /// </summary>
    /// <param name="fid">Forum id passed to the <c>TbmDbContext.New</c> factory.</param>
    /// <param name="stoppingToken">When cancelled, reading stops and the replies collected so far
    /// are still flushed.</param>
    private void Convert(Fid fid, CancellationToken stoppingToken)
    {
        using var scope1 = _scope0.BeginLifetimeScope();

        // Two contexts: one is enumerated as the read source, the other receives the writes.
        var db = scope1.Resolve<TbmDbContext.New>()(fid);
        var db2 = scope1.Resolve<TbmDbContext.New>()(fid);
        var replies = from p in db.ReplyContents where p.Content != null select p;

        var i = 0;
        using var process = Process.GetCurrentProcess();
        var stopwatch = Stopwatch.StartNew();

        // Nothing in this method adds entries; it is only serialized into the trace line.
        var exceptions = new Dictionary<string, (uint times, ulong pid, string content)>();
        var repliesWithImage = new List<ReplyPost>(BatchSize);

        // Flushes the current batch through db2 and logs progress figures for it.
        void SaveAndLog()
        {
            ReplySaver.SaveReplyContentImages(db2, repliesWithImage);
            var imagesInserted = db2.SaveChanges();

            // Drop tracked entities so the change tracker does not grow across batches.
            db2.ChangeTracker.Clear();
            repliesWithImage.Clear();
            repliesWithImage.EnsureCapacity(BatchSize);

            // Process caches its property values after the first read; without Refresh()
            // every batch would report the memory figure captured by the first one.
            process.Refresh();
            _logger.LogTrace("i:{} imagesInserted:{} elapsed:{}ms mem:{}mb exceptions:{}",
                i, imagesInserted,
                stopwatch.ElapsedMilliseconds,
                process.PrivateMemorySize64 / 1024 / 1024,
                Serialize(exceptions, JsonSerializerOptions));
            stopwatch.Restart();
        }

        foreach (var reply in replies.AsNoTracking())
        {
            i++;
            if (i % BatchSize == 0)
            {
                // Runs before the current reply is added, so it flushes the preceding replies.
                SaveAndLog();
            }

            if (stoppingToken.IsCancellationRequested)
            {
                break;
            }

            if (reply.Content == null)
            {
                continue;
            }

            var content = PostContentWrapper.Parser.ParseFrom(reply.Content).Value;
            repliesWithImage.Add(new() {Pid = reply.Pid, OriginalContents = content});
        }

        // Flush whatever is left over after the last full batch (or after cancellation).
        SaveAndLog();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment