This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
namespace MedsProcessor.Common.Models | |
{ | |
public class HzzoMedsDownloadDto | |
{ | |
private readonly string _rootLocation; | |
public HzzoMedsDownloadDto(string href, string validFrom, string rootLocation) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Parses every supplied HTML document and merges the parsed download entries into one set.
/// </summary>
/// <param name="docs">Documents to parse; each one is handed to <c>ParseHtmlDocument</c>.</param>
/// <returns>
/// A new <see cref="HashSet{T}"/> containing the union of the entries parsed from all documents;
/// empty when <paramref name="docs"/> contains no documents.
/// </returns>
ISet<HzzoMedsDownloadDto> ParseHtmlDocuments(IDocument[] docs) =>
    // Flatten all per-document results and build the set once, instead of allocating a new
    // HashSet and re-copying the accumulated entries for every document.
    new HashSet<HzzoMedsDownloadDto>(docs.SelectMany(doc => ParseHtmlDocument(doc)));
static ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems) => | |
elems.Aggregate(new HashSet<HzzoMedsDownloadDto>(), (medsList, li) => | |
{ | |
var href = li.QuerySelector("a").GetAttribute("href"); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ActionResult> Index([FromServices] HzzoHtmlScraper scraper) | |
{ | |
var startTime = DateTime.Now; | |
// TODO: implement scraper and parser logic | |
var meds = await scraper.Run(); | |
var totalTime = startTime - DateTime.Now; | |
return Ok( | |
$"Done! Handler duration: {totalTime.Duration()}" + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
namespace MedsProcessor.Common.Models | |
{ | |
public sealed class AppPathsInfo | |
{ | |
public AppPathsInfo(string appRootPath) | |
{ | |
if (appRootPath == null) | |
throw new ArgumentNullException(nameof(appRootPath)); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void ConfigureServices(IServiceCollection services) | |
{ | |
services.AddAngleSharp(); | |
// adding the AppPathsInfo singleton instance carrying the ContentRootPath | |
// provided by the IHostingEnvironment injected instance | |
services.AddSingleton( | |
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath)); | |
services.AddSingleton<HzzoHtmlScraper>(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.IO; | |
using System.Net.Http; | |
using System.Threading.Tasks; | |
using MedsProcessor.Common.Models; | |
using static MedsProcessor.Common.Constants; | |
namespace MedsProcessor.Downloader | |
{ | |
public class HzzoExcelDownloader |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void ConfigureServices(IServiceCollection services) | |
{ | |
// we need the http client factory: | |
services.AddHttpClient(); | |
services.AddAngleSharp(); | |
services.AddSingleton( | |
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath)); | |
services.AddSingleton<HzzoHtmlScraper>(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ | |
// NOTE: throttle requests in parallel | |
var parallelismDegree = 5; | |
var waitBetweenRequestsMs = 500; | |
var savingItems = new List<Task>(); | |
var notDownloadedDocs = meds.Where(x => !x.IsAlreadyDownloaded).ToList(); | |
for (int i = 0; i < notDownloadedDocs.Count; i += parallelismDegree) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Requests the content at <c>doc.Href</c> through <c>_httpCli.GetStreamAsync</c> and stores the
/// resulting stream on <c>doc.DocumentStream</c>.
/// </summary>
/// <param name="doc">Download descriptor that supplies the address and receives the stream.</param>
/// <returns>The same <paramref name="doc"/> instance, with its <c>DocumentStream</c> assigned.</returns>
async Task<HzzoMedsDownloadDto> DownloadExcel(HzzoMedsDownloadDto doc)
{
    // The stream is not disposed here; it is left on the DTO for whoever consumes it next.
    var contentStream = await _httpCli.GetStreamAsync(doc.Href);
    doc.DocumentStream = contentStream;

    return doc;
}
static Task SaveExcel(HzzoMedsDownloadDto doc) => | |
Task.Factory.StartNew(() => | |
{ | |
using(var fileStream = File.Create(doc.FilePath, BUFFER_SIZE, FileOptions.Asynchronous)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
readonly AppPathsInfo _appPathsInfo; | |
/// <summary>
/// Creates the scraper and stores the supplied collaborators in its private fields.
/// </summary>
/// <param name="browsingContext">Browsing context kept in <c>_browsingContext</c>.</param>
/// <param name="appPathsInfo">Application path information kept in <c>_appPathsInfo</c>.</param>
public HzzoHtmlScraper(IBrowsingContext browsingContext, AppPathsInfo appPathsInfo)
{
    _appPathsInfo = appPathsInfo;
    _browsingContext = browsingContext;
}