This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void ConfigureServices(IServiceCollection services) | |
{ | |
services.AddAngleSharp(); | |
// adding the AppPathsInfo singleton instance carrying the ContentRootPath | |
// provided by the IHostingEnvironment injected instance | |
services.AddSingleton( | |
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath)); | |
services.AddSingleton<HzzoHtmlScraper>(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
namespace MedsProcessor.Common.Models | |
{ | |
public sealed class AppPathsInfo | |
{ | |
public AppPathsInfo(string appRootPath) | |
{ | |
if (appRootPath == null) | |
throw new ArgumentNullException(nameof(appRootPath)); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ActionResult> Index([FromServices] HzzoHtmlScraper scraper) | |
{ | |
var startTime = DateTime.Now; | |
// TODO: implement scraper and parser logic | |
var meds = await scraper.Run(); | |
var totalTime = startTime - DateTime.Now; | |
return Ok( | |
$"Done! Handler duration: {totalTime.Duration()}" + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Parses every supplied HTML document with <c>ParseHtmlDocument</c> and
/// merges the per-document results into a single set.
/// </summary>
/// <param name="docs">The HTML documents to parse.</param>
/// <returns>One set holding the union of all per-document results.</returns>
ISet<HzzoMedsDownloadDto> ParseHtmlDocuments(IDocument[] docs) =>
  docs.Aggregate(
    new HashSet<HzzoMedsDownloadDto>(),
    (mergedSet, doc) =>
    {
      // Grow the single seed set in place rather than allocating a new
      // HashSet (and re-hashing every accumulated entry) per document.
      mergedSet.UnionWith(ParseHtmlDocument(doc));
      return mergedSet;
    });
static ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems) => | |
elems.Aggregate(new HashSet<HzzoMedsDownloadDto>(), (medsList, li) => | |
{ | |
var href = li.QuerySelector("a").GetAttribute("href"); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
namespace MedsProcessor.Common.Models | |
{ | |
public class HzzoMedsDownloadDto | |
{ | |
private readonly string _rootLocation; | |
public HzzoMedsDownloadDto(string href, string validFrom, string rootLocation) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace MedsProcessor.Common
{
  /// <summary>
  /// Compile-time string constants shared across the application.
  /// </summary>
  public static class Constants
  {
    /// <summary>
    /// Address of the hzzo.hr page referenced as the "current lists" URL.
    /// </summary>
    public const string CURRENT_LISTS_URL =
      "http://www.hzzo.hr/zdravstveni-sustav-rh/trazilica-za-lijekove-s-vazecih-lista/";

    /// <summary>
    /// Address of the "archive lists" page, located directly beneath
    /// <see cref="CURRENT_LISTS_URL"/>.
    /// </summary>
    public const string ARCHIVE_LISTS_URL =
      CURRENT_LISTS_URL + "arhiva-liste-lijekova/";

    /// <summary>
    /// Download directory value; currently an empty string.
    /// </summary>
    public const string DOWNLOAD_DIR = "";
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Opens the current-lists and archive-lists URLs through the browsing
/// context and waits for both to finish.
/// </summary>
/// <returns>
/// A task yielding the two opened documents, current lists first and
/// archive lists second.
/// </returns>
Task<IDocument[]> DownloadHtmlDocuments()
{
  // Kick off both requests before awaiting anything so they run concurrently.
  var currentListsTask = _browsingContext.OpenAsync(CURRENT_LISTS_URL);
  var archiveListsTask = _browsingContext.OpenAsync(ARCHIVE_LISTS_URL);

  return Task.WhenAll(currentListsTask, archiveListsTask);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Downloads the HTML documents and parses them into a set of
/// <see cref="HzzoMedsDownloadDto"/> entries.
/// </summary>
/// <returns>A task yielding the parsed set.</returns>
public async Task<ISet<HzzoMedsDownloadDto>> Run() =>
  ParseHtmlDocuments(await DownloadHtmlDocuments());
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Threading.Tasks; | |
using MedsProcessor.Scraper; | |
using Microsoft.AspNetCore.Builder; | |
using Microsoft.AspNetCore.Hosting; | |
using Microsoft.AspNetCore.HttpsPolicy; | |
using Microsoft.AspNetCore.Mvc; | |
using Microsoft.Extensions.Configuration; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using AngleSharp; | |
using Microsoft.Extensions.DependencyInjection; | |
namespace MedsProcessor.Scraper | |
{ | |
public static class ServiceCollectionExtensions | |
{ | |
/// <summary>
/// Registers a singleton AngleSharp browsing context, built from the default
/// configuration with the default loader enabled.
/// </summary>
/// <param name="services">The service collection to register into.</param>
/// <returns>The value returned by <c>AddSingleton</c>, to allow call chaining.</returns>
public static IServiceCollection AddAngleSharp(this IServiceCollection services)
{
  var loaderConfig = AngleSharp.Configuration.Default.WithDefaultLoader();
  var browsingContext = BrowsingContext.New(loaderConfig);

  return services.AddSingleton(browsingContext);
}