Skip to content

Instantly share code, notes, and snippets.

@Legends
Last active June 22, 2020 11:49
Show Gist options
  • Save Legends/cd561e7f0a9667c9e4600f925d327df7 to your computer and use it in GitHub Desktop.
Save Legends/cd561e7f0a9667c9e4600f925d327df7 to your computer and use it in GitHub Desktop.
Puppeteer Sharp - Scraping using C# and puppeteer
/// <summary>
/// Drives a headless Chromium instance through the web form at the hard-coded URL:
/// writes <c>NettoUmsatz</c> into the income input, ticks the "is married" checkbox,
/// submits the form and reads two result cells (an amount in Euro and a percentage)
/// from the page that is shown after the submit.
/// </summary>
/// <returns>
/// A <see cref="DtoEst"/> whose <c>EstBetrag</c> and <c>EstProzentsatz</c> hold the values
/// parsed with the de-DE culture. The <c>TryParse</c> results are not checked, so a cell
/// that cannot be parsed yields 0 for the corresponding property.
/// </returns>
/// <remarks>
/// <c>NettoUmsatz</c> is a member defined outside this method; it is interpolated verbatim
/// into the JavaScript that sets the input value.
/// </remarks>
private async Task<DtoEst> LoadXXXAsync()
{
    // Downloads Chromium to the local project; Puppeteer Sharp needs it for execution.
    // This can take about 2 minutes.
    await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

    var dto = new DtoEst();
    var url = "https://www.xxxxxx.xhtml";

    #region SELECTORS
    // The element ids contain colons. They can be used as-is with getElementById, but must be
    // CSS-escaped (":" -> "\3A ") when used inside a CSS selector:
    // https://stackoverflow.com/questions/45110893/select-elements-by-attributes-with-colon
    // https://mothereff.in/css-escapes
    const string isMarriedCheckboxId = "bmf_form_ekst:ekst_pv:0";
    const string umsatzInputId = "bmf_form_ekst:ekst_zve";
    const string submitButtonSelector = @"#bmf_form_ekst\3A income_ekst";

    // Common prefix of the two result cells read after the form has been submitted.
    const string resultRowSelector = "#ui-id-4 > div.ekst_ergebnis > div:nth-child(6) > table > tbody > tr:nth-child(3)";
    #endregion

    // Dispose the browser as well as the page; otherwise the launched Chromium process
    // stays alive after this method returns or throws.
    using (var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true }))
    using (var page = await browser.NewPageAsync())
    {
        // Load the page.
        await page.GoToAsync(url);

        // Wait for the income input to exist, then fill it.
        await page.WaitForExpressionAsync($"document.getElementById('{umsatzInputId}')!=null");
        await page.EvaluateExpressionAsync($"document.getElementById('{umsatzInputId}').value = '{NettoUmsatz}';");

        // Wait for the checkbox to exist, then tick it.
        await page.WaitForExpressionAsync($"document.getElementById('{isMarriedCheckboxId}')!=null");
        await page.EvaluateExpressionAsync($"document.getElementById('{isMarriedCheckboxId}').checked = true;");

        // Start waiting for the navigation BEFORE clicking. If the wait were registered only
        // after the click had completed, a fast navigation could already be over and the wait
        // would hang until it times out.
        var navigationTask = page.WaitForNavigationAsync();
        var clickTask = page.ClickAsync(submitButtonSelector);
        await Task.WhenAll(navigationTask, clickTask);

        // Read the two result cells from the page shown after the submit.
        var strValueEst = await page.EvaluateFunctionAsync<string>(
            $"()=>document.querySelector('{resultRowSelector} > td:nth-child(2) > strong').textContent");
        var strValueEstPercent = await page.EvaluateFunctionAsync<string>(
            $"()=>document.querySelector('{resultRowSelector} > td:nth-child(3) > strong').textContent");

        // The cells use German number formatting. TrimEnd strips any trailing run of the given
        // characters (unit suffix and spaces) before parsing.
        var deDE = new CultureInfo("de-DE");
        const NumberStyles numberStyles = NumberStyles.AllowThousands | NumberStyles.AllowDecimalPoint;
        Decimal.TryParse(strValueEst.TrimEnd(" Euro".ToCharArray()), numberStyles, deDE, out var decValueEst);
        Decimal.TryParse(strValueEstPercent.TrimEnd(" %".ToCharArray()), numberStyles, deDE, out var decValueEstPercent);

        dto.EstBetrag = decValueEst;
        dto.EstProzentsatz = decValueEstPercent;
    }

    return dto;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment