For example, Alpine: it's only about 5 MB.

```dockerfile
RUN apk add --no-cache tzdata
ENV TZ=America/New_York
```
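The `TZ` environment variable is the whole mechanism here: anything in the container that consults local time picks it up, provided tzdata is installed. A quick illustration of that mechanism from R (nothing Docker-specific about it):

```r
# TZ drives local-time formatting the same way inside or outside a container.
Sys.setenv(TZ = "America/New_York")
format(Sys.time(), "%Y-%m-%d %H:%M %Z")
#> something like "2015-06-01 09:30 EDT"
```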
```awk
#!/usr/bin/awk -f
# This program is a copy of guff, a plot device. https://github.com/silentbicycle/guff
# My copy here is written in awk instead of C, has no compelling benefit.
# Public domain. @thingskatedid
# Run as awk -v x=xyz ... or env variables for stuff?
# Assumptions: the data is evenly spaced along the x-axis
# TODO: moving average
```
```bash
#!/bin/bash
# Force a clean exit in a running process: overwrite the two bytes at RIP
# with 0x0f 0x05 (the x86-64 `syscall` instruction, written little-endian
# as the short 0x050f), load rax=231 (exit_group) and rdi=0 (exit status),
# then continue so the injected syscall runs.
gdb -p "$1" -batch -ex 'set {short}$rip = 0x050f' -ex 'set $rax=231' -ex 'set $rdi=0' -ex 'cont'
```
| year | ct | amt | kind |
|---|---|---|---|
| 2011 | 0 | 0 | Filing Date |
| 2011 | 2 | 10 | Transaction Date |
| 2012 | 0 | 0 | Filing Date |
| 2012 | 8 | 0.4 | Transaction Date |
| 2013 | 9 | 0.4 | Filing Date |
| 2013 | 24 | 35 | Transaction Date |
| 2014 | 19 | 2.5 | Filing Date |
| 2014 | 38 | 11 | Transaction Date |
| 2015 | 60 | 54 | Filing Date |
```js
// XPath CheatSheet
// To test XPath in your Chrome Debugger: $x('/html/body')
// http://www.jittuu.com/2012/2/14/Testing-XPath-In-Chrome/

// 0. XPath Examples.
// More: http://xpath.alephzarro.com/content/cheatsheet.html

'//hr[@class="edge" and position()=1]' // every first hr of 'edge' class
```
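If you'd rather test from R than the Chrome console, the same selectors drop straight into rvest; a minimal sketch (the URL is a placeholder):

```r
library(rvest)

# Placeholder page; swap in the document you're actually probing.
pg <- read_html("https://example.com/")

# Same selector as the cheatsheet entry above.
html_nodes(pg, xpath = '//hr[@class="edge" and position()=1]')
```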
```r
get_page <- function(page_num = 1) {

  # This is to be kind to the web site: it has no robots.txt, so this
  # should be the default wait time between requests. The desires of the
  # scraper are not greater than those of the site owner, and you'd be
  # abusing their resources if you did not put a delay between requests.
  Sys.sleep(5)

  # ... (the rest of the function body was cut off in the original)
}
```
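A usage sketch, assuming the truncated body above goes on to fetch and return one page of results:

```r
# Hypothetical: pull the first three pages; each call waits 5 seconds first,
# so this takes at least 15 seconds by design.
pages <- lapply(1:3, get_page)
```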
```sql
;WITH CTE AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code
                           ORDER BY [doc_date])
      - ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code, mark_1
                           ORDER BY [doc_date]) AS grp
        -- the snippet was truncated mid-window; the second ORDER BY,
        -- the grp alias, and everything below are reconstructed
    FROM [stock_moves]  -- hypothetical table name; the source table was cut off
)
SELECT *
FROM CTE;
```
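For the record, that ROW_NUMBER() difference is the classic gaps-and-islands trick: the two sequences advance in lockstep until mark_1 changes, so their difference is constant within each unbroken run and works as a grouping key. The same idea in dplyr, as a sketch (the `docs` data frame and its columns are assumed from the query above):

```r
library(dplyr)

# Sketch of the same row_number-difference trick; `docs` is hypothetical.
docs %>%
  arrange(product_id, shop_code, doc_date) %>%
  group_by(product_id, shop_code) %>%
  mutate(rn_all = row_number()) %>%
  group_by(product_id, shop_code, mark_1) %>%
  mutate(grp = rn_all - row_number()) %>%
  ungroup()
```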
```r
library(rvest)
library(dplyr)

pg <- read_html("https://bidplus.gem.gov.in/bidresultlists")

blocks <- html_nodes(pg, ".block")

items_and_quantity <- html_nodes(blocks, xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]")

items <- html_nodes(items_and_quantity, xpath = ".//strong[contains(., 'Item(s)')]/following-sibling::span") %>%
  html_text(trim = TRUE)
```
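The natural next step, as a sketch: it assumes the quantity figure sits in the same strong-then-span markup as the item names (verify that against the live page) and reuses the objects from the block above.

```r
# Hypothetical companion extraction: quantities live next to the items.
quantity <- html_nodes(items_and_quantity,
                       xpath = ".//strong[contains(., 'Quantity')]/following-sibling::span") %>%
  html_text(trim = TRUE)

tibble(item = items, quantity = quantity)
```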
Since cognitive processes are challenging for Python programmers, this is pretty much what the (not-great) idiom in the appropriately-acronym'd BS4, `text` => `get_text()`, does (https://github.com/wention/BeautifulSoup4/blob/03a2b3a9d1fc5877212d9d382a512663f24c887d/bs4/element.py#L846-L854). There are FAR BETTER WAYS to get text than this, but I would not expect Python things to grok that.
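For comparison, rvest/xml2 does that descendant-text walk natively; quick sketch:

```r
library(rvest)

# html_text() concatenates every descendant text node, which is what
# get_text() reimplements by iterating over the element's strings.
read_html("<p>Hello <b>there</b> world</p>") %>%
  html_node("p") %>%
  html_text()
#> [1] "Hello there world"
```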
```r
library(httr)
library(rvest)
library(readxl)
library(tidyverse)

doe <- read_html("https://www.oe.netl.doe.gov/OE417_annual_summary.aspx")

dir.create("~/Data/doe-cache-dir", showWarnings = FALSE)

html_nodes(doe, xpath = ".//a[contains(., 'XLS')]") %>%
  html_attr("href")  # the original cut off after the pipe; pulling hrefs is the obvious next step
```
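A sketch of where that chain was presumably headed: cache each linked workbook locally, then read it. The relative-URL handling and file naming below are my assumptions, not the original code.

```r
# Hypothetical continuation: download each XLS into the cache dir, read with readxl.
cache_dir <- path.expand("~/Data/doe-cache-dir")

xls_urls <- html_nodes(doe, xpath = ".//a[contains(., 'XLS')]") %>%
  html_attr("href") %>%
  xml2::url_absolute("https://www.oe.netl.doe.gov/")  # resolve relative links

local_files <- file.path(cache_dir, basename(xls_urls))

# Only hit the server for files we don't already have.
walk2(xls_urls, local_files, ~{
  if (!file.exists(.y)) download.file(.x, .y, mode = "wb")
})

summaries <- map(local_files, read_excel)
```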