Skip to content

Instantly share code, notes, and snippets.

@tilltnet
Created April 17, 2025 19:41
Show Gist options
  • Save tilltnet/21d6f1825258bc161d3deb9e158a638b to your computer and use it in GitHub Desktop.
Save tilltnet/21d6f1825258bc161d3deb9e158a638b to your computer and use it in GitHub Desktop.
Download PDFs from providers.anthem.com
# libs --------
library(jsonlite)
library(tidyverse)
# urls --------
# Site root; the document URIs found in the JSON are appended to it later.
base_url <- "https://providers.anthem.com"
# Endpoint whose response is saved and parsed as JSON below.
json_url <- paste0(
  base_url,
  "/sites/Satellite",
  "?d=Universal&pagename=getdocuments&brand=BCCNYE&state=&formslibrary=gpp_formslib"
)
# headers -----
# A session ID is required to successfully send http GET requests to the
# server. Copy the full `Cookie` request header from your browser's inspector
# network tab and expose it through the ANTHEM_COOKIE environment variable
# (e.g. in ~/.Renviron) instead of pasting it into the script: session cookies
# are credentials, and they expire.
# Line breaks are stripped because a header value must stay on a single line.
session_id <- gsub("[\r\n]+", "", Sys.getenv("ANTHEM_COOKIE"))
if (!nzchar(session_id)) {
  warning(
    "ANTHEM_COOKIE is not set; requests are sent without a session cookie ",
    "and may be rejected by the server.",
    call. = FALSE
  )
}
# download.file() documents `headers` as a named character vector, so build one
# with c() rather than list(). The Cookie entry is left out when no cookie is
# available, so that no empty header is sent.
headers <- c(
  if (nzchar(session_id)) c(Cookie = session_id),
  "User-Agent" = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"
)
# download json ----
# Save the server's JSON response to "asd.json" in the working directory,
# sending the custom request headers defined above.
download.file(
  url = json_url,
  destfile = "asd.json",
  headers = headers
)
# extract json and prepare urls ----
# The first element of the parsed JSON is used as the table of documents; it is
# expected to provide the `fileType` and `URI` columns used below.
docs <- fromJSON("asd.json")[[1]]
docs <-
  docs |>
  filter(fileType == "pdf") |>
  mutate(
    # Join with exactly one "/": file.path() would insert a second slash when
    # URI already starts with one.
    url = paste0(base_url, "/", str_remove(URI, "^/+")),
    # Strip the query string before taking the basename, so that neither a
    # "?v=..." suffix nor a "/" inside the query ends up in the file name.
    filename = basename(str_remove(URI, "\\?.*$"))
  )
# download pdfs -----
# walk2() is used because only the side effect (the saved file) matters; map2()
# would collect and print download.file()'s return codes.
# mode = "wb" forces a binary transfer: the URLs may end in a query string, so
# download.file() cannot infer from the extension that the file is binary and
# would otherwise write the PDFs in text mode on Windows.
walk2(
  docs$url,
  docs$filename,
  \(x, y) download.file(x, y, mode = "wb", headers = headers)
)
@spsanderson
Copy link

Here is a version that works without hard-coding the session ID in the code:

library(httr2)
library(tidyverse)

jsonurl <- "https://providers.anthem.com/sites/Satellite?d=Universal&pagename=getdocuments&brand=BCCNYE&state=&formslibrary=gpp_formslib"

# Step 1: Build the request, perform it, and parse the response body as JSON
response <- resp_body_json(req_perform(request(jsonurl)))

# Step 2: Extract the relevant data and process it into a tibble
# One row per entry of the first element of the parsed response; every entry
# must carry a string `URI` field, otherwise map_chr() errors.
alldocs2 <- tibble(URI = map_chr(response[[1]], "URI")) |>
  mutate(
    # File name = last path segment with any query string removed. Stripping
    # the query first (instead of requiring a "?" via lookahead) also keeps
    # URIs that have no query string, which previously became NA and were
    # silently dropped by the filter below.
    filename = basename(str_remove(URI, "\\?.*$")),
    # Match the extension case-insensitively so ".PDF" files are kept as well.
    is_pdf = str_detect(filename, regex("\\.pdf$", ignore_case = TRUE))
  ) |>
  filter(is_pdf)

# Step 3: Download the first PDF file
# Guard against an empty table: indexing [1] on zero rows yields NA and would
# request the bogus URL "https://providers.anthem.comNA".
if (nrow(alldocs2) > 0) {
  # req_perform(path = ) writes the response body straight to that file;
  # invisible() keeps the response object from being printed at top level.
  invisible(
    request(paste0("https://providers.anthem.com", alldocs2$URI[[1]])) |>
      req_perform(path = alldocs2$filename[[1]])
  )
} else {
  message("No PDF documents found; nothing to download.")
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment