Created
April 17, 2025 19:41
-
-
Save tilltnet/21d6f1825258bc161d3deb9e158a638b to your computer and use it in GitHub Desktop.
Download PDFs from providers.anthem.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # libs -------- | |
| library(jsonlite) | |
| library(tidyverse) | |
# urls --------
# Endpoint whose JSON response is saved and parsed further down to obtain the
# list of documents.
json_url <- "https://providers.anthem.com/sites/Satellite?d=Universal&pagename=getdocuments&brand=BCCNYE&state=&formslibrary=gpp_formslib"
# Site root; each document's URI is appended to it to form the download URL.
base_url <- "https://providers.anthem.com"
# headers -----
# A session cookie is required to successfully send HTTP GET requests to the
# server. Copy the full `Cookie` request header from the network tab of your
# browser's inspector and expose it through the ANTHEM_SESSION_COOKIE
# environment variable (for example in ~/.Renviron) instead of pasting it into
# this script, so the credential never ends up in shared code.
session_id <- Sys.getenv("ANTHEM_SESSION_COOKIE", unset = "")
if (!nzchar(session_id)) {
  warning(
    "ANTHEM_SESSION_COOKIE is not set; requests are sent without a session ",
    "cookie and will most likely be rejected by the server.",
    call. = FALSE
  )
}

# download.file() documents `headers` as a named character vector, so build one
# rather than a list. The Cookie header is omitted when no cookie is available.
headers <- c(
  if (nzchar(session_id)) c(Cookie = session_id),
  "User-Agent" = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"
)
# download json ----
# Save the response of `json_url` as "asd.json" in the working directory,
# sending the request headers held in `headers`.
download.file(url = json_url, destfile = "asd.json", headers = headers)
# extract json and prepare urls ----
# The first element of the parsed JSON is used as the document table; only its
# PDF rows are kept.
docs <- fromJSON("asd.json")[[1]] |>
  filter(fileType == "pdf") |>
  mutate(
    # Join host and path with exactly one "/". file.path() is meant for file
    # system paths and would produce "//" if URI starts with a slash.
    url = paste0(base_url, "/", str_remove(URI, "^/+")),
    # Local file name: drop any query string (e.g. "?v=...") first, then take
    # the last path component, so a "/" inside the query cannot leak into it.
    filename = basename(str_remove(URI, "\\?.*$"))
  )
# download pdfs -----
# Fetch every PDF into the working directory under its prepared file name.
# walk2() is used because only the side effect matters, and mode = "wb" keeps
# the binary PDF content intact on Windows, where the default text mode
# rewrites line endings.
walk2(
  docs$url,
  docs$filename,
  \(pdf_url, dest) download.file(pdf_url, dest, mode = "wb", headers = headers)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
without using the session id in code explicitly