Last active
August 11, 2022 09:58
-
-
Save Xetera/d50af9c42615d66d55755b3708c2a70e to your computer and use it in GitHub Desktop.
Scraping post data from Weverse's new Naver API by signing each request with an HMAC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import hmac
import math
import re
import time
import urllib.parse
from hashlib import sha1

import requests
# Matches the src="..." attribute that points at the site's main JS bundle.
# [^"] keeps the match inside a single attribute value, so several script tags
# on one (minified) line cannot be swallowed into one bogus link.
js_link_regex = re.compile(r'src="([^"]+/main[^"]*\.js)"')
# Matches `return "<hex>"` statements in the bundle; each captured hex string
# is a candidate for the HMAC secret key.
secret_key_regex = re.compile(r'return\s?"([a-fA-F0-9]+)"')
def get_secret():
    """Fetch the current HMAC secret key from the weverse.io main JS bundle.

    Downloads the weverse.io landing page, locates the main bundle script
    with ``js_link_regex``, downloads that bundle and scans it with
    ``secret_key_regex``.

    Returns:
        The first matched hex string that is longer than 10 characters.

    Raises:
        Exception: if no bundle link is found, or the bundle contains no
            plausible secret key.
        requests.RequestException: if a download fails, times out or
            returns an HTTP error status.
    """
    result = requests.get("https://weverse.io", timeout=10)
    result.raise_for_status()
    matches = js_link_regex.findall(result.text)
    if not matches:
        raise Exception("No matching main bundle links found")
    # Take the first hit instead of requiring exactly one, and resolve it
    # against the page URL so a relative bundle path still yields a
    # fetchable absolute URL.
    js_link = urllib.parse.urljoin(result.url, matches[0])
    js_result = requests.get(js_link, timeout=10)
    js_result.raise_for_status()
    js_matches = secret_key_regex.findall(js_result.text)
    if not js_matches:
        raise Exception("Could not find any secret keys matching the pattern")
    # Usually the first hex value that looks like a secret key is the actual key
    for match in js_matches:
        if len(match) > 10:
            return match
    # Every candidate was too short; fail with a clear error rather than
    # leaking a bare StopIteration to the caller.
    raise Exception("Found hex values in the bundle, but none long enough to be a secret key")
app_id = "be4d79eb8fc7bd008ee82c8ec4ff6fd4"
# This value changes periodically so it needs to be dynamically retrieved from the js bundle
active = get_secret()
url = f"/post/v1.0/community-14/artistTabPosts?fieldSet=postsV1&limit=20&pagingType=CURSOR&appId={app_id}&language=en&platform=WEB&wpf=pc"
# Only the first 255 characters of the path + query are fed into the signature.
url_hash = url[:255]
print(url_hash)
# Millisecond timestamp (minus 10 ms); sent as `wmsgpad` and also appended to
# the text that gets signed.
pad = str(math.floor(time.time() * 1000) - 10)
hashed_url = url_hash + pad
byte_key = bytes(active, "UTF-8")
# HMAC-SHA1 over (truncated url + pad), base64-encoded, then percent-encoded
# so it can be placed in the `wmd` query parameter.
result = base64.standard_b64encode(hmac.new(byte_key, hashed_url.encode(), sha1).digest()).decode()
wmd = urllib.parse.quote(result)
final_url = f"https://apis.naver.com/weverse/wevweb{url}&wmsgpad={pad}&wmd={wmd}"
print(final_url)
resp = requests.get(
    final_url,
    headers={
        # optional user agent
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
        "Referer": "https://www.weverse.io/",
        # replace with your auth token. Refer to https://gist.github.com/Xetera/aa59e84f3959a37c16a3309b5d9ab5a0 if you don't know how to get it
        "Authorization": "Bearer ...",
    },
    # Without a timeout a stalled connection would hang the script forever.
    timeout=10,
)
# Fail loudly, with the server's own message, instead of trying to parse an
# error page as JSON.
if not resp.ok:
    raise Exception(f"Request failed with HTTP {resp.status_code}: {resp.text}")
print(resp.json())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment