Created
May 19, 2017 04:37
-
-
Save Muratam/3f9a105b65cf269897533a6366a7cca0 to your computer and use it in GitHub Desktop.
デジタル教材配信のDL例.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os | |
import shutil | |
from bs4 import BeautifulSoup | |
# BookRoll credentials. These are placeholders: fill in your own values and
# keep real ones out of version control.
your_id = "xx_xxxxxx"
your_pass = "xxxxxxxx"

# Both the login form (GET) and the credential submission (POST) use this URL.
LOGIN_URL = "https://bookroll.let.media.kyoto-u.ac.jp/bookroll/login"

# A single session is shared by the whole script so that cookies set during
# login are sent with every later request.
s = requests.session()

# Step 1: load the login form and pull out the hidden CSRF token field that
# has to be echoed back together with the credentials.
login = s.get(LOGIN_URL)
login.raise_for_status()
soup = BeautifulSoup(login.text, "html5lib")
csrf_field = soup.select_one("input[name=_csrf]")
if csrf_field is None:
    # Without this check the failure would surface as an opaque
    # "'NoneType' object has no attribute 'get'".
    raise RuntimeError("login page has no input[name=_csrf] field: " + LOGIN_URL)
csrf = csrf_field.get("value")

# Step 2: submit the credentials plus the token.
login = s.post(
    LOGIN_URL,
    data={
        "userid": your_id,
        "password": your_pass,
        "_csrf": csrf,
    },
)
login.raise_for_status()
# Dump the response body so the outcome of the login can be inspected by eye.
print(login.text)
def dl_contents(url, dirname, limit, session=None):
    """Download numbered page images into ``dirname``.

    Pages ``1`` through ``limit`` (inclusive) are requested from ``url % i``
    and written to ``<dirname>/img<i>.jpg``.

    Args:
        url: URL template with a single ``%s`` placeholder for the page number.
        dirname: Output directory; created (including parents) if missing.
        limit: Number of the last page to fetch; ``0`` fetches nothing.
        session: Object whose ``get(url, stream=True)`` returns the response
            to save. Defaults to the module-level session ``s``.

    Raises:
        Whatever ``raise_for_status()`` raises for a failed request, so an
        error page is never silently stored as an image file.
    """
    if session is None:
        session = s
    # makedirs(exist_ok=True) handles nested paths and avoids the
    # check-then-create race of "if not exists: mkdir".
    os.makedirs(dirname, exist_ok=True)
    for i in range(1, limit + 1):
        # The context manager releases the streamed connection even if the
        # copy below fails part-way.
        with session.get(url % i, stream=True) as response:
            response.raise_for_status()
            # The raw stream is the undecoded wire body by default; ask it to
            # undo any gzip/deflate Content-Encoding before writing to disk.
            response.raw.decode_content = True
            with open(os.path.join(dirname, "img%s.jpg" % i), "wb") as f:
                shutil.copyfileobj(response.raw, f)
# Fetch pages 1..147 of one content set into the "t1" directory, then print a
# short marker once every page has been written.
dl_contents(
    url="https://bookroll.let.media.kyoto-u.ac.jp/bookroll/contents/unzipped/e89f38039ee41ccb137879a1d1f237f5f3679ab0dea961e78d77d48becfc7766/OPS/images/out_%s.jpg",
    dirname="t1",
    limit=147,
)
print("dl")
Panda 経由でしかアクセスできなくなったので対応してくれー(他力本願)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ImageMagick と coreutils の gls(-v で番号順に並べる)を使って PDF にまとめよう:
convert `gls -v` imgs.pdf