Created
October 19, 2017 14:54
-
-
Save Cartman0/eef9e34bb9adea6444268ddfc0013ce5 to your computer and use it in GitHub Desktop.
はてなAPI 一括編集
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_soupXML_soupHTML_from_entryid(hatena_id, blog_id, password, entry_id):
    '''
    Fetch one entry from the Hatena Blog Atom endpoint and parse it.

    - args:
        - hatena_id: str, used in the member URI and as the basic-auth user
        - blog_id: str, used in the member URI
        - password: str, basic-auth password
          (presumably the AtomPub API key - confirm against the caller)
        - entry_id: str, id of the entry to fetch
    - return:
        - (soup(XML) of the response, soup(HTML) of its <content>) on success
        - False when the response is not OK (the status code is printed)
    '''
    member_uri = "https://blog.hatena.ne.jp/{hatena_id}/{blog_id}/atom/entry/{entry_id}".format(
        hatena_id=hatena_id, blog_id=blog_id, entry_id=entry_id)
    res_member = requests.get(member_uri, auth=(hatena_id, password))
    if not res_member.ok:
        print("status_code: " + str(res_member.status_code))
        return False

    soup_response_xml = bs4.BeautifulSoup(res_member.content, features="xml")

    # <content> may be missing, empty, or hold several child nodes; in the
    # last two cases .string is None, which BeautifulSoup() cannot parse.
    content_tag = soup_response_xml.find("content")
    if content_tag is None:
        content_markup = ""
    else:
        content_markup = content_tag.string
        if content_markup is None:
            # Fall back to the raw inner markup of the element.
            content_markup = content_tag.decode_contents()
    soup_content_html = bs4.BeautifulSoup(content_markup, "lxml")

    # The lxml HTML parser wraps fragments in <html><body>; strip the
    # wrappers so only the entry's own markup remains.
    if soup_content_html.html:
        soup_content_html.html.unwrap()
    if soup_content_html.body:
        soup_content_html.body.unwrap()

    return soup_response_xml, soup_content_html
def edit_put_entry(hatena_id, blog_id, password, entry_id, base_xml_soup,
                   updated_content_str):
    '''
    Upload new content for an existing entry with an HTTP PUT.

    - args:
        - hatena_id: str, used in the member URI and as the basic-auth user
        - blog_id: str, used in the member URI
        - password: str, basic-auth password
        - entry_id: str, id of the entry to overwrite
        - base_xml_soup: soup(XML), response XML
        - updated_content_str: str, eg. "<h1>title</h1><p>..."
    - return:
        - requests.put.response
    - raise:
        - ValueError when updated_content_str is empty
    '''
    if not updated_content_str:
        raise ValueError("updated_content is null string.")

    def create_xml(base_xml_soup,
                   title=None,
                   author=None,
                   updated=None,
                   categories=None,
                   draft=None,
                   content=None):
        '''
        Build the XML to upload from a copy of base_xml_soup.

        Every keyword left as None (or empty) keeps the value already
        present in base_xml_soup. Returns a new soup(XML); the argument
        itself is not modified.
        '''
        # Clone the XML soup by re-parsing its serialized form.
        update_soup_xml = bs4.BeautifulSoup(str(base_xml_soup), features="xml")

        # Remove the elements that must not be sent back
        # (presumably server-generated - confirm against the API docs).
        for tag_name in ("id", "published", "app:edited", "summary",
                         "formatted-content"):
            stale_tag = update_soup_xml.find(tag_name)
            if stale_tag is not None:
                stale_tag.decompose()
        for link_tag in update_soup_xml.find_all("link"):
            link_tag.decompose()

        # title
        if title:
            update_soup_xml.title.string = title

        # author: keep the <name> child when there is one instead of
        # replacing the whole <author> body with bare text.
        if author:
            author_tag = update_soup_xml.author
            name_tag = author_tag.find("name")
            target_tag = name_tag if name_tag is not None else author_tag
            target_tag.string = author

        # updated
        if updated:
            update_soup_xml.updated.string = updated

        # category: new tags belong inside <entry>, not next to it at the
        # document root where they would form a second root-level element.
        entry_tag = update_soup_xml.find("entry")
        category_parent = entry_tag if entry_tag is not None else update_soup_xml
        for new_c in (categories or ()):
            cate_tag = update_soup_xml.new_tag("category")
            cate_tag.attrs = {"term": new_c}
            category_parent.append(cate_tag)

        # draft: yes, no (edit the clone, not a name from another scope)
        if draft:
            update_soup_xml.find("app:draft").string = draft

        # Replace the content.
        if content:
            update_soup_xml.content.string = content

        return update_soup_xml

    member_uri = "https://blog.hatena.ne.jp/{hatena_id}/{blog_id}/atom/entry/{entry_id}".format(
        hatena_id=hatena_id, blog_id=blog_id, entry_id=entry_id)

    # base_xml_soup must be passed explicitly: create_xml requires it.
    updated_xml_soup = create_xml(base_xml_soup, content=updated_content_str)

    res_put = requests.put(
        member_uri,
        auth=(hatena_id, password),
        data=updated_xml_soup.encode("utf-8"))
    return res_put
# Rewrite every entry listed in entry_id_list:
#   1. fetch the entry,
#   2. drop the <article> wrapper, replace the <nav> table of contents with
#      the "[:contents]" notation, and turn the first "====" marker into a
#      "<!-- more -->" marker,
#   3. PUT the entry back only when the content actually changed.

# First "====" marker, either wrapped in its own <p> or bare.
read_more_pattern = re.compile(r"<p>(====+)</p>|(====+)")

for entry_id in entry_id_list:
    print("get: " + entry_id)

    # Fetch the entry. The helper returns False (after printing the status
    # code) when the GET fails; skip that entry instead of crashing on the
    # tuple unpacking and aborting the rest of the batch.
    fetched = get_soupXML_soupHTML_from_entryid(
        hatena_id, blog_id, password, entry_id=entry_id)
    if not fetched:
        print("failed GET: " + entry_id)
        continue
    soup_xml, soup_content_html = fetched
    print(soup_xml.title)

    # Work on a clone so the fetched content stays available for comparison.
    soup_update_html = bs4.BeautifulSoup(str(soup_content_html), "lxml")

    # Remove the <article> wrapper but keep its children.
    if soup_update_html.article:
        soup_update_html.article.unwrap()

    # Rewrite the table-of-contents <nav> into the Hatena notation.
    nav = soup_update_html.nav
    if nav:
        nav.clear()
        p = soup_update_html.new_tag("p")
        p.string = "[:contents]"
        nav.append(p)

    # Serialize, without the <html>/<body> wrappers the parser added.
    if soup_update_html.html:
        soup_update_html.html.unwrap()
    if soup_update_html.body:
        soup_update_html.body.unwrap()
    content_str = str(soup_update_html)

    # Replace the "read more" notation by plain string substitution
    # (first occurrence only).
    content_str = read_more_pattern.sub(
        "<p><!-- more --></p>", content_str, count=1)

    # Nothing changed: do not upload.
    if str(soup_content_html) == content_str:
        print("through: " + entry_id)
        continue

    # Upload the entry.
    res = edit_put_entry(
        hatena_id,
        blog_id,
        password,
        entry_id=entry_id,
        base_xml_soup=soup_xml,
        updated_content_str=content_str)
    if not res.ok:
        print("faild PUT: " + entry_id)
        print(res.text)
        continue

    print("PUT: " + str(res.status_code) + " : " + entry_id)
    print("")
    # Short pause between successful uploads.
    time.sleep(0.01)

print("completed!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment