Skip to content

Instantly share code, notes, and snippets.

@AlphaGit
Created July 29, 2018 18:18
Show Gist options
  • Save AlphaGit/9f909745d7c573146e6102ad65cc8149 to your computer and use it in GitHub Desktop.
Save AlphaGit/9f909745d7c573146e6102ad65cc8149 to your computer and use it in GitHub Desktop.
Loading tumblr posts
# inspired by https://github.com/veggiedefender/miraculousladybot/blob/master/log.py
import requests
import re
import tumblr_client
from html2text import html2text
from datetime import datetime
from settings import blogs_to_search, posts_per_blog, tags_to_search, posts_per_search
def append_to_file(filename, contents):
    """Append *contents* plus a trailing newline to *filename*.

    The file is opened in append mode with UTF-8 encoding and is created
    if it does not exist yet.

    Args:
        filename: Path of the file to append to.
        contents: Text to write; a single ``'\\n'`` is added after it.
    """
    with open(filename, 'a', encoding='utf8') as file:
        # ``write`` is the right call for a single string; ``writelines``
        # would iterate over it one character at a time.
        file.write(contents + '\n')
def clean_up(text):
    """Return *text* as-is.

    Placeholder hook: no clean-up rules are applied yet, so the input is
    handed back unchanged.
    """
    # We'll revisit this one later; for now it is a pass-through.
    cleaned = text
    return cleaned
def get_posts(search_function, search_parameter, post_search_limit):
    """Page backwards through posts and append text posts to 'text_source.txt'.

    ``search_function(search_parameter, timestamp)`` is called repeatedly,
    starting from the current time and then from the oldest timestamp seen
    in the previous page. Each returned post is indexed by ``"timestamp"``
    and ``"type"``; posts of type ``"text"`` also have their ``"body"``
    converted with ``html2text`` and appended to the output file.
    NOTE(review): presumably ``search_function`` returns posts older than
    the given timestamp -- confirm against ``tumblr_client``.

    Paging stops when any of these holds:
      * ``post_search_limit`` posts have been written,
      * a page comes back empty,
      * the oldest timestamp stops moving back (no progress),
      * five pages in total contained no text posts.

    Args:
        search_function: Callable taking ``(search_parameter, timestamp)``
            and returning a sequence of post dicts.
        search_parameter: Value forwarded unchanged to ``search_function``.
        post_search_limit: Maximum number of posts to write.

    Returns:
        The number of posts appended to the file.
    """
    total = 0
    earliest = int(datetime.now().timestamp())
    # Counts pages without any text post; it is cumulative, never reset.
    iterations_without_posts = 0
    # Strict ``<`` so that no further page is requested once the limit is met.
    while total < post_search_limit and iterations_without_posts < 5:
        posts = search_function(search_parameter, earliest)
        if not posts:
            break
        oldest = min(post["timestamp"] for post in posts)
        if oldest >= earliest:
            # The cursor did not move back, so the next request would use
            # the same arguments again; stop instead of re-reading posts.
            break
        earliest = oldest
        text_posts = [post for post in posts if post["type"] == "text"]
        if not text_posts:
            iterations_without_posts += 1
            continue
        for post in text_posts:
            if total >= post_search_limit:
                # Enforce the limit inside a page, not only between pages.
                break
            text = clean_up(html2text(post["body"]))
            if not text:
                continue
            append_to_file('text_source.txt', text)
            total += 1
    return total
# Harvest every configured blog first, then every configured tag search.
# Each entry is (fetch function, targets to iterate, per-target post limit).
_harvest_plan = (
    (tumblr_client.get_posts_from_blog, blogs_to_search, posts_per_blog),
    (tumblr_client.get_posts_from_search, tags_to_search, posts_per_search),
)
for _fetch, _targets, _limit in _harvest_plan:
    for _target in _targets:
        get_posts(_fetch, _target, _limit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment