Skip to content

Instantly share code, notes, and snippets.

@jerinisready
Created July 2, 2018 11:23
Show Gist options
  • Save jerinisready/e67b7cd907167cc784242682e3212a40 to your computer and use it in GitHub Desktop.
Save jerinisready/e67b7cd907167cc784242682e3212a40 to your computer and use it in GitHub Desktop.
If you want to download and save an image from an external url, this snippet might help you!
from django.core.files import File
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.files.temp import NamedTemporaryFile
from newspaper import Article
import requests
from myapp.models import Blog, Category, Tags
# Module metadata: author handle and a one-line description of the snippet.
__author__ = "jerinisready"
__doc__ = (
    "If you want to download and save an image from an external url, "
    "this snippet might help you!"
)
def save_image_from_url(obj, url, timeout=30):
    """Download the image at ``url`` and store it on ``obj.picture``.

    The response body is written to a temporary file and handed to
    ``obj.picture.save(..., save=False)``, so the model instance itself is
    NOT saved here; the caller is expected to call ``obj.save()``.

    :param obj: object exposing a ``picture`` file field (anything with a
        ``picture.save(name, content, save=...)`` method).
    :param url: absolute URL of the image to download.
    :param timeout: seconds to wait for the remote server before giving up.
    :raises requests.RequestException: on connection errors, timeouts, or a
        4xx/5xx response (so an error page is never stored as the image).
    """
    from urllib.parse import urlsplit

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page as the picture.
    response.raise_for_status()

    # Use only the path component so "?query" / "#fragment" parts do not end
    # up in the stored file name; fall back to a generic name when the URL
    # path ends with "/".
    name = urlsplit(url).path.split("/")[-1] or "image"

    # The context manager closes (and thereby deletes) the temporary file as
    # soon as the field has stored its content.
    with NamedTemporaryFile(delete=True) as img_temp:
        img_temp.write(response.content)
        img_temp.flush()
        obj.picture.save(name, File(img_temp), save=False)
def import_blog_from_url(url, user):
    """Scrape the article at ``url`` and save it as a ``Blog`` owned by ``user``.

    The page is downloaded and parsed with ``newspaper.Article``; ``nlp()``
    is run as well because the meta description is taken from
    ``article.summary``.

    ``Article`` attributes that an import like this can draw on include
    ``url``, ``clean_top_node``, ``article_html``, ``top_image``,
    ``authors``, ``title``, ``summary`` and ``keywords``.

    :param url: address of the article to import.
    :param user: user instance assigned to ``Blog.user``.
    :return: the saved ``Blog`` instance.
    """
    article = Article(url, keep_article_html=True)
    article.download()
    article.parse()
    article.nlp()  # must run before ``article.summary`` is read below

    b = Blog()
    b.title = article.title.replace('\n', '')
    b.meta_desc = article.summary.replace('\n', '')[:250]
    b.content = article.article_html.replace('\n', '')
    b.publish_date = None
    b.image_credit_url = b.image_credit = article.top_image
    # Only attempt the download when the article actually has a top image;
    # an empty URL would make the HTTP request fail.
    if article.top_image:
        save_image_from_url(b, article.top_image)
    b.video = None
    # ``authors`` may be empty; avoid an IndexError in that case.
    # NOTE(review): '' assumes ``scrapped_author`` is a text field -- confirm.
    b.scrapped_author = article.authors[0] if article.authors else ''
    b.own_blog = False
    b.is_draft = False
    b.is_edited = False
    b.external_url = article.url
    b.user = user
    # Saved first so the tag/category relations below can be attached.
    b.save()

    # Placeholder behaviour kept from the original: attach the first two
    # existing tags and categories rather than deriving them from the
    # article (e.g. ``article.meta_keywords``).
    # ``Tags`` is the name this module imports from ``myapp.models``.
    b.tags.add(*Tags.objects.all()[:2])
    b.categories.add(*Category.objects.all()[:2])
    return b
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment