Created
July 2, 2018 11:23
-
-
Save jerinisready/e67b7cd907167cc784242682e3212a40 to your computer and use it in GitHub Desktop.
If you want to download and save an image from an external URL, this snippet might help you!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import posixpath
from urllib.parse import urlsplit

import requests
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
from django.core.files.uploadedfile import InMemoryUploadedFile
from newspaper import Article

from myapp.models import Blog, Category, Tags
# Handle of the snippet's author.
__author__ = 'jerinisready'
# Assigned explicitly: this statement comes after the imports, so a bare
# string literal here would not be picked up as the module docstring.
__doc__ = """If you want to download and save an image from an external url, this snippet might help you!"""
def save_image_from_url(obj, url, timeout=10):
    """Download the image at ``url`` and attach it to ``obj.picture``.

    The response body is written to a temporary file and handed to
    ``obj.picture.save(name, content, save=False)``; because of
    ``save=False`` the caller still has to call ``obj.save()`` itself.

    :param obj: object exposing a ``picture`` attribute with a
        ``save(name, content, save=...)`` method (presumably a Django model
        with a file/image field -- confirm against the model definition).
    :param url: absolute URL of the image to download.
    :param timeout: seconds to wait for the remote server, forwarded to
        ``requests.get`` so an unresponsive host cannot block forever.
    :raises requests.RequestException: if the request fails, times out or
        the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of storing an HTML error page as the picture.
    response.raise_for_status()

    # Use only the path component so a query string ("?w=100") does not end
    # up in the file name; fall back to a fixed name for URLs ending in "/".
    name = posixpath.basename(urlsplit(url).path) or "image"

    # The context manager closes (and thereby deletes) the temp file even
    # if the storage backend raises while saving.
    with NamedTemporaryFile(delete=True) as img_temp:
        img_temp.write(response.content)
        img_temp.flush()
        obj.picture.save(name, File(img_temp), save=False)
def import_blog_from_url(url, user):
    """Scrape the article at ``url`` and store it as a ``Blog`` for ``user``.

    import_blog_from_url(Url:string, User:<DjangoUserModel>)

    The page is fetched and parsed with ``newspaper.Article``; its title,
    summary, HTML body, top image and first author are copied onto a new
    ``Blog`` which is saved and then returned.

    Some of the useful APIs of package ``newspaper`` (after ``download()``,
    ``parse()`` and ``nlp()``)::

        article.url
        article.clean_top_node
        article.article_html.replace('\\n', '')
        article.top_image
        article.authors[0]
        article.title.replace('\\n', '')
        article.summary.replace('\\n', '')
        article.keywords

    :param url: address of the article to import.
    :param user: user stored on the new blog's ``user`` attribute.
    :return <Blog>
    """
    article = Article(url, keep_article_html=True)
    article.download()
    article.parse()
    # nlp() has to run before article.summary is read below.
    article.nlp()

    b = Blog()
    b.title = article.title.replace('\n', '')
    b.meta_desc = article.summary.replace('\n', '')[:250]
    b.content = article.article_html.replace('\n', '')
    b.publish_date = None
    b.image_credit_url = b.image_credit = article.top_image
    # article.top_image is the image URL; skip the download when the
    # article has none instead of requesting an empty URL.
    if article.top_image:
        save_image_from_url(b, article.top_image)
    b.video = None
    # Not every article has a detectable author; avoid an IndexError.
    # NOTE(review): assumes the field accepts an empty string -- confirm
    # against the Blog model.
    b.scrapped_author = article.authors[0] if article.authors else ''
    b.own_blog = False
    b.is_draft = False
    b.is_edited = False
    b.external_url = article.url
    b.user = user
    # The instance must be saved before the relations below can be added.
    b.save()
    # Placeholder: attaches the first two existing tags/categories rather
    # than deriving them from the article (article.meta_keywords and
    # article.publish_date are available for that).
    # The model is referenced as ``Tags`` to match the file's import.
    b.tags.add(*Tags.objects.all()[:2])
    b.categories.add(*Category.objects.all()[:2])
    return b
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment