Created
February 18, 2019 00:28
-
-
Save guitarmanvt/a59d87e10098c4d1d40115d914582a77 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 690db7d59c5dca1f814263fde11563ebe36e9b4b
Author: John Anderson <[email protected]>
Date: Sun Feb 17 19:16:42 2019 -0500
Scrape HTML tables to JSON
diff --git a/hello/views.py b/hello/views.py | |
index c248072..9616840 100644 | |
--- a/hello/views.py | |
+++ b/hello/views.py | |
@@ -1,13 +1,45 @@ | |
from django.shortcuts import render | |
-from django.http import HttpResponse | |
+from django.http import JsonResponse, HttpResponse | |
+import json | |
+ | |
+from bs4 import BeautifulSoup | |
+import requests | |
from .models import Greeting | |
# Create your views here. | |
def index(request): | |
- # return HttpResponse('Hello from Python!') | |
- return render(request, "index.html") | |
+ target = request.GET.get('target') | |
+ if target: | |
+ try: | |
+ r = requests.get(target) | |
+ data = { | |
+ 'url': target, | |
+ } | |
+ | |
+ tables = [] | |
+ soup = BeautifulSoup(r.text) | |
+ for htable in soup.find_all('table'): | |
+ jtable = [] | |
+ for hrow in htable.find_all('tr'): | |
+ jrow = [] | |
+ for hdata in hrow.find_all('td'): | |
+ jrow.append(hdata.text) | |
+ jtable.append(jrow) | |
+ tables.append(jtable) | |
+ | |
+ data['tables'] = tables | |
+ response = JsonResponse(data) | |
+ return response | |
+ except: | |
+ raise | |
+ # just fallback; not wonderful, but fast to code ;) | |
+ #pass | |
+ # Fallback, if anything went wrong: Use the sample code from Heroku docs. | |
+ r = requests.get('http://httpbin.org/status/418') | |
+ print(r.text) | |
+ return HttpResponse('<pre>' + r.text + '</pre>') | |
def db(request): | |
diff --git a/requirements.txt b/requirements.txt | |
index 4af6a37..02d04a0 100644 | |
--- a/requirements.txt | |
+++ b/requirements.txt | |
@@ -1,3 +1,5 @@ | |
django | |
gunicorn | |
-django-heroku | |
\ No newline at end of file | |
+django-heroku | |
+requests | |
+BeautifulSoup4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment