Created
April 27, 2020 17:40
-
-
Save branw/0da4635245453809f3a937f7187806dc to your computer and use it in GitHub Desktop.
Web app interface for scraping Chegg textbook solutions (Sept. 2018)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from requests.auth import HTTPBasicAuth | |
from pprint import pprint | |
import secrets | |
import uuid | |
from urllib.parse import urlparse | |
from flask import Flask, jsonify, render_template, request, abort, redirect | |
from collections import deque | |
# Shared HTTP session: every call to the Chegg hub API in this module goes
# through `s`, so the Basic-auth credentials and headers below apply to all
# of them.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration instead.
s = requests.Session()
s.auth = HTTPBasicAuth('hlDpZAPF05mqjAmg7cqtIKLOhUryB8p1', 'uBjzakmxGx6WtqAr')
# Device and session identifiers are generated fresh each time the module
# is loaded.
s.headers['X-CHEGG-DEVICEID'] = secrets.token_hex(8)
s.headers['X-CHEGG-SESSIONID'] = str(uuid.uuid4())
# The 'X-CHEGG-XYZPASS': '1' header was disabled in the original and is
# intentionally still not sent.
s.headers['User-Agent'] = 'Dalvik/2.1.0 (Linux; U; Android 9; Pixel XL Build/PPR1.180610.009)'

# In-process caches. `tbs_recent_books` keeps at most the 10 most recently
# loaded book ids; the dicts map an id to the data fetched for it.
tbs_recent_books = deque(maxlen=10)
tbs_books, tbs_chapters, tbs_problems = {}, {}, {}
def get_tbs_books(query):
    """Search the book endpoint for `query`, restricted to books with solutions.

    Returns the decoded JSON body of the response unchanged.
    Raises `requests.HTTPError` (via `raise_for_status`) on a 4xx/5xx reply.
    """
    r = s.get('https://hub.chegg.com/v1/book', params={
        'q': query,
        # The original passed the bare name `true`, which is undefined in
        # Python and raised NameError on every call. Send the lowercase
        # literal the author evidently intended.
        # NOTE(review): assumes the API expects 'true' rather than Python's
        # 'True' -- confirm against the service.
        'f.hasSolutions': 'true',
    })
    r.raise_for_status()
    return r.json()
def get_tbs_book(book_id):
    """Return a summary dict for one book, or None if the API answers 404.

    A successful lookup is stored in `tbs_books` and the id is appended to
    `tbs_recent_books` (once). Later calls for the same id are served from
    the cache without a request. Non-404 HTTP errors raise via
    `raise_for_status`.
    """
    try:
        return tbs_books[book_id]
    except KeyError:
        pass  # not cached yet; fetch it below

    response = s.get(f'https://hub.chegg.com/v1/book/{book_id}')
    if response.status_code == 404:
        return None
    response.raise_for_status()

    details = response.json()['result']

    book = {'id': book_id}
    book['name'] = details['title']
    book['full_name'] = details['fullTitle']
    book['edition'] = details['edition']
    # Prefer the large cover; fall back to the thumbnail when it is absent.
    book['image'] = details['imgLarge'] if 'imgLarge' in details else details['imgThumb']
    book['has_solutions'] = details['hasSolutions']

    if book_id not in tbs_recent_books:
        tbs_recent_books.append(book_id)
    tbs_books[book_id] = book
    return book
def get_tbs_chapters(book_id, offset=0, all=True):
    """Fetch the chapter entries of a book, following pagination.

    Args:
        book_id: id of the book whose chapters are requested.
        offset: value sent as the `offset` query parameter of the first page.
        all: when truthy, keep following `nextPage` links until none is left;
            otherwise return only the first page.

    Returns:
        A list of the items found under `result` in each fetched page.

    Raises:
        requests.HTTPError: via `raise_for_status` on any failed page.

    Only the complete listing (`offset == 0` and `all` truthy) is cached in
    `tbs_chapters`. The original cached whatever was fetched, so a partial
    request could be stored and then returned as the full chapter list.
    """
    cacheable = bool(all) and offset == 0
    if cacheable and book_id in tbs_chapters:
        return tbs_chapters[book_id]

    r = s.get(f'https://hub.chegg.com/v1/book/{book_id}/chapters', params={
        'offset': offset
    })
    r.raise_for_status()
    j = r.json()
    output = list(j['result'])

    while all and 'nextPage' in j:
        r = s.get(j['nextPage'])
        r.raise_for_status()
        j = r.json()
        output.extend(j['result'])

    if cacheable:
        tbs_chapters[book_id] = output
    return output
def get_tbs_problems(chapter_id, offset=0, all=True):
    """Fetch the problem entries of a chapter, following pagination.

    Args:
        chapter_id: id of the chapter whose problems are requested.
        offset: value sent as the `offset` query parameter of the first page.
        all: when truthy, keep following `nextPage` links until none is left;
            otherwise return only the first page.

    Returns:
        A list of the items found under `result` in each fetched page.

    Raises:
        requests.HTTPError: via `raise_for_status` on any failed page.

    Only the complete listing (`offset == 0` and `all` truthy) is cached in
    `tbs_problems`. The original cached whatever was fetched, so a partial
    request could be stored and then returned as the full problem list.
    """
    cacheable = bool(all) and offset == 0
    if cacheable and chapter_id in tbs_problems:
        return tbs_problems[chapter_id]

    r = s.get(f'https://hub.chegg.com/v1/chapter/{chapter_id}/problems', params={
        'offset': offset
    })
    r.raise_for_status()
    j = r.json()
    output = list(j['result'])

    while all and 'nextPage' in j:
        r = s.get(j['nextPage'])
        r.raise_for_status()
        j = r.json()
        output.extend(j['result'])

    if cacheable:
        tbs_problems[chapter_id] = output
    return output
def get_tbs_problem_text(problem_id):
    """Return the HTML body of a problem statement, or None on a 404.

    Any other HTTP error status raises via `raise_for_status`.
    """
    url = f'https://hub.chegg.com/content/tbs-problem/{problem_id}.html'
    response = s.get(url)
    if response.status_code != 404:
        response.raise_for_status()
        return response.text
    return None
def get_tbs_solutions(problem_id):
    """Return the `result` entry of the solutions response for a problem.

    Raises via `raise_for_status` when the request fails.
    """
    url = f'https://hub.chegg.com/v1/problem/{problem_id}/solutions'
    response = s.get(url)
    response.raise_for_status()
    payload = response.json()
    return payload['result']
def load_solutions(problem_id):
    """Download every step of every solution for a problem.

    Returns:
        A list with one dict per solution, each holding `num_steps` (the
        number of steps) and `steps`, a list of `{'i': <1-based index>,
        'text': <body fetched from the step's link>}` dicts.

    Raises:
        requests.HTTPError: from `get_tbs_solutions` when the solution
        listing cannot be fetched. The per-step fetches are best-effort,
        as in the original: their status is not checked and the response
        body is used as-is.
    """
    output = []
    for solution in get_tbs_solutions(problem_id):
        # The original read `solutions[0]['steps']` here, so every solution
        # was rendered with the first solution's steps.
        steps = solution['steps']
        rendered_steps = [
            {'i': number, 'text': s.get(step['link']).text}
            for number, step in enumerate(steps, start=1)
        ]
        output.append({
            'num_steps': len(steps),
            'steps': rendered_steps
        })
    return output
def load_problems(chapter_id):
    """Return `{'name', 'id'}` dicts for each problem of a chapter.

    Strips the entries from `get_tbs_problems` down to the two fields.
    """
    return [
        {'name': entry['name'], 'id': entry['id']}
        for entry in get_tbs_problems(chapter_id)
    ]
def load_chapters(book_id):
    """Return `{'name', 'id'}` dicts for each chapter of a book.

    Strips the entries from `get_tbs_chapters` down to the two fields.
    """
    return [
        {'name': entry['name'], 'id': entry['id']}
        for entry in get_tbs_chapters(book_id)
    ]
# Flask application object on which all views in this module are registered.
app = Flask(__name__)


@app.route('/book/<int:book_id>/chapters')
def get_chapters(book_id):
    """JSON endpoint: the chapter entries of a book as fetched upstream."""
    chapters = get_tbs_chapters(book_id)
    return jsonify(chapters)
@app.route('/chapter/<int:chapter_id>/problems')
def get_problems(chapter_id):
    """JSON endpoint: the problem entries of a chapter as fetched upstream."""
    problems = get_tbs_problems(chapter_id)
    return jsonify(problems)
@app.route('/problem/<int:problem_id>')
def get_problem(problem_id):
    """Return the problem statement HTML, or an empty body on any non-200.

    Unlike `get_tbs_problem_text`, this endpoint never raises for an HTTP
    error status: every status other than 200 produces an empty response.
    """
    r = s.get(f'https://hub.chegg.com/content/tbs-problem/{problem_id}.html')
    # Compare by value. The original `is 200` tested object identity, which
    # only works by interpreter accident and triggers a SyntaxWarning on
    # Python 3.8+.
    return r.text if r.status_code == 200 else ''
@app.route('/problem/<int:problem_id>/solutions')
def get_solutions(problem_id):
    """JSON endpoint: all solutions of a problem with their step bodies."""
    solutions = load_solutions(problem_id)
    return jsonify(solutions)
@app.route('/')
def request_index():
    """Landing page: render the template with the recently loaded books."""
    recent_books = []
    for recent_id in tbs_recent_books:
        # Every id in the deque was cached in tbs_books by get_tbs_book.
        recent_books.append(tbs_books[recent_id])
    return render_template('chegg.html', recent_books=recent_books)
@app.route('/query', methods=['POST'])
def request_query():
    """Turn a pasted Chegg URL into a redirect to the matching local page.

    Reads the `query` form field and inspects only its URL path:
    * Q&A paths redirect to `/qna/<text after the last 'q'>`;
    * other homework-help paths redirect to `/tbs/<text after the last '-'>`;
    * anything else aborts with 501.

    NOTE(review): no `/qna/...` route is defined in the visible source.
    """
    submitted_path = urlparse(request.form['query']).path

    # The more specific prefix has to be tested first: Q&A paths also start
    # with '/homework-help/'.
    if submitted_path.startswith('/homework-help/questions-and-answers/'):
        question_id = submitted_path.rpartition('q')[2]
        return redirect(f'/qna/{question_id}', code=302)

    if submitted_path.startswith('/homework-help/'):
        book_id = submitted_path.rpartition('-')[2]
        return redirect(f'/tbs/{book_id}', code=302)

    abort(501)
@app.route('/tbs/<int:book_id>')
def request_tbs_book(book_id):
    """Book page: render the book summary together with its chapter list."""
    current = {'book': {'id': book_id}}
    # Fetch the book before the chapters, matching the original call order.
    book = get_tbs_book(book_id)
    chapters = load_chapters(book_id)
    return render_template('chegg.html', current=current, book=book,
                           chapters=chapters)
@app.route('/tbs/<int:book_id>/<int:chapter_id>')
def request_tbs_chapter(book_id, chapter_id):
    """Chapter page: render the book, its chapters and this chapter's problems.

    Aborts with 404 when `chapter_id` is not among the book's chapters. The
    original called `next()` without a default, so an unknown chapter raised
    StopIteration and surfaced as a 500.
    """
    chapters = get_tbs_chapters(book_id)
    # The route converter yields an int, while the lookup compares against
    # `item['id']` as a string, exactly as the original did.
    wanted_id = str(chapter_id)
    chapter_info = next((item for item in chapters if item['id'] == wanted_id), None)
    if chapter_info is None:
        abort(404)

    current = {
        'chapter': {
            'name': chapter_info['name'],
            'id': chapter_id
        },
        'book': {
            'id': book_id
        }
    }
    return render_template('chegg.html', current=current, book=get_tbs_book(book_id),
                           chapters=load_chapters(book_id), problems=load_problems(chapter_id))
@app.route('/tbs/<int:book_id>/<int:chapter_id>/<int:problem_id>')
def request_tbs_problem(book_id, chapter_id, problem_id):
    """Problem page: render the problem statement and all of its solutions.

    Aborts with 404 when the problem is not listed in the chapter or the
    chapter is not listed in the book. The original called `next()` without
    a default, so either miss raised StopIteration and surfaced as a 500.
    """
    # Resolve both ids before fetching the statement and the solution steps,
    # so an unknown id fails fast.
    problems = get_tbs_problems(chapter_id)
    wanted_problem = str(problem_id)
    problem_info = next((item for item in problems if item['id'] == wanted_problem), None)
    if problem_info is None:
        abort(404)

    chapters = get_tbs_chapters(book_id)
    wanted_chapter = str(chapter_id)
    chapter_info = next((item for item in chapters if item['id'] == wanted_chapter), None)
    if chapter_info is None:
        abort(404)

    problem_text = get_tbs_problem_text(problem_id)
    solutions = load_solutions(problem_id)

    current = {
        'problem': {
            'name': problem_info['name'],
            'id': problem_id
        },
        'chapter': {
            'name': chapter_info['name'],
            'id': chapter_id
        },
        'book': {
            'id': book_id
        }
    }
    return render_template('chegg.html', current=current, book=get_tbs_book(book_id),
                           chapters=load_chapters(book_id), problems=load_problems(chapter_id),
                           problem_text=problem_text, solutions=solutions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html> | |
<html lang="en"> | |
<head> | |
<meta charset="utf-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> | |
<title>Chegg</title> | |
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous"> | |
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.min.js" integrity="sha384-ChfqqxuZUCnJSK3+MXmPNIyE6ZbWh2IMqE241rYiqJxyMiZ6OW/JmZQ5stwEULTy" crossorigin="anonymous"></script> | |
</head> | |
<body class="bg-light"> | |
<div class="container"> | |
<div class="py-5 text-center"> | |
<h6><a href="/">β£<pre> | |
ππβββββββββββπ | |
πππββπ«ββπππ | |
πππββπββππ»π | |
πβ£ππππππππ | |
Doing my homework!
βοΈπβοΈπβοΈπβοΈπ | |
</pre></a></h6> | |
</div> | |
<div class="row"> | |
<div class="col-md-4"> | |
<form class="card p-2 list-group mb-3" action="/query" method="post"> | |
<div class="input-group"> | |
<input class="form-control" name="query" placeholder="Chegg URL" type="text"> | |
<div class="input-group-append"> | |
<button type="submit" class="btn btn-secondary">Load</button> | |
</div> | |
</div> | |
</form> | |
{% if book %} | |
<div class="card mb-3"> | |
<div class="row no-gutters"> | |
<div class="col-4"> | |
<img src="{{ book.image }}" class="img-fluid" alt=""> | |
</div> | |
<div class="col"> | |
<div class="card-body px-2"> | |
<h6 class="card-title">{{ book.full_name }}</h6> | |
</div> | |
</div> | |
</div> | |
</div> | |
{% endif %} | |
{% if chapters %} | |
<ul class="list-group mb-3"> | |
<li class="list-group-item justify-content-between lh-condensed"> | |
<h5>Chapters</h5> | |
<p> | |
{% for chapter in chapters %} | |
<a href="/tbs/{{ current.book.id }}/{{ chapter.id }}" class="badge {% if current and current.chapter and current.chapter.id|string == chapter.id|string %}badge-dark{% else %}badge-light{% endif %}">{{ chapter.name }}</a> | |
{% endfor %} | |
</p> | |
</li> | |
</ul> | |
{% endif %} | |
{% if problems %} | |
<ul class="list-group mb-3"> | |
<li class="list-group-item justify-content-between lh-condensed"> | |
<h5>Problems</h5> | |
<p> | |
{% for problem in problems %} | |
<a href="/tbs/{{ current.book.id }}/{{ current.chapter.id }}/{{ problem.id }}" class="badge {% if current and current.problem and current.problem.id|string == problem.id|string %}badge-dark{% else %}badge-light{% endif %}">{{ problem.name }}</a> | |
{% endfor %} | |
</p> | |
</li> | |
</ul> | |
{% endif %} | |
</div> | |
<div class="col-md-8"> | |
{% if not current %} | |
<div class="card"> | |
<div class="card-body"> | |
<h2>Recent Books</h2> | |
<div class="card-columns"> | |
{% for book in recent_books %} | |
<div class="card"> | |
<a href="/tbs/{{ book.id }}"><img class="card-img-top" src="{{ book.image }}" title="{{ book.full_name }}"></a> | |
</div> | |
{% endfor %} | |
</div> | |
</div> | |
</div> | |
{% endif %} | |
{% if current and current.problem %} | |
<ul class="list-group mb-3"> | |
<li class="list-group-item justify-content-between"> | |
<div> | |
<h5>Chapter {{ current.chapter.name }}, Problem {{ current.problem.name }}</h5> | |
</div> | |
{% if problem_text != None %} | |
{{ problem_text | safe }} | |
{% endif %} | |
</li> | |
</ul> | |
{% for solution in solutions %} | |
<ul class="list-group mb-3"> | |
{% for step in solution.steps %} | |
<li class="list-group-item justify-content-between"> | |
<div> | |
<h5>Step {{ step.i }} <span class="text-muted">of {{ solution.num_steps }}</span></h5> | |
</div> | |
{{ step.text | safe }} | |
</li> | |
{% endfor %} | |
</ul> | |
{% endfor %} | |
{% endif %} | |
</div> | |
</div> | |
</div> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you provide a step-by-step guide, sir, on how to set this up?