Created
January 24, 2022 22:09
-
-
Save henrydatei/1fb4b6fc545b81606c8fa63d850f0a9b to your computer and use it in GitHub Desktop.
extracts the solution for Datacamp courses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import re | |
import html | |
from urllib.parse import urlparse, parse_qs | |
import difflib | |
import csv | |
from io import StringIO | |
# Datacamp provides the data in a weird mixture of JSON and CSV, so I wrote my own function to find specific information
def findString2(result, search):
    """Return every field that directly follows a field equal to *search*.

    *result* is parsed as comma-delimited text with ``csv.reader``; each
    line of the text becomes one row. Within a row, every field that is
    exactly equal to *search* contributes the field immediately after it
    to the output, in the order encountered.

    Args:
        result: The raw text to scan.
        search: The field value to look for (exact string comparison).

    Returns:
        A list of the fields that follow each occurrence of *search*.
        An occurrence in the last position of a row has no successor and
        is skipped instead of raising ``IndexError``.
    """
    found = []
    for row in csv.reader(StringIO(result), delimiter=','):
        # Pair each field with its successor; the last field of a row is
        # never the "current" item, so there is no out-of-range lookup.
        found.extend(
            following
            for current, following in zip(row, row[1:])
            if current == search
        )
    return found
url = "https://campus.datacamp.com/courses/data-types-for-data-science-in-python/dictionaries-the-root-of-python?ex=4"

# Fetch the exercise page. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() reports HTTP errors directly
# instead of letting them surface later as a failed regex match.
req = requests.get(url, timeout=30)
req.raise_for_status()

# The page embeds its state as "window.PRELOADED_STATE = ...</script>".
state_match = re.search(r"window\.PRELOADED_STATE = (.*)</script>", req.text)
if state_match is None:
    raise RuntimeError("window.PRELOADED_STATE was not found in the page at " + url)
# [:-1] drops the final character of the captured text (presumably the
# trailing ';' of the JavaScript statement); HTML entities are then decoded.
result = html.unescape(state_match.group(1)[:-1])

# process information from URL
parsed_url = urlparse(url)
# The path looks like /courses/<course-slug>/<chapter-slug>, so after
# splitting on "/" the slugs are at positions 2 and 3.
path_parts = parsed_url.path.split("/")
titleraw = path_parts[2].replace("-", " ")
chapterraw = path_parts[3].replace("-", " ")
exercise = int(parse_qs(parsed_url.query)['ex'][0])

# print solution
solutions = findString2(result, "solution")
# The exercise number from the URL is 1-based; reject values that would
# otherwise index past the list or wrap around to the end of it.
if not 1 <= exercise <= len(solutions):
    raise RuntimeError(
        "exercise %d is out of range: %d solution(s) found" % (exercise, len(solutions))
    )
# Line breaks are split on a literal two-backslash + "n" sequence
# (presumably the text is double-escaped in the page state).
solLines = solutions[exercise - 1].split('\\\\n')
for line in solLines:
    print(line)

# find exact titles for exercise
titles = findString2(result, "title")
title_matches = difflib.get_close_matches(titleraw, titles)
chapter_matches = difflib.get_close_matches(chapterraw, titles)
if not title_matches or not chapter_matches:
    raise RuntimeError("could not match the course or chapter title from the URL against the page titles")
title = title_matches[0]
chapter = chapter_matches[0]
# NOTE(review): assumes the exercise titles follow the chapter title in
# order within `titles` - confirm against the page data.
exerciseTitle = titles[titles.index(chapter) + exercise]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment