Created
October 10, 2024 11:25
-
-
Save yazdipour/e7b4a36ae266d379295aaedfce99a3dc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A Python scraper that fetches https://www.einbuergerungstest-online.eu/fragen/{page_number}/ (page_number runs from 1 to 10) and extracts the questions and answers from each page.
# The scraper should return, in JSON format, the questions (class=questions-question-text), the multiple-choice options (li elements inside class="list-unstyled question-answers-list"; the element with class=question-answer-right is the correct answer) and the success percentage (class=progress-bar-success).
# The scraper should be able to scrape all 10 pages and save the result in a JSON file.
# The output should be in the following format: | |
# { | |
# "questions": [ | |
# { | |
# "question_id": "1", | |
# "question": "In Deutschland dürfen Menschen offen etwas gegen die Regierung sagen, weil …", | |
# "options": [ | |
# "hier Religionsfreiheit gilt.", | |
# "die Menschen Steuern zahlen.", | |
# "die Menschen das Wahlrecht haben.", | |
# "hier Meinungsfreiheit gilt." | |
# ], | |
# "answer": "✅ hier Meinungsfreiheit gilt.", | |
# "percentage": 91, | |
# "language": "de" | |
# } | |
# ] | |
# } | |
# <div class="content container"> | |
# <h2>Die Fragen mit Antworten</h2> | |
# <div class="questions-question-stats"> | |
# <div class="progress" title="In 90,6% der bisher abgeschlossenen Tests wurde diese Frage richtig beantwortet"> | |
# <div class="progress-bar progress-bar-success" style="width:91%">91%</div> | |
# <div class="progress-bar progress-bar-danger" style="width:9%"></div> | |
# </div> | |
# </div> | |
# <div class="row" id="frage-1"> | |
# <div class="col-sm-6"> | |
# <div class="questions-question-text"> | |
# <div class="questions-question-id">1.</div> | |
# <p><a href="/fragen/1-in-deutschland-duerfen-menschen-offen-etwas-gegen-die-regierung-sagen-weil/">In Deutschland dürfen Menschen offen etwas gegen die Regierung sagen, weil …</a></p> | |
# </div> | |
# </div> | |
# <div class="col-xs-11 col-xs-offset-1 col-sm-offset-0 col-sm-6"> | |
# <ul class="list-unstyled question-answers-list"> | |
# <li>hier Religionsfreiheit gilt.</li> | |
# <li>die Menschen Steuern zahlen.</li> | |
# <li>die Menschen das Wahlrecht haben.</li> | |
# <li><span class="question-answer-right">hier Meinungsfreiheit gilt.</span></li> | |
# </ul> | |
# </div> | |
# </div> | |
# <div class="questions-question-stats"> | |
# <div class="progress" title="In 92,3% der bisher abgeschlossenen Tests wurde diese Frage richtig beantwortet"> | |
# <div class="progress-bar progress-bar-success" style="width:92%">92%</div> | |
# <div class="progress-bar progress-bar-danger" style="width:8%"></div> | |
# </div> | |
# </div> | |
# <div class="row" id="frage-2"> | |
# <div class="col-sm-6"> | |
# <div class="questions-question-text"> | |
# <div class="questions-question-id">2.</div> | |
# <p>In Deutschland können Eltern bis zum 14. Lebensjahr ihres Kindes entscheiden, ob es in der Schule am …</p> | |
# </div> | |
# </div> | |
# <div class="col-xs-11 col-xs-offset-1 col-sm-offset-0 col-sm-6"> | |
# <ul class="list-unstyled question-answers-list"> | |
# <li>Geschichtsunterricht teilnimmt.</li> | |
# <li><span class="question-answer-right">Religionsunterricht teilnimmt.</span></li> | |
# <li>Politikunterricht teilnimmt.</li> | |
# <li>Sprachunterricht teilnimmt.</li> | |
# </ul> | |
# </div> | |
# </div> | |
# <div class="questions-question-stats"> | |
# <div class="progress" title="In 94,2% der bisher abgeschlossenen Tests wurde diese Frage richtig beantwortet"> | |
# <div class="progress-bar progress-bar-success" style="width:94%">94%</div> | |
# <div class="progress-bar progress-bar-danger" style="width:6%"></div> | |
# </div> | |
# </div> | |
# <div class="search-box search-box--bottom"> | |
# <form action="/suche/" method="get"> | |
# <p>Einbürgerungstest-Fragen durchsuchen:</p> | |
# <div class="search-box__input"><input type="hidden" name="cx" value="partner-pub-7803896590914046:7122373411"><input type="hidden" name="cof" value="FORID:10"><input class="form-control" type="text" name="q" value=""><button class="btn btn-default">🔍</button></div> | |
# </form> | |
# </div> | |
# </div> | |
import json

import requests
from bs4 import BeautifulSoup
from notion_client import Client
def scrape_questions(*, timeout=30):
    """Scrape the question catalogue and save it as ``de.json``.

    Downloads pages 1 to 10 of the question list, extracts every question
    with its answer options, the correct answer and the success percentage,
    and writes the result to ``de.json`` in the current directory as
    ``{"questions": [...]}``.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The list of question dictionaries that was written to the file.

    Raises:
        requests.HTTPError: If a page responds with an HTTP error status.
        requests.RequestException: On connection problems or timeouts.
    """
    language = 'de'
    base_url = "https://www.einbuergerungstest-online.eu/fragen"
    all_questions = []

    for page_number in range(1, 11):
        url = f"{base_url}/{page_number}/"
        response = requests.get(url, timeout=timeout)
        # Fail loudly instead of parsing an error page as if it held questions.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Keep only the rows that actually hold a question, so an unrelated
        # "row" container cannot crash the parser or shift the pairing with
        # the statistics blocks below.
        rows = [
            row
            for row in soup.find_all('div', class_='row')
            if row.find('div', class_='questions-question-text') is not None
        ]
        stats = soup.find_all('div', class_='questions-question-stats')

        # Questions and statistics blocks are paired by document order.
        for row, stat in zip(rows, stats):
            text_block = row.find('div', class_='questions-question-text')
            # The id is rendered as "1."; drop only the trailing dot.
            question_id = text_block.div.text.strip().rstrip('.')
            question_text = text_block.p.text.strip()

            answer_list = row.find('ul', class_='question-answers-list')
            options = [li.text.strip() for li in answer_list.find_all('li')]
            answer = row.find('span', class_='question-answer-right').text.strip()

            # The bar text looks like "91%"; drop only the percent sign.
            success_bar = stat.find('div', class_='progress-bar-success')
            percentage = int(success_bar.text.strip().rstrip('%'))

            all_questions.append({
                "question_id": question_id,
                "question": question_text,
                "options": options,
                "answer": f"✅ {answer}",
                "percentage": percentage,
                "language": language,
            })

    with open(language + '.json', 'w', encoding='utf-8') as f:
        json.dump({"questions": all_questions}, f, ensure_ascii=False, indent=4)

    return all_questions
# scrape_questions() | |
# Placeholder Notion integration token. Do not commit a real secret here;
# supply the real value at run time instead.
NOTION_TOKEN = "secret_v"
# Then write a Python script that reads the JSON file and uses the Notion API to create one new item in your Notion database per question, filling in the question and its answers.
# Each item has the following properties:
# - Question Number (stored in the "Nr" property)
# - Question | |
# - Option 1 | |
# - Option 2 | |
# - Option 3 | |
# - Option 4 | |
# - Answer | |
# - Percentage | |
# - Language | |
def _rich_text(content):
    """Wrap a string in the payload shape used for a Notion rich_text property."""
    return {"rich_text": [{"text": {"content": content}}]}


def add_questions_to_notion(token, database_id, language='persisch'):
    """Create one Notion database page per question from ``<language>.json``.

    Reads ``{"questions": [...]}`` from ``<language>.json`` in the current
    directory and creates a page in the given database for each entry,
    printing a confirmation line after every page.

    Args:
        token: Notion integration token used to authenticate the client.
        database_id: Identifier of the target Notion database.
        language: Base name of the JSON file to read (without ``.json``).

    Raises:
        FileNotFoundError: If ``<language>.json`` does not exist.
        KeyError: If the file or a question lacks an expected key.
    """
    client = Client(auth=token)
    with open(language + '.json', 'r', encoding='utf-8') as f:
        questions = json.load(f)['questions']

    for question in questions:
        properties = {
            "Nr": {"number": int(question['question_id'])},
            "Question": {"title": [{"text": {"content": question['question']}}]},
            "Answer": _rich_text(question['answer']),
            "Percentage": {"number": question['percentage']},
            "Language": _rich_text(question['language']),
        }
        # Only "Option 1" to "Option 4" are filled in. Slicing tolerates a
        # question with fewer than four options instead of raising IndexError.
        for index, option in enumerate(question['options'][:4], start=1):
            properties[f"Option {index}"] = _rich_text(option)

        client.pages.create(
            parent={"database_id": database_id},
            properties=properties,
        )
        print(question['question_id'], 'added to notion')
# Script entry point. 'TOKEN' and 'DATABASE_ID' are placeholders that must be
# replaced with real values. The guard keeps a plain import of this module
# from triggering Notion API calls.
if __name__ == "__main__":
    add_questions_to_notion('TOKEN', 'DATABASE_ID')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment