Last active
August 25, 2020 14:18
-
-
Save zelinskiy/83f1323fab99e6d9e2eccc58ecd68528 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import date, timedelta | |
import urllib.request, urllib.error, urllib.parse | |
import re, os | |
# TODO:
#   - Ad libitum
#   - Psalmodia complementaris
# Heading text for each supported hour code. Negative codes are the same
# hours as their positive counterparts, marked with a trailing "*".
_HORA_NAMES = {
    2: "LECTIONIS",
    3: "LAUDES",
    4: "TERTIA",
    5: "SEXTA",
    6: "NONA",
    -4: "TERTIA*",
    -5: "SEXTA*",
    -6: "NONA*",
    7: "VESPERAS",
    8: "COMPLETORIUM",
    10: "LAUDES MATUTINAS ET OFFICIUM LECTIONIS",
}


def hora_name(n):
    """Return the heading text for hour code *n*.

    Args:
        n: Integer hour code (2-8, 10, or -4/-5/-6 for the starred variants).

    Returns:
        The upper-case name of the hour as a string.

    Raises:
        ValueError: If *n* is not one of the known hour codes.
    """
    try:
        return _HORA_NAMES[n]
    except KeyError:
        # Raising a bare string is a TypeError in Python 3; use a real
        # exception so the caller sees the intended message.
        raise ValueError("Hora " + str(n) + " not recognized") from None
def clean_data(data, hora, date=None):
    """Strip page chrome from a downloaded HTML page and add headings.

    Removes the stylesheet link, <style>/<h1> blocks, scripts, HTML comments,
    the <html>...<body> prologue and </body>...</html> epilogue, and a few
    site-specific fragments, then prepends an <h3> with the hour name (and,
    when *date* is given, an <h1> with the date in front of that).

    Args:
        data: The page HTML as a string.
        hora: Hour code understood by ``hora_name``.
        date: Optional date string to emit as an <h1> heading; omitted
            when ``None``.

    Returns:
        The cleaned HTML fragment as a string.

    Raises:
        Whatever ``hora_name`` raises for an unknown *hora*.
    """
    data = data.replace('<link rel="stylesheet" type="text/css" href="breviar.css">', "")
    # Plain-string replace: the slash must not be backslash-escaped here, or
    # the search text contains a literal backslash and never matches.
    data = data.replace('<h1>BREVIARIUM ROMANUM</h1>', "")

    # The multi-line patterns below are intentionally greedy (first opening
    # marker through the last closing marker), as in the original
    # "(\n|.)*" form; re.DOTALL lets "." span newlines without alternation.
    data = re.sub(r'<style>.*</style>', '', data, flags=re.DOTALL)
    data = re.sub(r'<h1>.*</h1>', '', data, flags=re.DOTALL)

    # Headings are added only after the page's own <h1> elements are gone.
    data = r'<h3>' + hora_name(hora) + r'</h3>' + data
    if date is not None:
        data = r'<h1>' + date + r'</h1>' + data

    # Marker-delimited sections must be removed before the generic comment
    # strip below, which would otherwise delete the markers themselves.
    data = re.sub(r'<!--\{TEDEUM_BEGIN\}.*<!--\{TEDEUM_END\}-->', '', data, flags=re.DOTALL)
    data = re.sub(r'<!-- Piwik -->.*<!-- End Piwik Code -->', '', data, flags=re.DOTALL)
    data = re.sub(r'<script type="text/javascript">.*</noscript>', '', data, flags=re.DOTALL)

    # Remaining single-line HTML comments.
    data = re.sub(r'<!--.+-->', '', data)

    data = re.sub(r'<html>.*<body>', '', data, flags=re.DOTALL)
    data = re.sub(r'</body>.*</html>', '', data, flags=re.DOTALL)
    data = re.sub(r'<p.*\[ Coniunctiones ostendere \]</a><p>', '', data)
    data = re.sub(r'<p class=pouzetisk.*</p>', '', data)

    # Applied twice on purpose: each match consumes its trailing newline, so
    # a directly following line containing the copyright sign is only caught
    # on the second pass.
    data = re.sub(r'\n.*©.*\n', '', data)
    data = re.sub(r'\n.*©.*\n', '', data)

    # Repair the malformed "<font </font>" fragment; the replacement is a
    # plain closing tag, with no backslash written into the output.
    data = data.replace('<font </font>', '</font>')
    return data
# Download every hour of every day in [date_, date_end), clean each page and
# store it as res/<year>/<month>/<YYYY-MM-DD>_<hora>.html.
date_ = date(2024, 10, 1)
date_end = date(2025, 1, 1)
while date_ < date_end:
    date_str = date_.strftime("%Y-%m-%d")
    out_dir = "res/{}/{}/".format(date_.year, date_.month)
    # exist_ok avoids the separate existence check (and its race).
    os.makedirs(out_dir, exist_ok=True)
    for hora in [2, 3, 4, 5, 6, -4, -5, -6, 7, 8]:
        # Negative codes request the same hour with psalmodia complementaris
        # enabled (dopln=1); the site itself is queried with the positive code.
        pcomp = 1 if hora < 0 else 0
        hora_ = abs(hora)
        url = 'http://breviarium.info/?a={}&datum={}&m=1&dopln={}'.format(hora_, date_str, pcomp)
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as response:
            data = response.read().decode("utf-8")
        # Only the first hour of the day (hora 2) carries the date heading.
        data = clean_data(data, hora, date_.strftime("%d-%m-%Y") if hora == 2 else None)
        # Write with an explicit encoding so the output does not depend on
        # the platform default.
        with open(out_dir + "{}_{}.html".format(date_str, hora), "w", encoding="utf-8") as f:
            f.write(data)
    print(date_str)
    date_ += timedelta(days=1)
print("DONE")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment