Guts · July 23, 2021 13:43
diff --git a/.gitignore b/.gitignore
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class

 # C extensions
 *.so

 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST

 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec

 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 .hypothesis/
 .pytest_cache/

 # Translations
 *.mo
 *.pot

 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3

 # Flask stuff:
 instance/
 .webassets-cache

 # Scrapy stuff:
 .scrapy

 # Sphinx documentation
 docs/_build/

 # PyBuilder
 target/

 # Jupyter Notebook
 .ipynb_checkpoints

 # IPython
 profile_default/
 ipython_config.py

 # pyenv
 .python-version

 # celery beat schedule file
 celerybeat-schedule

 # SageMath parsed files
 *.sage.py

 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/

 # Spyder project settings
 .spyderproject
 .spyproject

 # Rope project settings
 .ropeproject

 # mkdocs documentation
 /site

 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json

 # Pyre type checker
 .pyre/

 ## Custom

 *.html
 .vscode/
diff --git a/README.md b/README.md
diff --git a/configuration.json b/configuration.json
 {
  "feeds": [
    {
      "name": "Geotribu",
      "url": "https://static.geotribu.fr/feed_rss_created.xml",
      "last_guid": null
    },
    {
      "name": "CartoNumerique",
      "url": "https://cartonumerique.blogspot.com/feeds/posts/default?alt=rss",
      "last_guid": null
    },
    {
      "name": "Idgeo",
      "url": "https://www.idgeo.fr/feed/",
      "last_guid": null
    }
  ],
  "client": {
    "last_run": null,
    "debug": true
  }
 }
diff --git a/requirements.txt b/requirements.txt
 # project
 feedparser<6.1
 jinja2<3.1
diff --git a/script.py b/script.py
 #! python3  # noqa: E265

 # standard library
 import json
 import logging
 from pathlib import Path

 # 3rd party
 import feedparser
 from jinja2 import Environment, select_autoescape, FileSystemLoader

 # Feed aggregator: read the configured RSS/Atom feeds, keep the first entries
 # of each one and render them into a single HTML page through a Jinja template.

 # log
 logging.basicConfig(level=logging.INFO)

 # variables
 configuration_path = Path("configuration.json")  # relative to the working directory
 limit_length_by_feed = 5  # maximum number of entries kept per feed
 template_path = Path(__file__).parent / "template.jinja"

 # read configuration file
 with configuration_path.open(mode="r", encoding="UTF-8") as stream_json:
    config = json.load(stream_json)

 # a missing or null "feeds" key is handled as "no feed configured"
 feeds = config.get("feeds") or []

 logging.info(
    "%d feeds configured: %s",
    len(feeds),
    ", ".join(str(feed.get("name")) for feed in feeds),
 )

 # parse feeds and store data, keyed by entry identifier so that an entry
 # seen twice is only kept once
 data_feeds = {}

 for feed in feeds:
    feed_name = feed.get("name")
    feed_url = feed.get("url")
    logging.info("Parsing feed: %s", feed_name)

    if not feed_url:
        logging.warning("Feed %s has no URL configured, skipping", feed_name)
        continue

    parsed_feed = feedparser.parse(feed_url)

    # check feed health: only feeds whose "bozo" flag equals 0 are used
    if parsed_feed.get("bozo") != 0:
        logging.warning("Feed is not healthy")
        continue
    logging.info("Feed is healthy")

    # parse items
    for entry in parsed_feed.entries[:limit_length_by_feed]:
        # Entries are read with .get() because a feed may omit any of these
        # fields; attribute access would abort the whole run on the first gap.
        entry_key = entry.get("id") or entry.get("link")
        if not entry_key:
            logging.warning("Skipping an entry without id nor link in %s", feed_name)
            continue

        item = {
            "title": entry.get("title", ""),
            "link": entry.get("link", ""),
            "description": entry.get("description", ""),
            "published": entry.get("published"),
            "source": feed_name,
        }

        # "illustration" is only set when the entry carries an enclosure
        enclosures = entry.get("enclosures")
        if enclosures:
            item["illustration"] = enclosures[0].get("href")

        data_feeds[entry_key] = item

 # Create output file

 # NOTE(review): select_autoescape() decides from the template file extension;
 # "template.jinja" is neither "html" nor "xml", so autoescaping looks disabled
 # for this template and feed content is rendered unescaped - confirm this is
 # intended before exposing the page.
 env = Environment(
    autoescape=select_autoescape(["html", "xml"]),
    loader=FileSystemLoader(Path(__file__).parent),
 )
 template = env.get_template(template_path.name)

 with open("output.html", mode="w", encoding="UTF8") as output_file:
    output_file.write(
        template.render(feeds_items=data_feeds, title="Coucou les CQP Geom")
    )
diff --git a/template.jinja b/template.jinja
 <!DOCTYPE html>
 <html lang="fr">
 <head>
    <title>Agrégat de flux</title>
 </head>
 <body>
    <h1>{{ title }}</h1>

    {# One section per aggregated entry of the feeds_items mapping #}
    {% for item in feeds_items.values() %}
        <h2><a href="{{ item.link }}" target="_blank" rel="noopener">{{ item.title }}</a></h2>
        <p>Source : {{ item.source }}</p>
        <p>{{ item.description }}</p>
        {# Only emit the image when the item has an illustration, to avoid an empty src #}
        {% if item.illustration %}
        <img src="{{ item.illustration }}" alt="{{ item.title }}">
        {% endif %}
        <hr>
        <br>
    {% endfor %}

 </body>
 </html>
	# Byte-compiled / optimized / DLL files
	__pycache__/
	*.py[cod]
	*$py.class

	# C extensions
	*.so

	# Distribution / packaging
	.Python
	build/
	develop-eggs/
	dist/
	downloads/
	eggs/
	.eggs/
	lib/
	lib64/
	parts/
	sdist/
	var/
	wheels/
	*.egg-info/
	.installed.cfg
	*.egg
	MANIFEST

	# PyInstaller
	# Usually these files are written by a python script from a template
	# before PyInstaller builds the exe, so as to inject date/other infos into it.
	*.manifest
	*.spec

	# Installer logs
	pip-log.txt
	pip-delete-this-directory.txt

	# Unit test / coverage reports
	htmlcov/
	.tox/
	.nox/
	.coverage
	.coverage.*
	.cache
	nosetests.xml
	coverage.xml
	*.cover
	.hypothesis/
	.pytest_cache/

	# Translations
	*.mo
	*.pot

	# Django stuff:
	*.log
	local_settings.py
	db.sqlite3

	# Flask stuff:
	instance/
	.webassets-cache

	# Scrapy stuff:
	.scrapy

	# Sphinx documentation
	docs/_build/

	# PyBuilder
	target/

	# Jupyter Notebook
	.ipynb_checkpoints

	# IPython
	profile_default/
	ipython_config.py

	# pyenv
	.python-version

	# celery beat schedule file
	celerybeat-schedule

	# SageMath parsed files
	*.sage.py

	# Environments
	.env
	.venv
	env/
	venv/
	ENV/
	env.bak/
	venv.bak/

	# Spyder project settings
	.spyderproject
	.spyproject

	# Rope project settings
	.ropeproject

	# mkdocs documentation
	/site

	# mypy
	.mypy_cache/
	.dmypy.json
	dmypy.json

	# Pyre type checker
	.pyre/

	## Custom

	*.html
	.vscode/
	{
	"feeds": [
	{
	"name": "Geotribu",
	"url": "https://static.geotribu.fr/feed_rss_created.xml",
	"last_guid": null
	},
	{
	"name": "CartoNumerique",
	"url": "https://cartonumerique.blogspot.com/feeds/posts/default?alt=rss",
	"last_guid": null
	},
	{
	"name": "Idgeo",
	"url": "https://www.idgeo.fr/feed/",
	"last_guid": null
	}
	],
	"client": {
	"last_run": null,
	"debug": true
	}
	}
	#! python3 # noqa: E265

	# standard library
	import json
	import logging
	from pathlib import Path

	# 3rd party
	import feedparser
	from jinja2 import Environment, select_autoescape, FileSystemLoader

	# Feed aggregator: read the configured RSS/Atom feeds, keep the first entries
	# of each one and render them into a single HTML page through a Jinja template.

	# log
	logging.basicConfig(level=logging.INFO)

	# variables
	configuration_path = Path("configuration.json")  # relative to the working directory
	limit_length_by_feed = 5  # maximum number of entries kept per feed
	template_path = Path(__file__).parent / "template.jinja"

	# read configuration file
	with configuration_path.open(mode="r", encoding="UTF-8") as stream_json:
	    config = json.load(stream_json)

	# a missing or null "feeds" key is handled as "no feed configured"
	feeds = config.get("feeds") or []

	logging.info(
	    "%d feeds configured: %s",
	    len(feeds),
	    ", ".join(str(feed.get("name")) for feed in feeds),
	)

	# parse feeds and store data, keyed by entry identifier so that an entry
	# seen twice is only kept once
	data_feeds = {}

	for feed in feeds:
	    feed_name = feed.get("name")
	    feed_url = feed.get("url")
	    logging.info("Parsing feed: %s", feed_name)

	    if not feed_url:
	        logging.warning("Feed %s has no URL configured, skipping", feed_name)
	        continue

	    parsed_feed = feedparser.parse(feed_url)

	    # check feed health: only feeds whose "bozo" flag equals 0 are used
	    if parsed_feed.get("bozo") != 0:
	        logging.warning("Feed is not healthy")
	        continue
	    logging.info("Feed is healthy")

	    # parse items
	    for entry in parsed_feed.entries[:limit_length_by_feed]:
	        # Entries are read with .get() because a feed may omit any of these
	        # fields; attribute access would abort the whole run on the first gap.
	        entry_key = entry.get("id") or entry.get("link")
	        if not entry_key:
	            logging.warning("Skipping an entry without id nor link in %s", feed_name)
	            continue

	        item = {
	            "title": entry.get("title", ""),
	            "link": entry.get("link", ""),
	            "description": entry.get("description", ""),
	            "published": entry.get("published"),
	            "source": feed_name,
	        }

	        # "illustration" is only set when the entry carries an enclosure
	        enclosures = entry.get("enclosures")
	        if enclosures:
	            item["illustration"] = enclosures[0].get("href")

	        data_feeds[entry_key] = item

	# Create output file

	# NOTE(review): select_autoescape() decides from the template file extension;
	# "template.jinja" is neither "html" nor "xml", so autoescaping looks disabled
	# for this template and feed content is rendered unescaped - confirm this is
	# intended before exposing the page.
	env = Environment(
	    autoescape=select_autoescape(["html", "xml"]),
	    loader=FileSystemLoader(Path(__file__).parent),
	)
	template = env.get_template(template_path.name)

	with open("output.html", mode="w", encoding="UTF8") as output_file:
	    output_file.write(
	        template.render(feeds_items=data_feeds, title="Coucou les CQP Geom")
	    )
	<!DOCTYPE html>
	<html lang="fr">
	<head>
	<title>Agrégat de flux</title>
	</head>
	<body>
	<h1>{{ title }}</h1>

	{# One section per aggregated entry of the feeds_items mapping #}
	{% for item in feeds_items.values() %}
	    <h2><a href="{{ item.link }}" target="_blank" rel="noopener">{{ item.title }}</a></h2>
	    <p>Source : {{ item.source }}</p>
	    <p>{{ item.description }}</p>
	    {# Only emit the image when the item has an illustration, to avoid an empty src #}
	    {% if item.illustration %}
	    <img src="{{ item.illustration }}" alt="{{ item.title }}">
	    {% endif %}
	    <hr>
	    <br>
	{% endfor %}

	</body>
	</html>