Created
November 11, 2024 02:53
-
-
Save ryancollingwood/2b257a1375b7cbdd9468ab98e61da29b to your computer and use it in GitHub Desktop.
Generate PPTX slides from CSV using a template PPTX
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
# Dataframe | |
import pandas as pd | |
import numpy as np | |
# Powerpoint | |
from pptx import Presentation | |
""" | |
requirements: | |
python-pptx | |
pandas | |
numpy | |
""" | |
def find_placeholders(text):
    """
    Return the names of all `{{PlaceholderName}}` markers found in `text`

    text - String to search

    Returns a list with the text found between each `{{` and `}}` pair, in
    order of appearance (duplicates included); an empty list when there are
    none.
    """
    # Non-greedy so that `{{A}} {{B}}` yields `A` and `B` rather than `A}} {{B`
    pattern = r'\{\{(.*?)\}\}'
    return re.findall(pattern, text)
def generate_slide(template_path, replacements, ouput_path, remove_missing = True):
    """
    Fill the `{{Placeholder}}` markers of a template pptx and save the result

    This function is definitely spaghetti, and I haven't fully figured out how the
    `python-pptx` library does things. There is some pretty extensive docco
    here: https://python-pptx.readthedocs.io/en/latest/user/quickstart.html
    But what we have is good enough to get going.

    template_path - Path to templatised pptx
    replacements - Mapping of placeholder name (the text between the braces)
                   to its value; values are converted with `str`
    ouput_path - Path the populated pptx is saved to
    remove_missing - When True, placeholders that have no entry in
                     `replacements` are blanked out; when False they are left
                     in the slide untouched

    If you see `Malformatted run?` then go to the offending text in the Slide and
    delete the template value - e.g. `{{Idea Rank}}` as a run is determined by
    formatting which breaks the text into parts
    """
    def escaped_key(key_value):
        # Rebuild the literal `{{key_value}}` marker as it appears in the slide
        return f'{{{{{key_value}}}}}'

    prs = Presentation(template_path)

    for slide in prs.slides:
        for shape in slide.shapes:
            # Only shapes that report a text frame are searched
            if not shape.has_text_frame:
                continue

            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    search_text = str(run.text).strip()
                    place_holders = find_placeholders(search_text)

                    if not place_holders:
                        # An opening `{{` without a complete marker in this run
                        # usually means the marker is split across runs
                        if "{{" in search_text:
                            print("failed to find placeholders:", search_text, place_holders)
                            print("Malformatted run?", [x.text for x in paragraph.runs], "\n")
                        continue

                    print("search_text", search_text)

                    for place_holder in place_holders:
                        if place_holder not in replacements:
                            if remove_missing:
                                print(f"removing {place_holder}", "\n", sep = "")
                                run.text = run.text.replace(escaped_key(place_holder), "")
                            # Never fall through to the lookup below: there is
                            # no value for this placeholder, so it would raise
                            # KeyError when remove_missing is False
                            continue

                        print(f"replacing {place_holder}", "\n", replacements[place_holder], "\n", sep = "")
                        run.text = run.text.replace(escaped_key(place_holder), str(replacements[place_holder]))

                    # Anything still carrying `{{` was not substituted
                    if "{{" in run.text:
                        print("Stil got templeted text?", run.text, find_placeholders(run.text))
                        print("Malformatted run?", [x.text for x in paragraph.runs])

    # Save the modified presentation
    prs.save(ouput_path)
def process_row(template_path, filename_cols, row):
    """
    Generate one populated pptx for a single row of the sheet

    template_path - Path to templatised pptx
    filename_cols - Columns (in order) whose values are joined with `_` to
                    build the output filename
    row - The row to process; must support `row[col]` and `.to_dict()`

    Returns the filename of the generated pptx
    """
    # Spaces inside a value become `-` so that `_` only separates columns
    filename_parts = [
        str(row[col]).strip().replace(" ", "-")
        for col in filename_cols
    ]
    filename = "_".join(filename_parts)
    filename = f"{filename}.pptx"

    # Placeholder names are the column names with the spaces removed
    # NOTE(review): a missing cell (NaN) is passed on as the text `nan` -
    # confirm whether an empty string would be preferable
    row_data = {
        str(k).replace(" ", "").strip(): str(v).strip()
        for k, v in row.to_dict().items()
    }

    generate_slide(template_path, row_data, filename)
    return filename
def main(df_path, template_path, mandatory_columns, filename_columns):
    """
    Populate Template with Data

    It is assumed that in your template pptx you have text you want to substitute
    in the form of `{{ColumnNameWithNoSpaces}}`
    So column `Business Value` will be mapped to text `{{BusinessValue}}`

    df_path - Path to csv with the data
    template_path - Path to templatised pptx
    mandatory_columns - What columns are mandatory for the rows
    filename_columns - What columns (and what order) for the generated filename
    """
    df = pd.read_csv(df_path)

    # Keep only the rows that have a non-blank value in every mandatory column.
    # Going through `astype(str)` keeps this working for columns that were not
    # parsed as text (e.g. numeric), where the `.str` accessor is unavailable.
    for column in mandatory_columns:
        values = df[column]
        has_value = values.notna() & (values.astype(str).str.strip() != "")
        df = df[has_value]

    print("Number of rows from sheet:", len(df))

    # One pptx per remaining row
    for _, row in df.iterrows():
        process_row(template_path, filename_columns, row)
if __name__ == "__main__":
    # Command line entry point:
    #   <csv path> <template pptx path> <mandatory columns> <filename columns>
    # where both column arguments are comma separated lists of column names
    args = sys.argv[1:]
    if len(args) < 4:
        sys.exit(
            "usage: <csv_path> <template_pptx_path> "
            "<mandatory_col[,mandatory_col...]> <filename_col[,filename_col...]>"
        )

    df_path = args[0]
    template_path = args[1]
    mandatory_columns = args[2].split(",")
    filename_columns = args[3].split(",")

    # template_path must be passed through - `main` takes four arguments
    main(df_path, template_path, mandatory_columns, filename_columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment