Skip to content

Instantly share code, notes, and snippets.

@ryancollingwood
Created November 11, 2024 02:53
Show Gist options
  • Save ryancollingwood/2b257a1375b7cbdd9468ab98e61da29b to your computer and use it in GitHub Desktop.
Save ryancollingwood/2b257a1375b7cbdd9468ab98e61da29b to your computer and use it in GitHub Desktop.
Generate PPTX slides from CSV using a template PPTX
import sys
import re
# Dataframe
import pandas as pd
import numpy as np
# Powerpoint
from pptx import Presentation
"""
requirements:
python-pptx
pandas
numpy
"""
def find_placeholders(text):
    """
    Return the names of every `{{Name}}` style placeholder found in `text`.

    The surrounding double curly braces are dropped, so `"{{A}} and {{B}}"`
    yields `["A", "B"]`. Names are returned in the order they appear, and an
    empty list is returned when `text` holds no complete placeholder.
    """
    # Non-greedy capture so adjacent placeholders are matched one at a time
    brace_pair = r'\{\{(.*?)\}\}'
    return [hit.group(1) for hit in re.finditer(brace_pair, text)]
def generate_slide(template_path, replacements, ouput_path, remove_missing = True):
    """
    Fill in the `{{Placeholder}}` text of a template pptx and save the result.

    template_path  - Path of the pptx to open as the template
    replacements   - Mapping of placeholder name (without braces) to the value
                     to put in its place; values are converted with `str`
    ouput_path     - Path the modified presentation is saved to
    remove_missing - When true, placeholders with no entry in `replacements`
                     are replaced with an empty string; otherwise they are
                     left in the slide untouched

    Only runs that belong to the text frame of a shape sitting directly on a
    slide are inspected. Substitution happens one run at a time, and a run is
    determined by formatting, so a placeholder whose text has been split over
    several runs cannot be matched. When you see `Malformatted run?` printed,
    go to the offending text in the slide, delete the template value
    (e.g. `{{Idea Rank}}`) and type it again so it ends up in a single run.

    `python-pptx` documentation:
    https://python-pptx.readthedocs.io/en/latest/user/quickstart.html
    """
    def wrap_in_braces(name):
        # Rebuild the literal `{{name}}` text as it appears in the slide
        return "{{" + format(name) + "}}"

    deck = Presentation(template_path)

    for slide in deck.slides:
        for shape in slide.shapes:
            # Shapes without a text frame have nothing to substitute
            if not shape.has_text_frame:
                continue

            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    trimmed = str(run.text).strip()
                    found = find_placeholders(trimmed)

                    if not found:
                        # An opening `{{` without a complete placeholder hints
                        # that the placeholder spills into a neighbouring run
                        if "{{" in trimmed:
                            print("failed to find placeholders:", trimmed, found)
                            print("Malformatted run?", [r.text for r in paragraph.runs], "\n")
                        continue

                    print("search_text", trimmed)

                    for name in found:
                        token = wrap_in_braces(name)
                        if name in replacements:
                            print(f"replacing {name}", "\n", replacements[name], "\n", sep = "")
                            run.text = run.text.replace(token, str(replacements[name]))
                        elif remove_missing:
                            print(f"removing {name}", "\n", sep = "")
                            run.text = run.text.replace(token, "")

                    # Report anything that still looks like template text
                    if "{{" in run.text:
                        print("Stil got templeted text?", run.text, find_placeholders(run.text))
                        print("Malformatted run?", [r.text for r in paragraph.runs])

    # Write the modified presentation out
    deck.save(ouput_path)
def process_row(template_path, filename_cols, row):
    """
    Generate one pptx from the template for a single row of the sheet.

    template_path - Path to the templatised pptx
    filename_cols - Columns (in order) whose values make up the filename
    row           - The row to render, as a pandas Series keyed by column name

    Column names have their spaces removed to form the placeholder names, so
    column `Business Value` feeds the text `{{BusinessValue}}`. Values are
    converted to stripped strings; a missing value (NaN / None / NA) becomes
    an empty string so that the literal text "nan" is never written onto a
    slide.

    The filename is the `filename_cols` values joined with `_`, with spaces
    turned into `-`, plus the `.pptx` extension. Filename parts are rendered
    with plain `str`, so a missing value in a filename column still shows up
    as `nan` in the name.

    Returns the filename that was passed to `generate_slide` as output path.
    """
    filename_parts = [
        str(row[col]).strip().replace(" ", "-")
        for col in filename_cols
    ]

    row_data = {
        # Empty CSV cells are read as NaN; blank them rather than print "nan"
        str(key).replace(" ", "").strip(): "" if pd.isna(value) else str(value).strip()
        for key, value in row.to_dict().items()
    }

    filename = "_".join(filename_parts) + ".pptx"
    generate_slide(template_path, row_data, filename)

    return filename
def main(df_path, template_path, mandatory_columns, filename_columns):
    """
    Populate the template with data, producing one pptx per row of the csv.

    It is assumed that in your template pptx you have text you want to
    substitute in the form of `{{ColumnNameWithNoSpaces}}`.
    So column `Business Value` will be mapped to text `{{BusinessValue}}`.

    df_path           - Path to csv with the data
    template_path     - Path to templatised pptx
    mandatory_columns - What columns are mandatory for the rows; a row is
                        skipped when any of them is missing or only whitespace
    filename_columns  - What columns (and what order) for the generated filename

    Raises KeyError when a mandatory column is not present in the csv.
    """
    df = pd.read_csv(df_path)

    for column in mandatory_columns:
        values = df[column]
        # Compare on the string form so columns that pandas parsed as numbers
        # (which have no `.str` accessor) can be mandatory too; genuinely
        # missing cells are caught by `isna`.
        is_blank = values.isna() | (values.astype(str).str.strip() == "")
        df = df[~is_blank]

    print("Number of rows from sheet:", len(df))

    # Explicit loop rather than `DataFrame.apply`: the call is made purely for
    # its side effect, and `apply` may invoke the callback on a placeholder
    # row when the frame has no rows at all.
    for _, row in df.iterrows():
        process_row(template_path, filename_columns, row)
if __name__ == "__main__":
    # Command line entry point. Positional arguments, in order:
    #   1. path to the csv with the data
    #   2. path to the templatised pptx
    #   3. comma separated list of mandatory columns
    #   4. comma separated list of columns used to build each filename
    args = sys.argv[1:]
    if len(args) < 4:
        sys.exit(
            f"usage: {sys.argv[0]} <csv_path> <template_pptx> "
            "<mandatory,columns> <filename,columns>"
        )

    df_path = args[0]
    template_path = args[1]
    mandatory_columns = args[2].split(",")
    filename_columns = args[3].split(",")

    # template_path has to be passed along; main() takes four arguments
    main(df_path, template_path, mandatory_columns, filename_columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment