Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Last active June 7, 2023 16:07
Show Gist options
  • Save alekrutkowski/ecfaa6bfde60c159ad0d841f2560996d to your computer and use it in GitHub Desktop.
Save alekrutkowski/ecfaa6bfde60c159ad0d841f2560996d to your computer and use it in GitHub Desktop.
A simple [shinylive](https://shiny.posit.co/py/docs/shinylive.html) app example with Excel input, dataframe processing, and Excel output

To compile it into a docs folder/directory which could be hosted as an app on GitHub (Pages), assuming that the app.py and requirements.txt are in the excel_io folder/directory:

shinylive export excel_io docs

To run it (after compilation) locally:

python -m http.server --directory docs 8008

The input Excel file (used as an example) that is uploaded by a user in the running app has the following contents:

first_col_numeric second_col_text
1 aa
2 df
33 dfds
45 df
67 ww
100 sds
import mimetypes
from typing import List
import openpyxl as xl
import pandas as pd
import io
from shiny import App, render, ui
app_ui = ui.page_fluid(
ui.input_file("file1", "Choose a file to upload:", multiple=True),
ui.input_radio_buttons("type", "Type:", ["Binary", "Text", "Excel"]),
ui.output_text_verbatim("file_content"),
ui.HTML("Download an Excel output file.<br>"),
ui.download_button("download2", "Download .xlsx file")
)
def server(input, output, session):
MAX_SIZE = 50000
@output
@render.text
def file_content():
file_infos = input.file1()
if not file_infos:
return
# file_infos is a list of dicts; each dict represents one file. Example:
# [
# {
# 'name': 'data.csv',
# 'size': 2601,
# 'type': 'text/csv',
# 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv'
# }
# ]
out_str = ""
for file_info in file_infos:
out_str += (
"=" * 47
+ "\n"
+ file_info["name"]
+ "\nMIME type: "
+ str(mimetypes.guess_type(file_info["name"])[0])
)
if file_info["size"] > MAX_SIZE:
out_str += f"\nTruncating at {MAX_SIZE} bytes."
out_str += "\n" + "=" * 47 + "\n"
if input.type() == "Text":
with open(file_info["datapath"], "r") as f:
out_str += f.read(MAX_SIZE)
elif input.type() == "Excel":
dFrame = import_dataframe(input.file1()[0]['datapath'])
dFrame = modify_dataframe(dFrame)
df_string = dFrame.to_string(header=True, index=False)
out_str += df_string
else:
with open(file_info["datapath"], "rb") as f:
data = f.read(MAX_SIZE)
out_str += format_hexdump(data)
return out_str
@session.download(filename="output_file.xlsx")
def download2():
# Another way to implement a file download is by yielding bytes; either all at
# once, like in this case, or by yielding multiple times. When using this
# approach, you should pass a filename argument to @session.download, which
# determines what the browser will name the downloaded file.
dFrame = import_dataframe(input.file1()[0]['datapath'])
dFrame = modify_dataframe(dFrame)
with io.BytesIO() as buf:
dFrame.to_excel(buf, index=False)
yield buf.getvalue()
def format_hexdump(data: bytes) -> str:
hex_vals = ["{:02x}".format(b) for b in data]
hex_vals = group_into_blocks(hex_vals, 16)
hex_vals = [" ".join(row) for row in hex_vals]
hex_vals = "\n".join(hex_vals)
return hex_vals
def group_into_blocks(x: List[str], blocksize: int):
"""
Given a list, return a list of lists, where the inner lists each have `blocksize`
elements.
"""
return [
x[i * blocksize : (i + 1) * blocksize] for i in range(ceil(len(x) / blocksize))
]
def import_dataframe(filename):
with open(filename, "rb") as f:
# solution adapted from https://stackoverflow.com/a/69983732
wb = xl.load_workbook(f, data_only=True)
ws = wb.worksheets[0]
excel_data = ws.values
columns = next(excel_data)[0:]
dFrame = pd.DataFrame(excel_data, columns=columns)
return dFrame
def modify_dataframe(dFrame):
# Your "business logic" here
dFrame["new_column"] = dFrame["first_col_numeric"].astype(str) + dFrame["second_col_text"]
return dFrame
app = App(app_ui, server)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment