|
import mimetypes |
|
from typing import List |
|
import openpyxl as xl |
|
import pandas as pd |
|
import io |
|
|
|
|
|
from shiny import App, render, ui |
|
|
|
app_ui = ui.page_fluid( |
|
ui.input_file("file1", "Choose a file to upload:", multiple=True), |
|
ui.input_radio_buttons("type", "Type:", ["Binary", "Text", "Excel"]), |
|
ui.output_text_verbatim("file_content"), |
|
ui.HTML("Download an Excel output file.<br>"), |
|
ui.download_button("download2", "Download .xlsx file") |
|
) |
|
|
|
|
|
def server(input, output, session): |
|
MAX_SIZE = 50000 |
|
|
|
@output |
|
@render.text |
|
def file_content(): |
|
file_infos = input.file1() |
|
if not file_infos: |
|
return |
|
|
|
# file_infos is a list of dicts; each dict represents one file. Example: |
|
# [ |
|
# { |
|
# 'name': 'data.csv', |
|
# 'size': 2601, |
|
# 'type': 'text/csv', |
|
# 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv' |
|
# } |
|
# ] |
|
out_str = "" |
|
for file_info in file_infos: |
|
out_str += ( |
|
"=" * 47 |
|
+ "\n" |
|
+ file_info["name"] |
|
+ "\nMIME type: " |
|
+ str(mimetypes.guess_type(file_info["name"])[0]) |
|
) |
|
if file_info["size"] > MAX_SIZE: |
|
out_str += f"\nTruncating at {MAX_SIZE} bytes." |
|
|
|
out_str += "\n" + "=" * 47 + "\n" |
|
|
|
if input.type() == "Text": |
|
with open(file_info["datapath"], "r") as f: |
|
out_str += f.read(MAX_SIZE) |
|
elif input.type() == "Excel": |
|
dFrame = import_dataframe(input.file1()[0]['datapath']) |
|
dFrame = modify_dataframe(dFrame) |
|
df_string = dFrame.to_string(header=True, index=False) |
|
out_str += df_string |
|
else: |
|
with open(file_info["datapath"], "rb") as f: |
|
data = f.read(MAX_SIZE) |
|
out_str += format_hexdump(data) |
|
|
|
return out_str |
|
|
|
@session.download(filename="output_file.xlsx") |
|
def download2(): |
|
# Another way to implement a file download is by yielding bytes; either all at |
|
# once, like in this case, or by yielding multiple times. When using this |
|
# approach, you should pass a filename argument to @session.download, which |
|
# determines what the browser will name the downloaded file. |
|
dFrame = import_dataframe(input.file1()[0]['datapath']) |
|
dFrame = modify_dataframe(dFrame) |
|
with io.BytesIO() as buf: |
|
dFrame.to_excel(buf, index=False) |
|
yield buf.getvalue() |
|
|
|
|
|
def format_hexdump(data: bytes) -> str: |
|
hex_vals = ["{:02x}".format(b) for b in data] |
|
hex_vals = group_into_blocks(hex_vals, 16) |
|
hex_vals = [" ".join(row) for row in hex_vals] |
|
hex_vals = "\n".join(hex_vals) |
|
return hex_vals |
|
|
|
|
|
def group_into_blocks(x: List[str], blocksize: int): |
|
""" |
|
Given a list, return a list of lists, where the inner lists each have `blocksize` |
|
elements. |
|
""" |
|
return [ |
|
x[i * blocksize : (i + 1) * blocksize] for i in range(ceil(len(x) / blocksize)) |
|
] |
|
|
|
def import_dataframe(filename): |
|
with open(filename, "rb") as f: |
|
# solution adapted from https://stackoverflow.com/a/69983732 |
|
wb = xl.load_workbook(f, data_only=True) |
|
ws = wb.worksheets[0] |
|
excel_data = ws.values |
|
columns = next(excel_data)[0:] |
|
dFrame = pd.DataFrame(excel_data, columns=columns) |
|
return dFrame |
|
|
|
def modify_dataframe(dFrame): |
|
# Your "business logic" here |
|
dFrame["new_column"] = dFrame["first_col_numeric"].astype(str) + dFrame["second_col_text"] |
|
return dFrame |
|
|
|
app = App(app_ui, server) |