import pandas as pd |
import streamlit as st |
from bbw.bbw import annotate |
import base64 |
from io import StringIO |
# Page-level configuration: tab title "bbw", no custom icon, centered layout,
# sidebar state left to Streamlit's 'auto' setting.
st.set_page_config(
    page_title="bbw",
    page_icon=None,
    layout='centered',
    initial_sidebar_state='auto',
)
def settings():
    """Apply the app-wide Streamlit option and draw the sidebar title."""
    option_name = 'client.caching'
    sidebar_title = "bbw: Match CSV to Wikidata"
    # Turn the 'client.caching' option off before rendering anything else.
    st.set_option(option_name, False)
    st.sidebar.title(sidebar_title)
def get_table_download_link(df, fname):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The frame is serialized without its index, base64-encoded and embedded
    in a ``data:`` URI, so the link works without any server-side file.

    Args:
        df: pandas DataFrame to export.
        fname: Name appended to the ``bbw.`` prefix in the anchor's
            ``download`` attribute.

    Returns:
        A string holding ``<br>`` followed by the ``<a>`` element.
    """
    # Function-scope import: the block needs only this one extra stdlib name.
    from html import escape

    payload = base64.b64encode(df.to_csv(index=False).encode()).decode()
    # Callers derive fname from the uploaded file's name and render the result
    # with unsafe_allow_html=True, so quotes/angle brackets must be escaped to
    # keep them from terminating the attribute or injecting markup.
    safe_name = escape(str(fname), quote=True)
    return f'<br><a href="data:file/csv;base64,{payload}" download="bbw.{safe_name}">CSV</a>'
def process_data(uploaded_file):
    """Parse the uploaded CSV and display it under an "INPUT" subheader.

    Args:
        uploaded_file: Upload object exposing ``read()``, ``seek()`` and
            ``name``; its content is decoded as UTF-8.

    Returns:
        A list ``[csvfile, filename, rawtable]``: the DataFrame read with
        ``header=None`` and ``dtype=str`` (so row 0 holds the column names),
        the upload's name, and the Streamlit placeholder that contains the
        rendered input table.
    """
    rawtable = st.empty()
    # NOTE(review): beta_container is a beta-prefixed Streamlit API; confirm it
    # exists in the Streamlit version this app is pinned to.
    with rawtable.beta_container():
        raw_bytes = uploaded_file.read()
        # Rewind the upload after consuming it.
        uploaded_file.seek(0)
        filename = uploaded_file.name
        text = str(raw_bytes, encoding='utf-8')
        csvfile = pd.read_csv(StringIO(text), dtype=str, header=None)
        # Display version only: promote the first row to the header and show
        # the remaining rows; csvfile itself is returned unchanged.
        preview = csvfile.iloc[1:]
        preview.columns = csvfile.iloc[0]
        st.subheader("INPUT")
        st.table(preview)
    return [csvfile, filename, rawtable]
def _show_table(title, table, download_name):
    """Render one result table: subheader, HTML table, CSV download link."""
    st.subheader(title)
    st.write(table.to_html(render_links=True, escape=False), unsafe_allow_html=True)
    st.markdown(get_table_download_link(table, download_name), unsafe_allow_html=True)


def annotate_data(csvfile, filename):
    """Run the bbw annotation and render all six result tables.

    Args:
        csvfile: DataFrame passed straight through to ``annotate``.
        filename: Name of the input file; passed to ``annotate`` and used to
            derive the download name of every result table.

    Returns:
        The Streamlit placeholder whose container holds the rendered output.
    """
    bbwtable = st.empty()
    # NOTE(review): beta_container is a beta-prefixed Streamlit API; confirm it
    # exists in the Streamlit version this app is pinned to.
    with bbwtable.beta_container():
        [webtable, urltable, labeltable, cpa_sub, cea_sub, cta_sub] = annotate(csvfile, filename)
        # (subheader, table, download name) in display order. Each download
        # link is built from the same table that is displayed; previously the
        # CTA and CEA links both exported the CPA table.
        sections = (
            ("OUTPUT: Semantically annotated web table", webtable, filename),
            ("OUTPUT: Table with up-to-date URLs in Wikidata", urltable, 'url_' + filename),
            ("OUTPUT: Table with up-to-date labels in Wikidata", labeltable, 'label_' + filename),
            ("CPA", cpa_sub, 'cpa_' + filename),
            ("CTA", cta_sub, 'cta_' + filename),
            ("CEA", cea_sub, 'cea_' + filename),
        )
        for title, table, download_name in sections:
            _show_table(title, table, download_name)
    return bbwtable
if __name__ == "__main__":
    # Script entry point: draw the sidebar, wait for a CSV upload, then show
    # the parsed input followed by the annotated output.
    settings()
    filebox = st.empty()
    with filebox.beta_container():
        uploaded_file = st.sidebar.file_uploader("Choose a raw CSV-file", type=['csv'])
    if uploaded_file:
        # Read the name from the upload itself: the `filename` returned by
        # process_data is unbound when that call fails, which previously made
        # the error handler raise NameError instead of showing the message.
        upload_name = uploaded_file.name
        try:
            csvfile, filename, rawtable = process_data(uploaded_file)
        except Exception:
            st.info('Something went wrong: bbw is unable to process the input ' + upload_name)
        else:
            # Annotate only when parsing succeeded; otherwise csvfile does not
            # exist and a second, misleading error would be reported.
            try:
                bbwtable = annotate_data(csvfile, filename)
            except Exception:
                st.info('Something went wrong: bbw is unable to annotate the input ' + upload_name)