Useful snippets for Jupyter notebooks
# Use `%%capture` to hush 'noisy' stdout and stderr streams, but still get the `%%time` report afterwards
%%capture out_stream | |
%%time | |
---rest of a cell that does something with LOTS of output-- | |
#In cell after, put following to get time of completion from that: | |
#time it took to run cell above | |
for x in out_stream.stdout.split("\n")[-3:]: | |
print(x) | |
# Use `%%capture` to hush 'noisy' stdout and stderr streams, but still get certain output after | |
%%capture out_stream | |
---rest of a cell that does something with LOTS of output with anything to keep tagged with `#x#x#x#x#x` at start of every line -- | |
#In cell after, put following to get filtered output: | |
# output specifically tagged stderr lines captured from above cell | |
tag_used = "#x#x#x#x#x" | |
filtered_out = "" | |
for x in out_stream.stderr.split("\n"): | |
if x.startswith(tag_used): | |
filtered_out += x[len(tag_used):]+"\n" | |
# Feedback | |
sys.stderr.write("{}".format(filtered_out)) | |
# Use `%%capture` to capture stdout and stderr streams and send the output to a file
%%capture out_stream | |
---rest of a cell that does something with output-- | |
#In cell after, put following: | |
%store out_stream.stdout >output_from_cell.txt #based on https://stackoverflow.com/a/32731418/8508004 | |
# (In an answer to a Jupyter Discourse post, I added more background on using this and options for how | |
# you could add showing the captured text in the notebook, too. See | |
# https://discourse.jupyter.org/t/how-to-write-the-output-from-previous-cell-to-a-csv-file/10319/2?u=fomightez ) | |
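# If you also want to replay what was hushed, the captured object can do that; a minimal
# sketch (assumes the `out_stream` captured by `%%capture` above):
out_stream.show()          # replays the captured stdout/stderr (and rich output) in the notebook
#print(out_stream.stdout)  # or just print the captured stdout text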
# Use `with io.capture_output() as captured:` to suppress output from only what is in the `with` block | |
# This comes from down below on that same page as the `%%capture` cell magic (https://stackoverflow.com/a/52559560/8508004) | |
from IPython.utils import io | |
with io.capture_output() as captured: | |
MyFunction() | |
# Use `%store` and a multi-line (triple-quoted) string to write multi-line text to a file
s='''#!/bin/bash | |
pdb=$1 | |
for chain in $(grep "^ATOM" $pdb | cut -b 22 | sort -u) | |
do | |
sed -n "/^.\{21\}$chain/p" $pdb > ${pdb%.pdb}_$chain.pdb | |
done''' | |
%store s >split_into_chains.sh | |
# clean out directory of all but one file | |
from shlex import quote | |
pathname_of_file_to_keep = quote("notebooks/Generating later Circos tutorial notebooks from extracted markdown via notedown and papermill.ipynb") | |
name_of_file_to_keep = quote("Generating later Circos tutorial notebooks from extracted markdown via notedown and papermill.ipynb") | |
# based on Olivier Dulac's comment at https://unix.stackexchange.com/questions/153862/remove-all-files-directories-except-for-one-file | |
%cd .. | |
!cp $pathname_of_file_to_keep . | |
!rm -rf notebooks | |
!mkdir notebooks | |
!mv $name_of_file_to_keep notebooks/ | |
%cd notebooks | |
# Make a directory (folder) if it doesn't already exist
import os | |
directory_for_archive = "original_html" | |
if not os.path.isdir(directory_for_archive): | |
!mkdir {directory_for_archive} | |
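# A minimal alternative sketch for the same thing without shelling out: `os.makedirs()`
# with `exist_ok=True` (standard library only; the directory name is just the one from above)
import os
directory_for_archive = "original_html"
os.makedirs(directory_for_archive, exist_ok=True)  # no error if it already exists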
# Get a file if not yet retrieved / check if file exists | |
import os | |
file_needed = "get_seq_from_multiFASTA_with_match_in_description.py" | |
if not os.path.isfile(file_needed): | |
#!curl -OL https://raw.githubusercontent.com/fomightez/sequencework/master/Extract_from_FASTA/{file_needed} | |
os.system(f"curl -OL https://raw.githubusercontent.com/fomightez/sequencework/master/Extract_from_FASTA/{file_needed}") | |
# Get a list of files if not yet retrieved, checking if file exists already first | |
import os | |
files_needed = ["hhsearch_q9bsu1_uniclust_w_ss_pfamA_30.hhr", | |
"2uvo_hhblits.hhr", | |
"2uvo_hhsearch.hhr", | |
"hhpred_9590198.hhr"] | |
url_prefix = "https://raw.githubusercontent.com/biopython/biopython/master/Tests/HHsuite/" | |
for file_needed in files_needed: | |
if not os.path.isfile(file_needed): | |
!curl -OL {url_prefix+file_needed} | |
# Check if list of file already uploaded and if not, prompt for them | |
import os | |
import sys | |
files_needed = ["genome_1.fa","genome_2.fa","genome_3.fa","genome_4.fa",] | |
files_still_needed = [] | |
for fn in files_needed: | |
if not os.path.isfile(fn): | |
files_still_needed.append(fn) | |
if files_still_needed: | |
sys.stderr.write("\nThe following sequences still need uploading:\n - {}\n".format("\n - ".join(files_still_needed))) | |
sys.exit(1) | |
else: | |
sys.stderr.write("\nSequences needed all present.") | |
# Check for a file that comes inside an archive; if the file isn't there but the archive is, unpack
# the archive, and if the archive isn't found either, ask for it. Particularly useful in Binder sessions
# to make sure needed files are around and ready to run cells. HALTS NOTEBOOK CELL PROCESSING IF NOT.
# first check `an_archive_example.tar.gz` uploaded if it wasn't already extracted | |
import os | |
unpacked_example = os.path.join("directory_containing_file_when_unpacked","your_alignment_file.clustal") | |
file_needed = "an_archive_example.tar.gz" | |
import sys | |
if os.path.isfile(unpacked_example): | |
sys.stderr.write("\nAppears '{}' has already been unpacked.\n".format(file_needed)) | |
elif os.path.isfile(file_needed): | |
!tar xzf {file_needed} | |
else: | |
sys.stderr.write("\n\n*****************ERROR**************************\n" | |
"The file '{0}' is needed.\n" | |
"Upload '{0}' to this Jupyter session and re-run this cell.\n" | |
"*****************ERROR**************************\n".format(file_needed)) | |
sys.exit(1) | |
# Ask for an archive and unpack and extract enclosed dataframe | |
file_required = "collected_candidate_21S-containing_seqs.tar.gz" | |
dataframe_to_read = "extracted_21S-containing_seq_info_df.pkl" | |
import os | |
import sys | |
import pandas as pd | |
if os.path.isfile(file_required): | |
!tar -xzf {file_required} | |
mitolsu_frag_df = pd.read_pickle(dataframe_to_read) | |
sys.stderr.write("\nFile with sequences ('{}') observed and" | |
" unpacked.".format(file_required)) | |
sys.stderr.write("\nDataframe '{}' read in" | |
".".format(dataframe_to_read)) | |
else: | |
sys.stderr.write("\nFile with sequences '{}' not seen and so nothing done" | |
". Seems wrong.".format(file_required)) | |
sys.exit(1) | |
# for when the archive being checked for contains a dataframe to bring into memory in the notebook:
import os | |
file_needed = "an_archive_example.tar.gz" | |
unpacked_goal = "info_df.pkl" | |
import sys | |
import pandas as pd | |
if os.path.isfile(unpacked_goal): | |
sys.stderr.write("\nAppears '{}' has already been unpacked.\n".format(file_needed)) | |
#bring the details in | |
try: | |
len(previous_details_df) > 2 | |
except NameError as e: | |
previous_details_df = pd.read_pickle(unpacked_goal) | |
sys.stderr.write("\nData in '{}' read in.\n".format(unpacked_goal)) | |
elif os.path.isfile(file_needed): | |
!tar xzf {file_needed} | |
previous_details_df = pd.read_pickle(unpacked_goal) | |
sys.stderr.write("\nData in '{}' read in.\n".format(unpacked_goal)) | |
else: | |
sys.stderr.write("\n\n*****************ERROR**************************\n" | |
"The file '{0}' is needed.\n" | |
"Upload '{0}' to this Jupyter session and re-run this cell.\n" | |
"*****************ERROR**************************\n".format(file_needed)) | |
sys.exit(1) | |
# for when the archive being checked for contains several dataframes and a list to bring into memory in the notebook (SEE JUST BELOW FOR A MORE GENERAL VERSION WITH ONLY DATAFRAMES):
import os | |
file_needed = "Counts_promoter_motifs_among1011_21S_candidates_where_no_mito_prev_identified.tar.gz" | |
unpacked_goal = "disruptor_hit_num_tallies_by_id_df.pkl" | |
df_n_fnstr_dict = { | |
"largest_disr_num_by_id_df": "largest_disr_num_by_id_df", | |
"mito_promoter_matches_df": "df", | |
"mito_promoter_hit_num_tallies_by_id_df": "largest_hit_num_by_id_df", | |
"disruptor_matches_df": "disrupt_df", | |
"disruptor_hit_num_tallies_by_id_df": "largest_disr_num_by_id_df", | |
"grich_matches_df": "grich_df", | |
"grich_hit_num_tallies_by_id_df": "largest_grich_num_by_id_df", | |
"endgrich_matches_df": "end_grich_df", | |
"endgrich_hit_num_tallies_by_id_df": "largest_endgrich_num_by_id_df", | |
"twenty_nineATrich_seq_matches_df": "twenty_nine_df", | |
"twenty_nineATrich_seq_hit_num_tallies_by_id_df": "largest_ATrich_num_by_id_df", | |
} | |
def read_in_data(df_n_fnstr_dict): | |
#df_fns = ["{}.pkl".format(x) for x in df_n_fnstr_dict.keys()] | |
df_n_fnstr_dict = {"{}.pkl".format(k):v for k,v in df_n_fnstr_dict.items()} | |
g = globals() #based on `how to use a string to make a python variable.md` | |
for k,v in df_n_fnstr_dict.items(): | |
g[v] = pd.read_pickle(k) | |
sys.stderr.write("\nData in '{}' read in; produced `{}`.".format(k,v)) | |
import json | |
with open('genomes_list.json', 'r') as f: | |
g["genomes"] = json.load(f) | |
sys.stderr.write("\nGenomes list read back in as `genomes`.") | |
import sys | |
import pandas as pd | |
if os.path.isfile(unpacked_goal): | |
sys.stderr.write("\nAppears '{}' has already been unpacked.\n".format(file_needed)) | |
#bring the data into memory, if it isn't already | |
try: | |
len(globals()[list(df_n_fnstr_dict.items())[0][1]]) > 2 | |
except (NameError,KeyError) as e: | |
read_in_data(df_n_fnstr_dict) | |
elif os.path.isfile(file_needed): | |
!tar xzf {file_needed} | |
read_in_data(df_n_fnstr_dict) | |
else: | |
sys.stderr.write("\n\n*****************ERROR**************************\n" | |
"The file '{0}' is needed.\n" | |
"Upload '{0}' to this Jupyter session and re-run this cell.\n" | |
"*****************ERROR**************************\n".format(file_needed)) | |
sys.exit(1) | |
## MORE GENERAL VERSION OF THAT LAST ONE THAT DOESN'T INCLUDE ANY LIST TO READ IN | |
import os | |
file_needed = "Counts_promoter_motifs_among1011_21S_candidates_where_no_mito_prev_identified.tar.gz" | |
unpacked_goal = "disruptor_hit_num_tallies_by_id_df.pkl" | |
df_n_fnstr_dict = { | |
"largest_disr_num_by_id_df": "largest_disr_num_by_id_df", | |
"mito_promoter_matches_df": "df", | |
"mito_promoter_hit_num_tallies_by_id_df": "largest_hit_num_by_id_df", | |
"disruptor_matches_df": "disrupt_df", | |
"disruptor_hit_num_tallies_by_id_df": "largest_disr_num_by_id_df", | |
"grich_matches_df": "grich_df", | |
"grich_hit_num_tallies_by_id_df": "largest_grich_num_by_id_df", | |
"endgrich_matches_df": "end_grich_df", | |
"endgrich_hit_num_tallies_by_id_df": "largest_endgrich_num_by_id_df", | |
"twenty_nineATrich_seq_matches_df": "twenty_nine_df", | |
"twenty_nineATrich_seq_hit_num_tallies_by_id_df": "largest_ATrich_num_by_id_df", | |
} | |
def read_in_pickles(df_n_fnstr_dict): | |
#df_fns = ["{}.pkl".format(x) for x in df_n_fnstr_dict.keys()] | |
df_n_fnstr_dict = {"{}.pkl".format(k):v for k,v in df_n_fnstr_dict.items()} | |
g = globals() #based on `how to use a string to make a python variable.md` | |
for k,v in df_n_fnstr_dict.items(): | |
g[v] = pd.read_pickle(k) | |
sys.stderr.write("\nData in '{}' read in; produced `{}`.".format(k,v)) | |
import sys | |
import pandas as pd | |
if os.path.isfile(unpacked_goal): | |
sys.stderr.write("\nAppears '{}' has already been unpacked.\n".format(file_needed)) | |
#bring the data into memory, if it isn't already | |
try: | |
len(globals()[list(df_n_fnstr_dict.items())[0][1]]) > 2 | |
except (NameError,KeyError) as e: | |
read_in_pickles(df_n_fnstr_dict) | |
elif os.path.isfile(file_needed): | |
!tar xzf {file_needed} | |
read_in_pickles(df_n_fnstr_dict) | |
else: | |
sys.stderr.write("\n\n*****************ERROR**************************\n" | |
"The file '{0}' is needed.\n" | |
"Upload '{0}' to this Jupyter session and re-run this cell.\n" | |
"*****************ERROR**************************\n".format(file_needed)) | |
sys.exit(1) | |
# check single file uploaded | |
file_required = "collected_seqs.tar.gz" # usually in another cell | |
import os | |
import sys | |
try: | |
os.path.isfile(file_required) | |
except NameError: | |
file_required = "collected_seqs.tar.gz" | |
if os.path.isfile(file_required): | |
!tar -xzf collected_seqs.tar.gz | |
!mv collected_seqs/* . | |
!rm -rf collected_seqs
sys.stderr.write("\nFile with sequences ('{}') observed and" | |
" unpacked.".format(file_required)) | |
else: | |
sys.stderr.write("\nFile with sequences '{}' not seen and so nothing done" | |
". Seems wrong.".format(file_required)) | |
sys.exit(1) | |
# Check single file uploaded with check on size | |
file_required = ""0_332yeast_genomesFROMshenETal2018.zip" # usually in another cell | |
size_expected = 2.902e+09 # in bytes # usually in another cell | |
# Upload the file prior to running this cell | |
import os | |
import sys | |
try: | |
os.path.isfile(file_required) | |
except NameError: | |
file_required = "0_332yeast_genomesFROMshenETal2018.zip" | |
if os.path.isfile(file_required): | |
# make sure it is as large as it should be since it takes so long to upload
f_size = os.path.getsize(file_required) # based on https://stackoverflow.com/a/2104083/8508004 | |
if f_size >= size_expected: | |
!mkdir genomes | |
!unzip -q 0_332yeast_genomesFROMshenETal2018.zip | |
!unzip -q 0_332yeast_genomes/332_genome_assemblies.zip | |
!mv *.fas genomes/. | |
sys.stderr.write("\nGenomes archive ('{}') observed and" | |
" unpacked.".format(file_required)) | |
else: | |
sys.stderr.write("\nGenomes archive ('{}') observed but is not" | |
" fully uploaded\nWait and run this cell again.".format(file_required)) | |
else: | |
sys.stderr.write("\nGenomes archive '{}' not seen and so nothing done" | |
". Seems wrong.".format(file_required)) | |
sys.exit(1) | |
#someone else's take on some of these concepts is in post at https://twitter.com/radekosmulski/status/1129116929589940232 | |
# check multiple files uploaded | |
import os | |
import sys | |
import pandas as pd | |
try: | |
type(files_required) | |
except NameError: | |
print("Setting `files_required`") | |
files_required = ["PB_n_1011_collection_df.pkl","other_all_stretchesN_df.pkl"] | |
for file_required in files_required: | |
if os.path.isfile(file_required): | |
if file_required == files_required[0]: | |
all_df = pd.read_pickle(file_required) | |
else: | |
other_df = pd.read_pickle(file_required) | |
sys.stderr.write("\nFile '{}' observed and" | |
" unpickled.".format(file_required)) | |
else: | |
sys.stderr.write("\nFile'{}' not seen and so nothing done" | |
".\nSeems wrong!??!\n\n".format(file_required)) | |
sys.exit(1) | |
#Check if a large remote archive has already been retrieved and unpacked. If not,
# take care of whatever is left to do to use the result. (For example, if the directory was
# set up via Cyverse to already have the archive, there is no need to retrieve it now,
# but it still needs unpacking.)
import os | |
import sys | |
archive_fn = "1011Assemblies.tar.gz" | |
archive_url = "http://1002genomes.u-strasbg.fr/files/1011Assemblies.tar.gz" | |
num_files_in_archive = 1011 | |
genomes_dir = 'GENOMES_ASSEMBLED' | |
expected_unpacked_fn = genomes_dir+"/"+"YBV.re.fa" | |
def unpack_and_delete_lrg_archive(archive_fn): | |
!tar -xzf {archive_fn} | |
if len(os.listdir(genomes_dir)) >= num_files_in_archive: | |
!rm {archive_fn} | |
sys.stderr.write("\nFile with genomes ('{}') observed and" | |
" unpacked.".format(archive_fn)) | |
if os.path.isfile(expected_unpacked_fn): | |
sys.stderr.write("\n**Nothing Done. Genomes from '{}' already obtained &" | |
" unpacked.**".format(archive_fn)) | |
else: | |
if os.path.isfile( | |
archive_fn) and not os.path.isfile(expected_unpacked_fn): | |
unpack_and_delete_lrg_archive(archive_fn) | |
if not os.path.isfile( | |
archive_fn) and not os.path.isfile(expected_unpacked_fn): | |
sys.stderr.write("\nGenome sequences not seen, and so obtaining" | |
" '{}'".format(archive_fn)) | |
#!curl -O {archive_url} | |
os.system(f"curl -O {archive_url}") | |
unpack_and_delete_lrg_archive(archive_fn) | |
else: | |
sys.stderr.write("\nSomething seems wrong.") | |
sys.exit(1) | |
# Pickle files function
import pickle
def pickle_dict(d,file_name):
with open(file_name, "wb") as f: | |
pickle.dump(d, f) | |
pickle_dict(di_dict, "di_dict.pkl") | |
#Unpickle the files with a function if a pickled version is present
import os
import pickle
def unpickle_dict(file_name):
with open(file_name, "rb") as f: | |
return pickle.load(f) | |
if os.path.exists(main_pickled_dict_file_to_check_for): | |
di_dict = unpickle_dict(main_pickled_dict_file_to_check_for) | |
print(f"Loaded data from {main_pickled_dict_file_to_check_for}.") | |
# Manage files with `fnmatch` (see just above about whether file uploaded, too) | |
# Basic fnmatch use | |
import fnmatch | |
for file in os.listdir(genomes_dir): | |
if fnmatch.fnmatch(file, '*.re.fa'): | |
!perl patmatch_1.2/unjustify_fasta.pl {genomes_dir}/{file} | |
#os.remove(os.path.join(genomes_dir, file)) #left over from development | |
output = !perl patmatch_1.2/patmatch.pl -c {promoter_pattern} {genomes_dir}/{file}.prepared | |
os.remove(os.path.join(genomes_dir, file+".prepared")) #delete file made for PatMatch | |
df = patmatch_results_to_df(output.n, pattern=promoter_pattern, name="promoter") | |
# more fnmatch basic use | |
tag_to_add ="1G03" | |
import os | |
import sys | |
import fnmatch | |
model_pattern = "model_*.pdb" | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, model_pattern): | |
os.rename(file, tag_to_add + file) | |
# fnmatch use combined with checking if a related file exists yet
# categorize those annotated already and those missed | |
import os | |
import sys | |
import fnmatch | |
extension_to_check = ".fa" | |
extension_to_see_if_exists = ".new" | |
num_checked = 0 | |
not_annotated = [] | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+extension_to_check): | |
num_checked += 1 | |
#print (file) | |
first_part_filen = file.rsplit(extension_to_check,1)[0] | |
# check if corresponding `.new` file exists | |
annotated_file = file+extension_to_see_if_exists | |
#print(annotated_file) | |
if os.path.isfile(annotated_file): | |
pass | |
else: | |
not_annotated.append(file) | |
print ("No {} file?".format(annotated_file)) | |
# Feedback | |
sys.stderr.write("{} sequences files checked; {} lack corresponding, " | |
"\nannotated `.new` files.".format(num_checked,len(not_annotated))) | |
sys.stderr.write("\nThe variable `not_annotated` lists the sequences missing annotated files.") | |
#fnmatch to make a list of files and then do something with related files (see below for how to use `glob.glob()` if you just need a list and
# aren't doing something with the names of the files as they are encountered). (I added an example covering both needs, iterating on many
# files and renaming them, at https://discourse.jupyter.org/t/rename-files-using-a-for/17144/2?u=fomightez .) (An example using glob or fnmatch to get base file names is at https://www.biostars.org/p/9539595/#9548023 ; in relation to base names, keep in mind `.stem` from Path, see https://stackoverflow.com/a/47496703/8508004 .)
import os | |
import sys | |
import fnmatch | |
extension_to_handle = ".gff3" | |
name_part_to_match = "mito.gff3" | |
associated_mito_noms= [] | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+name_part_to_match): | |
#print (file) | |
first_part_filen = file.rsplit(extension_to_handle,1)[0] | |
associated_mito_noms.append(first_part_filen) | |
# Now delete any files that end in `mito.fa` that are not in the list of the annotation files | |
extension_to_handle = ".fa" | |
name_part_to_match = "_mito.fa" | |
removed = 0 | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+name_part_to_match): | |
first_part_filen = file.rsplit(extension_to_handle,1)[0] | |
if (first_part_filen) not in associated_mito_noms: | |
os.remove(file) | |
removed += 1 | |
sys.stderr.write("\n{} files ending in `{}` removed" | |
".".format(removed,name_part_to_match)) | |
# use fnmatch and glob in a notebook to iterate on all `.py` Python script files in a directory and run them, even subsequent ones made by the
# scripts 'dynamically' in the course of running (was to answer a StackOverflow question, see https://stackoverflow.com/a/75087369/8508004 )
import os | |
import fnmatch | |
import glob | |
executed_scripts = [] | |
extension_to_match = ".py" | |
def execute_script(s): | |
%run {s} | |
while set(executed_scripts) != set(glob.glob(f"*{extension_to_match}")): | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+extension_to_match): | |
if file not in executed_scripts: | |
execute_script(file) | |
executed_scripts.append(file) | |
# glob use to just get list of file pathnames like part of what is done just above: | |
import glob | |
name_part_to_match = "mito.gff3" | |
associated_files = glob.glob(f"*{name_part_to_match}")
# recursive search for CSV files in the current directory OR subdirectories
csv_files = glob.glob("**/*.csv", recursive=True) | |
# I didn't find the explanation of `**` in the Python documentation (https://docs.python.org/3/library/glob.html) very clear until I read
# https://www.geeksforgeeks.org/how-to-use-glob-function-to-find-files-recursively-in-python/ ,
# but for matching files with an extension in the current directory or subdirectories, the example
# code in the documentation is more concise and results in paths that are easier to read and use.
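# A hedged pathlib-based sketch of the same recursive search (stdlib only; `Path.rglob('*.csv')`
# behaves like `glob.glob("**/*.csv", recursive=True)` here):
from pathlib import Path
csv_paths = [str(p) for p in Path('.').rglob('*.csv')]  # all CSVs in this directory tree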
# fnmatch use example with checking for a file it will produce and then only running if that isn't there. It will unpack
# a multi-entry FASTA file into a single file for each entry and rename them all to end in `.mito.fa`,
# leaving any original FASTA file already there ending in ".fa" alone.
import os | |
import sys | |
import fnmatch | |
example_produced_file = "NCYC3594.mito.fa" | |
if not os.path.isfile(example_produced_file): #so won't run again if already ran | |
name_part_to_match = ".fa" | |
name_part_to_expand_to = ".mito.fa" | |
old_files_with_ext = [] | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+name_part_to_match): | |
old_files_with_ext.append(file) | |
files_to_not_touch_despite_match = old_files_with_ext | |
seq_file = "SGDs288CplusPacBio_ADJUSTEDplusWoltersnW303forALIGNERS.fa" | |
!faidx --split-files {seq_file} | |
new_fasta = [] | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+name_part_to_match) and file not in files_to_not_touch_despite_match: | |
new_fasta.append(file) | |
#fix name if it needs fixing | |
for file in new_fasta: | |
if not fnmatch.fnmatch(file, '*'+name_part_to_expand_to): | |
new_file_name = file.split(".fa")[0] + name_part_to_expand_to | |
!mv {file} {new_file_name} | |
# fnmatch use Example with reading and modifying the current matched file | |
import fnmatch | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*mt.fsa'): | |
print (file) | |
# !blastn -query {file} -db chrmt.fsa -outfmt "6 qseqid sseqid stitle pident qcovs length mismatch gapopen qstart qend sstart send qframe sframe frames evalue bitscore qseq sseq" -out {file[:-9]}x.chrmt.comp.txt | |
blast_result = !blastn -query {file} -db S288c.mt.genome.fa -outfmt "6 qseqid sseqid stitle pident qcovs length mismatch gapopen qstart qend sstart send qframe sframe frames evalue bitscore qseq sseq" | |
blast_df = blast_to_df(blast_result.n, pickle_df=False) | |
#... | |
print(start_loc) | |
with open(file) as handle: | |
mito_seq = SeqIO.read(handle, "fasta") | |
# fix, based on where it says "i.e. shift the starting point on this plasmid," @ | |
#http://biopython.org/DIST/docs/api/Bio.SeqRecord.SeqRecord-class.html | |
left = mito_seq[:start_loc] # use one less than what matches '1' in | |
# those cases because of zero indexing in Python; gets handled by that | |
# subtraction above where `start_loc` is defined
right = mito_seq[start_loc:] | |
adj_mito_seq = right + left | |
# write result after fix | |
sys.stderr.write("\n\nFile with adjusted 'start' saved as " | |
"'{}'.".format(generate_new_name(file),)) | |
SeqIO.write( | |
adj_mito_seq, generate_new_name(file), "fasta"); | |
# Use fnmatch but skip files whose extension continues beyond what is searched (e.g., `.fai` indexes) and exclude a specific file
fn_to_check = "pep.fa" | |
sequences = "" | |
import os | |
import fnmatch | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+fn_to_check): | |
if not file.endswith(".fai") and file != "DBVPG6044.mt.pep.fa": | |
sequences += get_seq_from_multiFASTA_with_match_in_description( | |
file,gene_to_match, return_record_as_string=True) | |
# Use fnmatch to change the names of files with specific extensions (in a subdirectory) to different extensions | |
#Change name of genome files from ending in `.fas` to ending in `.genome.fa` ; rename files, renaming files | |
genomes_dir = "genomes" | |
old_extension = ".fas" | |
new_extension = ".genome.fa" | |
import os | |
import fnmatch | |
for file in os.listdir(genomes_dir):
    if fnmatch.fnmatch(file, '*'+ old_extension):
        !mv {genomes_dir}/{file} {genomes_dir}/{file.split(old_extension)[0]+new_extension}
# Package up a lot of various data sources and output streams (if trying to pack up
# just files, keep in mind the nbzip module; if you want a whole subdirectory hierarchy recursively, use https://stackoverflow.com/a/48141707/8508004 ; uncompress that example with `tar xf archive.tar`)
python_data_to_grab = { | |
'FILE_NAME_TO_GENERATE_A.md':(name_of_listA,"DESCRIPTION_TAG_to_put_on_top_line_of_file:"), | |
'FILE_NAME_TO_GENERATE_B.md': (name_of_listB,"TAG_to_put_on_top_line_of_file:"), | |
'FILE_NAME_TO_GENERATE_C.md':(out_stream.stderr,"TAG_to_put_on_top_line_of_file:"), | |
} | |
# PYTHON 2.7 VERSION | |
import sys | |
import contextlib | |
data_tag = "some_descriptive_string_here_about_info" | |
# fix for python 2 based on https://stackoverflow.com/a/44226422/8508004 | |
@contextlib.contextmanager | |
def redirect_stdout(target): | |
original = sys.stdout | |
sys.stdout = target | |
yield | |
sys.stdout = original | |
for file_name_to_use,py_obj_info in python_data_to_grab.items(): | |
py_obj,data_name = py_obj_info | |
with open(file_name_to_use, 'w') as f: | |
with redirect_stdout(f): | |
print(data_name + " =") | |
print(str(py_obj)) | |
# package up the files
!mkdir pertinent_data_for{data_tag}
for each_file in python_data_to_grab.keys():
    !mv {each_file} pertinent_data_for{data_tag}/.
!tar czf pertinent_data_for{data_tag}.tar.gz pertinent_data_for{data_tag}/
sys.stderr.write("Useful information for the set saved as "
    "`pertinent_data_for{}.tar.gz`".format(data_tag))
# IT'D BE BETTER TO incorporate `%store` in above, I think. see https://stackoverflow.com/a/32731418/8508004 & above here | |
# identify several files via file names and fnmatch and package up without placing in a directory first | |
# (see under 'Collecting all the results' in `Annotating mito sequences extracted from XXXX collection with MFannot and converting annotation file to gff3.ipynb` if need example with putting into directory first just using bash shell commands or | |
# search `!mv {each_file} pertinent_data/.` here for something similar) | |
archive_file_name = "annotations_for_four_putative_mitos_from332.tar.gz" | |
import os | |
import sys | |
import fnmatch | |
dl_files = [] | |
name_part_to_match = ".new" | |
for file in os.listdir('.'): | |
if fnmatch.fnmatch(file, '*'+name_part_to_match): | |
#print (file) | |
#first_part_filen = file.rsplit(extension_to_handle,1)[0] | |
dl_files.append(file) | |
!tar czf {archive_file_name} {" ".join(dl_files)} | |
sys.stderr.write("***************************DONE***********************************\n" | |
"'{}' generated. Download it.\n" | |
"***************************DONE***********************************".format(archive_file_name)) | |
#Note that when I added the `--transform` flag to making a tar, it disrupted passing Python variables into shell commands, and so even after consulting Claude and trying some things
#I found it easier to just hardcode the name, even though that is blatantly redundant and breaks DRY, like so:
archive_file_name = "results_for_merged_set.tar.gz" | |
!tar czf {archive_file_name} --transform 's/^\./results_for_merged_set/' {" ".join(list_of_files)} | |
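# A possible alternative sketch that sidesteps the shell-quoting issue entirely: use the
# stdlib `tarfile` module and its `arcname` parameter to nest the files under a directory
# name inside the archive (assumes the same `list_of_files` as above):
import tarfile
archive_file_name = "results_for_merged_set.tar.gz"
with tarfile.open(archive_file_name, "w:gz") as tar:
    for fn in list_of_files:
        tar.add(fn, arcname="results_for_merged_set/" + fn)  # place each file under that folder inside the archive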
#package up several files for download (if trying to pack up just files, keep in mind the nbzip module as an alternative; if you want a whole subdirectory hierarchy recursively, use https://stackoverflow.com/a/48141707/8508004 ; uncompress that example with `tar xf archive.tar`)
# make one file for downloading | |
archive_file_name = "collected_files.tar.gz" | |
dl_files = [f + "_tag.fa" for f in file_list]
!tar czf {archive_file_name} {" ".join(dl_files)} | |
sys.stderr.write("*****************DONE***********************************\n" | |
"'{}' generated. Download it.\n" | |
"*****************DONE***********************************".format(archive_file_name)) | |
#package up several files, plus files made from captured output streams, for download (keep in mind the nbzip module as an alternative; if you want a whole subdirectory hierarchy recursively, use https://stackoverflow.com/a/48141707/8508004 ; uncompress that example with `tar xf archive.tar`)
# make one file for downloading | |
archive_file_name = "collected_files.tar.gz" | |
dl_files = [f + "_tag.fa" for f in file_list]
# save & add the additional information files to collect | |
output_txt_filename_a = 'seqs_filtered_info.txt' | |
output_txt_filename_b = 'seqs_filtered.txt' | |
%store captured_stream_a.stderr >{output_txt_filename_a} #based on https://stackoverflow.com/a/32731418/8508004 | |
%store captured_stream_b.stdout >{output_txt_filename_b} #based on https://stackoverflow.com/a/32731418/8508004 | |
dl_files += [output_txt_filename_a, output_txt_filename_b] # or if really only one, `dl_files.append(filtered_out)` | |
!tar czf {archive_file_name} {" ".join(dl_files)} | |
sys.stderr.write("*****************DONE***********************************\n" | |
"{} generated. Download it.\n" | |
"*****************DONE***********************************".format(archive_file_name)) | |
# note: based on https://stackoverflow.com/a/32731418/8508004, `%store` above replaces writing the captured streams out by hand, which would look like this:
with open(output_txt_filename_a, 'w') as output_handler: | |
output_handler.write(captured_stream_a.stderr) | |
with open(output_txt_filename_b, 'w') as output_handler: | |
output_handler.write(captured_stream_b.stdout) | |
# Package up several dataframes and sequences | |
#Archive the CTD sequences (FASTA format) collected and any dataframes made | |
# Pickle each dataframe and also save as `tsv` for possible use elsewhere | |
strd_dataframes_fn_list = [] | |
def pickle_df_and_store_as_table(dataframe, prefix): | |
''' | |
Take a dataframe and a filename prefix and save a pickled form of that | |
dataframe and a text tabular data version (tab-separated values).
Returns the names of the pickled and text files.
''' | |
dataframe.to_pickle(prefix + ".pkl") | |
dataframe.to_csv(prefix + ".tsv", sep='\t',index = False) | |
return prefix + ".pkl", prefix + ".tsv" | |
# To automate the dataframe handling, make a dictionary with the filename prefix to use as each key
# and the associated dataframe as the value
df_n_fn_dict = { | |
"CTD_seq_of_protein_orthologs": CTD_seq_df, | |
"first_heptad_of_protein_orthologs": first_7_df, | |
"heptads_ofCTD_seq_of_protein_orthologs": repeat_df, | |
"main_heptads_ofCTD_seq_of_protein_orthologs": repeat_wo_first_df, | |
"fraction_matching_consensus_per_CTD": fraction_consensus_df, | |
} | |
import pandas as pd | |
for prefix, dataframe in df_n_fn_dict.items(): | |
#pkl_fn, text_table_fn = pickle_df_and_store_as_table(dataframe, prefix) | |
strd_dataframes_fn_list.extend(pickle_df_and_store_as_table(dataframe, prefix)) | |
# store `CTD_seqs_fn_list` as json since lighter-weight and more portable than pickling | |
CTD_seqs_fn_list_storedfn = "CTD_seqs_fn_list.json" | |
import json | |
with open(CTD_seqs_fn_list_storedfn, 'w') as f: | |
json.dump(CTD_seqs_fn_list, f) | |
# see my useful python snippets for reading json back in | |
#for ease in aligning or other uses later, save all the CTDs as a concatenated file
cat_fasta_fn = "CTD_seq_of_protein_orthologs.fa"
# !cat {" ".join(CTD_seqs_fn_list)} > {cat_fasta_fn} # faster, but not as good as awk if the files don't already have newlines at the end;
# that just results in the lines of the files as one long run-on that won't work for aligning
!awk 1 {" ".join(CTD_seqs_fn_list)} > {cat_fasta_fn} #based on https://stackoverflow.com/a/25030513/8508004
archiving_fn_list = CTD_seqs_fn_list + strd_dataframes_fn_list + [CTD_seqs_fn_list_storedfn , cat_fasta_fn] | |
archive_file_name = gene_name+"_orthologs_extracted_CTDs.tar.gz" | |
!tar czf {archive_file_name} {" ".join(archiving_fn_list)} # use the list for archiving command | |
sys.stderr.write("\nCollected CTD sequences" | |
" and tables of details gathered and saved as " | |
"`{}`.".format(archive_file_name)) | |
# Package up several dataframes and a list of genomes (see just below for only with dataframes) | |
# Pickle each dataframe and also save as `tsv` for possible use elsewhere | |
strd_dataframes_fn_list = [] | |
# store `genomes` as json since lighter-weight and more portable than pickling | |
# for easy json dumping for many list use when archiving: | |
file_names_for_lists_dict = { | |
"genomes_list.json":genomes, | |
} | |
import json | |
for fn, lizt in file_names_for_lists_dict.items(): | |
with open(fn, 'w') as f: | |
json.dump(lizt, f) | |
def pickle_df_and_store_as_table(dataframe, prefix): | |
''' | |
Take a dataframe and a filename prefix and save a pickled form of that | |
dataframe and a text tabular data version (tab-separated values).
Returns the names of the pickled and text files.
''' | |
dataframe.to_pickle(prefix + ".pkl") | |
dataframe.to_csv(prefix + ".tsv", sep='\t',index = False) | |
return prefix + ".pkl", prefix + ".tsv" | |
# To automate the dataframe handling, make a dictionary with the filename prefix to use as each key
# and the associated dataframe as the value
df_n_fn_dict = { | |
"mito_promoter_matches_df": df, | |
"mito_promoter_hit_num_tallies_by_id_df": largest_hit_num_by_id_df, | |
"disruptor_matches_df": disrupt_df, | |
"disruptor_hit_num_tallies_by_id_df": largest_disr_num_by_id_df, | |
"grich_matches_df": grich_df, | |
"grich_hit_num_tallies_by_id_df": largest_grich_num_by_id_df, | |
"endgrich_matches_df": end_grich_df, | |
"endgrich_hit_num_tallies_by_id_df": largest_endgrich_num_by_id_df, | |
"twenty_nineATrich_seq_matches_df": twenty_nine_df, | |
"twenty_nineATrich_seq_hit_num_tallies_by_id_df": largest_ATrich_num_by_id_df, | |
} | |
import pandas as pd | |
for prefix, dataframe in df_n_fn_dict.items(): | |
#pkl_fn, text_table_fn = pickle_df_and_store_as_table(dataframe, prefix) | |
strd_dataframes_fn_list.extend(pickle_df_and_store_as_table(dataframe, prefix)) | |
archiving_fn_list = strd_dataframes_fn_list + list(file_names_for_lists_dict.keys()) | |
archive_file_name = "Counts_promoter_motifs_among1011_21S_candidates_where_no_mito_prev_identified.tar.gz" | |
!tar czf {archive_file_name} {" ".join(archiving_fn_list)} # use the list for archiving command | |
sys.stderr.write("\nCollected dataframes" | |
" and tables of details gathered and saved as " | |
"`{}`.".format(archive_file_name)) | |
# for archiving just several dataframes with automated handling | |
archive_file_name = "dataframes_archived.tar.gz" | |
strd_dataframes_fn_list = [] | |
def pickle_df_and_store_as_table(dataframe, prefix): | |
''' | |
Take a dataframe and a filename prefix and save a pickled form of that | |
dataframe and a text tabular data version (tab-separated values).
Returns the names of the pickled and text files.
''' | |
dataframe.to_pickle(prefix + ".pkl") | |
dataframe.to_csv(prefix + ".tsv", sep='\t',index = False) | |
return prefix + ".pkl", prefix + ".tsv" | |
# To automate the dataframe handling, make a dictionary with the filename prefix to use as each key
# and the associated dataframe as the value
df_n_fn_dict = { | |
"df": df, | |
"another_df": another_df, | |
"yet_another_df": yet_another_df, | |
} | |
import pandas as pd | |
for prefix, dataframe in df_n_fn_dict.items(): | |
#pkl_fn, text_table_fn = pickle_df_and_store_as_table(dataframe, prefix) | |
strd_dataframes_fn_list.extend(pickle_df_and_store_as_table(dataframe, prefix)) | |
archiving_fn_list = strd_dataframes_fn_list | |
!tar czf {archive_file_name} {" ".join(archiving_fn_list)} # use the list for archiving command | |
sys.stderr.write("\nCollected dataframes" | |
" gathered and saved as " | |
"`{}`.".format(archive_file_name)) | |
# for easy json dumping for many list use when archiving: | |
file_names_for_lists_dict = { | |
"annotation_fns.json":annot_fns, | |
"genome_fnss.json":genomes_for_anot_fns, | |
"fn_pairings.json":file_pairs, | |
} | |
import json | |
for fn, lizt in file_names_for_lists_dict.items(): | |
with open(fn, 'w') as f: | |
json.dump(lizt, f) | |
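# A minimal sketch of reading one of those json lists back in later (same filename as above):
import json
with open("annotation_fns.json") as f:
    annot_fns = json.load(f)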
# Use curl to get a FASTA file from OCA and remove the HTML tags (may need `!pip install BS4` first)
# Get FASTA file for the non yeast one | |
import os | |
#!curl -o 1x0t_A.fa http://oca.weizmann.ac.il/oca-bin/send-seq?1x0t_A | |
os.system("curl -o 1x0t_A.fa http://oca.weizmann.ac.il/oca-bin/send-seq?1x0t_A") | |
# remove HTML to leave actual FASTA | |
# based on https://stackoverflow.com/a/21577649/8508004 and https://unix.stackexchange.com/a/64747 | |
import sys | |
from bs4 import BeautifulSoup | |
oca_file_to_fix = "1x0t_A.fa" | |
soup = BeautifulSoup(open(oca_file_to_fix), "html.parser") | |
for pre in soup.findAll("pre"): | |
fasta = pre.contents | |
%store fasta[0] >{oca_file_to_fix} | |
# NOTE ABOUT THE READING PART OF THIS NEXT BLOCK: the more modern, Pythonic way
# is to leave out the `,'r'` part. See https://stackabuse.com/read-a-file-line-by-line-in-python/ under
# 'Read a File Line-by-Line with a for Loop - Most Pythonic Approach'. Note also that it is
# best to use `.strip()` (or possibly slice with `[:-1]`) to remove the line ending if you are going to
# rearrange lines, because the last line usually lacks a newline character and you can get a weird
# merge if you alter the order. (A tiny strip-and-rearrange sketch follows the block below.)
# add identifiers to each `chr` so results for each strain clear later | |
chromosome_id_prefix = "chr" | |
def add_strain_id_to_description_line(file,strain_id): | |
''' | |
Takes a file and edits every description line to add | |
strain_id after the caret. | |
Saves the fixed file | |
''' | |
import sys | |
output_file_name = "temp.txt" | |
# prepare output file for saving so it will be open and ready | |
with open(output_file_name, 'w') as output_file: | |
# read in the input file | |
with open(file, 'r') as input_handler: # OR SEE NOTE ABOVE THIS CODE BLOCK HOW DON'T NEED `, 'r'` anymore. | |
# prepare to give feeback later or allow skipping to certain start | |
lines_processed = 0 | |
for line in input_handler: | |
lines_processed += 1 | |
if line.startswith(">"): | |
rest_o_line = line.split(">") | |
new_line = ">"+strain_id + rest_o_line[1] | |
else: | |
new_line = line | |
# Send text to output | |
output_file.write(new_line) | |
# replace the original file with edited | |
!mv temp.txt {file} | |
# Feedback | |
sys.stderr.write("\n{} chromosome identifiers tagged.".format(file)) | |
for s in yue_et_al_strains: | |
add_strain_id_to_description_line(s+".genome.fa",s) | |
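# The tiny strip-and-rearrange sketch promised in the note above this block (the file name
# here is hypothetical; the point is just to drop line endings before reordering lines):
with open("example_lines.txt") as fh:
    lines = [line.strip() for line in fh]  # no `,'r'` needed; strip the line endings
reordered = lines[1:] + lines[:1]          # e.g., move the first line to the end
text_out = "\n".join(reordered) + "\n"     # add the newlines back only when reassembling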
# A find / replace similar to the last example but pure Python (no IPython magics or shell use)
# (See `testing_repeat_number_by_looping_bendit_analysis.ipynb` for practical use of this to change a script on a loop to monitor effect on outcome) | |
script_name = "donut_plot_with_subgroups_from_dataframe.py" | |
def change_original_title(s): | |
''' | |
Change the plot title to the provided text. | |
''' | |
with open(script_name, 'r') as thefile: | |
script=thefile.read() | |
script = script.replace('BREAKDOWN', s) | |
with open(script_name, 'w') as output_file: | |
output_file.write(script) | |
change_original_title("NEW TITLE GOES HERE") | |
# Note for making substitutions, Python now lets you use f-strings (formatted string literals) to substitute
# variables into strings by name, but Python strings also have 'Template strings' built in (Ex. `import string; t = string.Template('Hello, $name!'); print(t.substitute(name='World'))`)
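# A minimal sketch of those two substitution options side by side (nothing here is specific
# to this gist; just the standard library):
import string
name = "World"
print(f"Hello, {name}!")               # f-string (Python 3.6+)
t = string.Template('Hello, $name!')
print(t.substitute(name=name))         # Template string from the standard library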
# Collect list of image files in a directory | |
# Run this in notebook that is in directory along with the folder containing | |
# images, i.e., is in the level above the actual images | |
import os | |
import sys | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
img_folder = "Untitled Folder" | |
img_file_extensions = [".png",".jpg",".jpeg"] | |
list_imgs_in_directory = [] | |
for file in os.listdir(img_folder): | |
#print (file) | |
if Path(file).suffix in img_file_extensions: | |
list_imgs_in_directory.append(file) | |
len(list_imgs_in_directory) | |
#Pathlib in Python 2 or 3 example: | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
# list all files in a directory | |
[item for item in Path('.').glob('*')] # based on | |
# https://jefftriplett.com/2017/pathlib-is-wonderful/ | |
# list final file extension , see 'Path.suffix' at | |
#https://docs.python.org/3/library/pathlib.html | |
[item.suffix for item in Path('.').glob('*')] | |
# list the final suffixes if there is more than one - see 'Path.suffixes' at | |
#https://docs.python.org/3/library/pathlib.html | |
# Collect list of image files in a directory and display them in a Jupyter | |
# notebook cell | |
# Run this in notebook that is in directory along with the folder containing | |
# images, i.e., is in the level above the actual images | |
import os | |
import sys | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
from IPython.display import Image | |
from IPython.display import display | |
img_folder = "Untitled Folder" | |
img_file_extensions = [".png",".jpg",".jpeg"] | |
list_imgs = [] | |
for file in os.listdir(img_folder): | |
#print (file) | |
if Path(file).suffix in img_file_extensions: | |
list_imgs.append(Path(img_folder,file)) | |
imgl = [Image(filename=str(x)) for x in list_imgs] #had to cast the | |
# path object to a string or else `display.py` was giving error | |
# `'PosixPath' object has no attribute 'split'`;seems `display.py` not able to | |
# handle path objects yet. | |
display(*imgl) | |
# Collect list of image files in a directory and display them in a Jupyter | |
# notebook cell WITH FILE NAMES SHOWN BELOW EACH | |
# Run this in notebook that is in directory along with the folder containing | |
# images, i.e., is in the level above the actual images | |
import os | |
import sys | |
try: | |
from pathlib import Path | |
except ImportError: | |
from pathlib2 import Path | |
from IPython.display import Image | |
from IPython.display import display | |
img_folder = "Untitled Folder" | |
img_file_extensions = [".png",".jpg",".jpeg"] | |
list_imgs = [] | |
for file in os.listdir(img_folder): | |
#print (file) | |
if Path(file).suffix in img_file_extensions: | |
list_imgs.append(Path(img_folder,file)) | |
for i in list_imgs: | |
display(Image(filename=str(i))) | |
print("ABOVE: {}".format(i.name)) | |
#slide carousel-like example to show a subset of images that changes every five seconds (from `demo_palette.ipynb` in pymol-binder) with HTML labels for each image to make the text stand out: | |
import IPython.display as ipd | |
import time | |
import os | |
import sys | |
import random | |
def display_subset(): | |
img = {} | |
for x in random.sample(range(shuffles_to_do), 3): | |
img[x] = ipd.Image(filename="img_{}.png".format(x)) | |
ipd.display(img[x]) | |
ipd.display(ipd.HTML('ABOVE: <font size=5><b>img_{}.png</b></font>'.format(x))) | |
time.sleep(5) | |
ipd.clear_output(wait=True) | |
while True: | |
display_subset() | |
# Subset / restrict to a random sampling of items in a list, based on https://pynative.com/python-random-sample/
# Good for doing right before EVERYTHING GETS PROCESSED, to pick a subset for testing instead
# of defining one specifically
import random | |
genomes = random.sample(population=genomes, k=15) | |
# Run a function every 8 minutes | |
%load https://gist.githubusercontent.com/fomightez/b012e51ebef6ec58c1515df3ee0c850a/raw/300da6c67ceeaf5384a3e500648b993345c361cb/run_every_eight_mins.py | |
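# I haven't reproduced that gist's code here; a minimal, hedged sketch of the general idea
# (call the function, then sleep, in a loop) that doesn't depend on the loaded script:
import time
def do_task():
    print("running task")      # placeholder for the real work
while True:
    do_task()
    time.sleep(8 * 60)          # wait eight minutes between runs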
# RELOAD for when you are using `from python_file_containing_function import a_function` (Python 3) | |
# Reload a function into a notebook after editing the script file in editor of running session; | |
# this allows calling the function in the notebook whereas if just reload the script won't | |
import importlib | |
import python_file_containing_function; importlib.reload(python_file_containing_function); from python_file_containing_function import a_function | |
# above line from https://stackoverflow.com/a/11724154/8508004 | |
# RELOAD for when you are using `import python_file` (Python 3) | |
# Reload a script into a notebook after editing the script file in editor of running session; | |
# note it is much more easily done then the case where using `from foo import foo`, but | |
# `from foo import foo` makes it easier to work in a notebook in many ways. | |
import importlib; import python_file; importlib.reload(python_file)
# Create a download link in Jupyter notebook; from | |
# https://medium.com/ibm-data-science-experience/how-to-upload-download-files-to-from-notebook-in-my-local-machine-6a4e65a15767 | |
# <-- Haven't tried it yet but it might be handy | |
# for idea I am working on for making animations from pymol files using jmol or any where where I suggest | |
# downloading an archive of results | |
from IPython.display import HTML
import base64
def create_download_link( df, title = "Download CSV file", filename = "data.csv"): | |
csv = df.to_csv() | |
b64 = base64.b64encode(csv.encode()) | |
payload = b64.decode() | |
html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>' | |
html = html.format(payload=payload,title=title,filename=filename) | |
return HTML(html) | |
create_download_link(df) | |
# For handling archive files to make a clickable download link, I found the section 'Create and download CSV/zip file' at https://blog.softhints.com/jupyter-ipython-download-files/ ; however, the code seems incomplete, as I don't see how they make the zip file in conjunction with sending it through as the payload. (I assume `create_download_files()` was triggered elsewhere already.) And, a minor thing, why not return `HTML(html)` in that code block?
# Maybe some of the answers here could help reverse that ZipFile approach so it works for downloading to local:
# https://stackoverflow.com/questions/5710867/downloading-and-unzipping-a-zip-file-without-writing-to-disk
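# A hedged sketch of one way to finish that idea: build the zip in memory with the stdlib
# `zipfile` module and hand the bytes to a data-URI link (the file names and function name
# here are hypothetical, not from that blog post):
import base64
import io
import zipfile
from IPython.display import HTML
def create_zip_download_link(file_names, archive_name="results.zip", title="Download zip"):
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
        for fn in file_names:
            zf.write(fn)  # add each existing file to the in-memory zip
    payload = base64.b64encode(buf.getvalue()).decode()
    html = ('<a download="{fn}" href="data:application/zip;base64,{payload}" '
            'target="_blank">{title}</a>').format(fn=archive_name, payload=payload, title=title)
    return HTML(html)
#create_zip_download_link(["data.csv"])  # example call with a hypothetical file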
# Related to the topic of making downloadable links from Jupyter pages, I found https://stackoverflow.com/questions/26497912/trigger-file-download-within-ipython-notebook | |
# and | |
# https://stackoverflow.com/questions/24437661/retrieving-files-from-remote-ipython-notebook-server/24439480#24439480 about | |
# FileLink / FileLinks; however, in JupyterLab, if it is a gif or png that JupyterLab renders, it opens it in the application
# instead of allowing download. And if it is a tarball that it doesn't render and you click on it, instead of offering to download
# it, it says it isn't UTF-8 encoded.
# Fortunately when in Voila apps, you can list the files with the following: | |
from IPython.display import FileLink, FileLinks | |
FileLinks(".") | |
# And in VOILA those can be right-clicked on and downloaded to a local drive from those links using `Save link as...`.
# However, a better, related solution for Voila, because it makes a pop-up automatically without needing the user to use `Save link as...`, is:
%%html | |
<a href="SVM_Confusion_Matrix.jpg" download="SVM_Confusion_Matrix.jpg">Click HERE to Download SVM image</a> | |
# Using Panel (installable via pip) in a notebook (NOT VOILA) you can make a download file, too: | |
import panel as pn | |
pn.extension() | |
# Create option to download SVM Confusion Matrix Graphic | |
pn.widgets.FileDownload( | |
file="SVM_Confusion_Matrix.jpg", | |
embed=False, | |
name="Save SVM Confusion Matrix image" | |
) |