Created
April 15, 2025 08:21
-
-
Save dpoulopoulos/666e17fb42e98919d65d777f5848b84b to your computer and use it in GitHub Desktop.
Document-to-Podcast Snakemake adaptation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def download_document(document_url: str, timeout: float = 30.0) -> bytes:
    """Download a document from the given URL.

    Args:
        document_url (str): The URL of the document to download.
        timeout (float): Seconds to wait for the server to connect and to
            send data before giving up. Defaults to 30.

    Returns:
        bytes: The raw (undecoded) body of the HTTP response.

    Raises:
        requests.exceptions.HTTPError: If the server replies with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    import logging

    import requests

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    logger.info("Downloading document from %s...", document_url)
    # requests applies no timeout by default; without one, a stalled server
    # would block the whole workflow indefinitely.
    response = requests.get(document_url, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad responses
    return response.content
# Fetch the source document and store the raw response bytes in document.html.
rule download_document:
    output:
        output_path = "document.html"
    params:
        # Overridable from the command line or a config file, e.g.
        #   snakemake --config document_url=https://example.org/post.html
        # Falls back to the original article when no value is configured.
        document_url = config.get(
            "document_url",
            "https://lmsys.org/blog/2024-07-01-routellm/",
        ),
    run:
        content = download_document(params.document_url)
        # Binary mode: download_document returns undecoded bytes.
        with open(output.output_path, 'wb') as f:
            f.write(content)
# Run the transformer-aarm64:v0.0.1 image on document.html to produce
# processed_document.txt. The current working directory is bind-mounted at
# /work inside the container, so input and output paths are given under /work.
rule convert_html_to_text:
    input:
        "document.html"
    output:
        "processed_document.txt"
    shell:
        # "$(pwd)" is quoted so that a working directory containing spaces is
        # passed to `docker run -v` as a single argument.
        """
        docker run --rm \
            -v "$(pwd)":/work \
            transformer-aarm64:v0.0.1 \
            --input /work/{input} \
            --output /work/{output}
        """
# Run the text-to-script-aarm64:v0.0.1 image on processed_document.txt to
# produce podcast_script.txt. The current working directory is bind-mounted at
# /work inside the container, so input and output paths are given under /work.
rule write_podcast_script:
    input:
        "processed_document.txt"
    output:
        "podcast_script.txt"
    shell:
        # "$(pwd)" is quoted so that a working directory containing spaces is
        # passed to `docker run -v` as a single argument.
        """
        docker run --rm \
            -v "$(pwd)":/work \
            text-to-script-aarm64:v0.0.1 \
            --input /work/{input} \
            --output /work/{output}
        """
# Run the script_to_speech-aarm64:v0.0.1 image on podcast_script.txt to
# produce podcast.wav. The current working directory is bind-mounted at /work
# inside the container, so input and output paths are given under /work.
rule generate_podcast:
    input:
        "podcast_script.txt"
    output:
        "podcast.wav"
    shell:
        # "$(pwd)" is quoted so that a working directory containing spaces is
        # passed to `docker run -v` as a single argument.
        """
        docker run --rm \
            -v "$(pwd)":/work \
            script_to_speech-aarm64:v0.0.1 \
            --input /work/{input} \
            --output /work/{output}
        """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment