Last active
January 23, 2025 12:30
-
-
Save Kautenja/1044ea96d08330b0bdb53d5bbab87959 to your computer and use it in GitHub Desktop.
Functions for working with remote files using pandas and paramiko (SFTP/SSH).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Functions for working with remote files using pandas and paramiko (SFTP/SSH).""" | |
import pandas as pd | |
import paramiko | |
def read_csv_sftp(hostname: str, username: str, remotepath: str, *args, **kwargs) -> pd.DataFrame:
    """
    Read a CSV file from a remote host using SFTP over SSH.

    The SSH connection, the SFTP session, and the remote file handle are
    all closed before this function returns, whether it returns normally
    or raises.

    Args:
        hostname: the remote host to read the file from
        username: the username to login to the remote host with
        remotepath: the path of the remote file to read
        *args: positional arguments to pass to pd.read_csv
        **kwargs: keyword arguments to pass to pd.read_csv

    Returns:
        a pandas DataFrame with data loaded from the remote host

    Raises:
        Whatever paramiko raises while connecting or opening the remote
        file, and whatever pd.read_csv raises while parsing; nothing is
        caught here.

    Note:
        Because the remote file is closed on return, options that make
        pd.read_csv return a lazy reader instead of a DataFrame (such as
        ``chunksize`` or ``iterator=True``) are presumably unusable with
        this function -- verify before relying on them.
    """
    # context managers guarantee cleanup in reverse order of acquisition
    # (file, then SFTP session, then SSH client) even if a step fails
    with paramiko.SSHClient() as client:
        # NOTE(review): AutoAddPolicy accepts host keys that are not already
        # known, so the server's identity is not verified on first contact.
        # Kept for backward compatibility; consider loading known host keys
        # and using a stricter policy where that matters.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(hostname, username=username)
        with client.open_sftp() as sftp, sftp.open(remotepath) as remote_file:
            # start fetching the whole file in the background so the many
            # small reads issued by pandas do not each pay a network
            # round trip
            remote_file.prefetch()
            return pd.read_csv(remote_file, *args, **kwargs)
# the public API of this module: only the names listed here are exported
# by `from <module> import *`
__all__ = ["read_csv_sftp"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this function! FYI adding
remote_file.prefetch()
between lines 27 and 28 sped this up by 50x for me