- 
      
- 
        Save Jwely/ad8eb800bacef9e34dd775f9b3aad987 to your computer and use it in GitHub Desktop. 
| import ftplib | |
| import os | |
| import re | |
| """ | |
| MIT license: 2017 - Jwely | |
| Example usage: | |
| ``` python | |
| import ftplib | |
| ftp = ftplib.FTP(mysite, username, password) | |
| download_ftp_tree(ftp, remote_dir, local_dir) | |
| ``` | |
| The code above will look for a directory called "remote_dir" on the ftp host, and then duplicate the | |
| directory and its entire contents into the "local_dir". | |
| *** Note that if wget is an option, I recommend using that instead *** | |
| """ | |
def _is_ftp_dir(ftp_handle, name, guess_by_extension=True):
    """
    Report whether an item listed on the ftp server is a directory.

    :param ftp_handle: ftp connection object; only its ``pwd()`` and ``cwd()`` methods are used
    :param name: remote path of the item to probe
    :param guess_by_extension: when True, a "." in the fourth-to-last position of ``name`` is
        taken to be a three character file extension and the item is reported as a file
        without contacting the server
    :return: True if the server allowed changing into ``name``, otherwise False
    """
    # Cheap heuristic first: it avoids a cwd round trip for every ordinary file.
    looks_like_file = guess_by_extension is True and len(name) >= 4 and name[-4] == '.'
    if looks_like_file:
        return False

    start_dir = ftp_handle.pwd()  # where to return to after the probe
    try:
        ftp_handle.cwd(name)  # only succeeds if name is a directory we may enter
        ftp_handle.cwd(start_dir)  # undo the probe
    except Exception as err:  # includes ftplib.error_perm, which is handled the same way
        print(err)
        return False
    return True
def _make_parent_dir(fpath):
    """
    Ensure the parent directory of a file path exists, creating missing levels as needed.

    :param fpath: path of a file that is about to be written; only its directory part is used
    :raises OSError: if the directory cannot be created (for example, permission denied)
    """
    dirname = os.path.dirname(fpath)

    # A bare file name has an empty directory part: the file belongs in the current working
    # directory, which already exists. os.makedirs("") would raise, so stop here.
    if not dirname or os.path.exists(dirname):
        return

    # makedirs builds every missing ancestor in one call; exist_ok tolerates the directory
    # appearing between the existence check above and this call.
    os.makedirs(dirname, exist_ok=True)
    print("created {0}".format(dirname))
def _download_ftp_file(ftp_handle, name, dest, overwrite):
    """
    Download a single file from an ftp server.

    :param ftp_handle: an authenticated ftplib.FTP instance
    :param name: remote path of the file, sent to the server in the RETR command
    :param dest: local path to write to; leading "/" characters are removed so the file is
        always written relative to the current working directory
    :param overwrite: set to True to download even if the local file already exists
    :raises Exception: any error raised during the transfer is re-raised after the partially
        written local file has been removed
    """
    # Normalise once and use the same path everywhere, so the directory that gets created is
    # the directory the file is actually opened in.
    local_path = dest.lstrip("/")
    _make_parent_dir(local_path)

    if os.path.exists(local_path) and overwrite is not True:
        print("already exists: {0}".format(local_path))
        return

    try:
        with open(local_path, 'wb') as f:
            ftp_handle.retrbinary("RETR {0}".format(name), f.write)
    except FileNotFoundError:
        print("FAILED: {0}".format(local_path))
    except Exception:
        # Do not leave a truncated file behind: a later run with overwrite=False would
        # report it as "already exists" and never fetch the real content.
        if os.path.isfile(local_path):
            os.remove(local_path)
        raise
    else:
        print("downloaded: {0}".format(local_path))
def _file_name_match_patern(pattern, name):
    """
    Decide whether a file name passes the optional regex filter.

    :param pattern: Python regex pattern, or None to accept every name
    :param name: the name to test; matching is anchored at the start of the string (re.match)
    :return: True if ``pattern`` is None or matches ``name``, otherwise False
    """
    return pattern is None or bool(re.match(pattern, name))
def _mirror_ftp_dir(ftp_handle, name, overwrite, guess_by_extension, pattern):
    """
    Replicate a directory on an ftp server recursively.

    :param ftp_handle: an authenticated ftplib.FTP instance
    :param name: remote directory to list and mirror
    :param overwrite: passed through to the file download; True forces re-download
    :param guess_by_extension: passed through to the directory check
    :param pattern: Python regex pattern or None; it is applied to each listed item exactly as
        the server returns it (which may include the directory prefix), not to ``name``
    """
    for item in ftp_handle.nlst(name):
        # A listing may contain "." and ".." entries; descending into them never terminates.
        if item.rstrip("/").rsplit("/", 1)[-1] in (".", ".."):
            continue

        if _is_ftp_dir(ftp_handle, item, guess_by_extension):
            # If the listing hands back the directory we are already mirroring, recursing
            # would repeat the same listing forever, so skip it.
            if item == name:
                continue
            _mirror_ftp_dir(ftp_handle, item, overwrite, guess_by_extension, pattern)
        elif _file_name_match_patern(pattern, item):
            _download_ftp_file(ftp_handle, item, item, overwrite)
        # files that do not match the pattern are skipped quietly
def download_ftp_tree(ftp_handle, path, destination, pattern=None, overwrite=False, guess_by_extension=True):
    """
    Downloads an entire directory tree from an ftp server to the local destination

    The process working directory is switched to ``destination`` while mirroring and is always
    restored afterwards, even if the download fails part way through.

    :param ftp_handle: an authenticated ftplib.FTP instance
    :param path: the folder on the ftp server to download
    :param destination: the local directory to store the copied folder; it must already exist
    :param pattern: Python regex pattern, only files that match this pattern will be downloaded.
    :param overwrite: set to True to force re-download of all files, even if they appear to exist already
    :param guess_by_extension: It takes a while to explicitly check if every item is a directory or a file.
        if this flag is set to True, it will assume any item ending with a three character extension ".???" is
        a file and not a directory. Set to False if some folders may have a "." in the fourth to last position
        of their names.
    """
    path = path.lstrip("/")
    original_directory = os.getcwd()  # remember working directory before function is executed
    os.chdir(destination)  # change working directory to ftp mirror directory

    try:
        _mirror_ftp_dir(
            ftp_handle,
            path,
            pattern=pattern,
            overwrite=overwrite,
            guess_by_extension=guess_by_extension)
    finally:
        # restore the caller's working directory whether or not mirroring succeeded
        os.chdir(original_directory)
if __name__ == "__main__":
    # Example usage mirroring all jpg files in an FTP directory tree.
    # The host and directory values below are placeholders; fill them in before running.
    mysite = "some_ftp_site"
    username = "anonymous"
    password = None
    remote_dir = ""
    local_dir = ""
    # raw string so that "\." reaches the regex engine without an invalid-escape warning
    pattern = r".*\.jpg$"

    # the context manager closes the connection even if the download raises
    with ftplib.FTP(mysite, username, password) as ftp:
        download_ftp_tree(ftp, remote_dir, local_dir, pattern=pattern, overwrite=False, guess_by_extension=True)
Aah, I see. rsync looks quite interesting! I spent some time and wrote a similar script for backing up my FTP server. It is based on this script but uses the ftputil library, so I could avoid the file/folder guessing and checking and rely more on the library to do things. I put it on GitHub so that future readers can save some time if they want to do something similar.
Hi @Jwely . Thanks for the code. Need some help. My local_dir = "D:/Projects/Temporary files" , mysite = 'ftp.dlptest.com' , username = 'dlpuser' , password = 'rNrKYTX9g7z3RgJRmxWuGHbeu' , remote_dir = '/' , and pattern = "..upt" or "..txt".
The program is getting executed without any errors but I am unable to find any of the files in the local directory.
Could you please check and let me know what is the issue or where am I going wrong.
@PhaniChandan, Have you tried adding ftp.set_debuglevel(1) in the main method ? Maybe there will be more information. And then if everything is still ok, try changing pattern to None.
@Jwely, thanks for this code! I am stuck in a situation where I cannot use wget. I am using your solution, but I seem to have hit an unhandled edge case that causes a silent failure: any file or folder with square brackets in its name ( [ ] ) causes a recursion loop, because ftp_handle.nlst(name) then returns only the targeted directory on the FTP server and not the full listing that nlst() normally gives. It works fine for other cases.
I believe that bracket handling needs to be done, as I have tested this with multiple files and directories. However, changing the names of the files and directories on the FTP server is not an option.
As i could not edit my comment for some reason:
I found a workaround for my issue!
While this has not been proven to work with all edge cases (Japanese characters still do not work so far!), I just build the nlst() output from mlsd() and call that helper instead of `ftp_handle.nlst()` in line 84. This now has no problem with directories that have square brackets in their names.
   def nlstSafe(self, directory):
       """
       Build an nlst()-style list of paths for ``directory`` from mlsd() output.

       NOTE(review): ``self`` is not used and ``ftp_handle`` is not a parameter, so
       ``ftp_handle`` must be available in the scope this snippet is pasted into -- confirm.

       :param directory: remote directory to list
       :return: list of ``directory`` joined with the first element of each mlsd() entry
       """
       out = []
       for item in ftp_handle.mlsd(directory):
           # item[0] is presumably the entry name -- TODO confirm against ftplib's mlsd() docs.
           # NOTE(review): os.path.join uses the local OS separator; verify this is acceptable
           # for remote paths when running on Windows.
           out.append(os.path.join(directory, item[0]))
       return out
@Jwely, Thanks for this code!
I have a question about the _mirror_ftp_dir function. Shouldn't the second argument of the _file_name_match_patern function be "item" instead of "name"? Since "name" refers to the directory path, _file_name_match_patern(pattern, name) would be applying the regular expression to the directory path. I think "item" would be appropriate if you want to apply the regular expression to the file path.
:) ... I tried to look up to a bit different solution. I think I used rsync or rclone cli app to sync the ftp target. But also, I have got couple issues. But I think that world is beautiful in any case :)