Skip to content

Instantly share code, notes, and snippets.

@vaclavdekanovsky
Last active October 28, 2020 21:45
Show Gist options
  • Select an option

  • Save vaclavdekanovsky/8867d276227b50244050f58f874075a9 to your computer and use it in GitHub Desktop.

Select an option

Save vaclavdekanovsky/8867d276227b50244050f58f874075a9 to your computer and use it in GitHub Desktop.
Unzip a group of zip files in a folder
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.716281Z",
"start_time": "2020-10-28T21:45:03.706272Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import zipfile"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.731240Z",
"start_time": "2020-10-28T21:45:03.718241Z"
}
},
"outputs": [],
"source": [
"folder = \"to_process\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.763126Z",
"start_time": "2020-10-28T21:45:03.733200Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['a.xml', 'Test_001_20201027.zip', 'xyz_001_20201029.zip']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# os.listdir returns a list with all files and folders contained in a folder\n",
"os.listdir(folder)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.779079Z",
"start_time": "2020-10-28T21:45:03.767110Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['Test_001_20201027.zip', 'xyz_001_20201029.zip']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# to keep only the .zip files, use a list comprehension with str.endswith\n",
"[f for f in os.listdir(folder) if f.endswith(\".zip\")]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.795061Z",
"start_time": "2020-10-28T21:45:03.781074Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['Test_001_20201027.zip', 'xyz_001_20201029.zip']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# alternatively, you can compare the extension returned by os.path.splitext\n",
"[f for f in os.listdir(folder) if os.path.splitext(f)[1] == \".zip\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Break down the list comprehension"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.810998Z",
"start_time": "2020-10-28T21:45:03.797054Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['Test_001_20201027.zip', 'xyz_001_20201029.zip']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# a list comprehension is a concise way to build a list with a for loop; the equivalent explicit loop is:\n",
"\n",
"# initiate an empty list\n",
"output = []\n",
"\n",
"# iterate over the files/subfolders in the folder\n",
"for f in os.listdir(folder):\n",
" \n",
" # if the file/folder ends with a string \".zip\"\n",
" if f.endswith(\".zip\"):\n",
" \n",
" # append it to the output\n",
" output.append(f)\n",
" \n",
"output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Unzip all zips in the folder"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.842908Z",
"start_time": "2020-10-28T21:45:03.820968Z"
}
},
"outputs": [],
"source": [
"# extract every .zip archive found in the folder into \"temp\"\n",
"zip_names = [name for name in os.listdir(folder) if name.endswith(\".zip\")]\n",
"for zip_name in zip_names:\n",
"    archive_path = os.path.join(folder, zip_name)\n",
"    with zipfile.ZipFile(archive_path, 'r') as archive:\n",
"        archive.extractall(\"temp\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can wrap the unzipping in a function, which lets you squeeze the final code into a one-liner."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.858896Z",
"start_time": "2020-10-28T21:45:03.845900Z"
}
},
"outputs": [],
"source": [
"def unzip(folder: str, file: str, folder_to_extract: str) -> list:\n",
" \"\"\"unzips a file in a folder into folder_to_extract\n",
" returns a list of files in the zip archive\"\"\"\n",
" with zipfile.ZipFile(os.path.join(folder,file), 'r') as zip_ref:\n",
" zip_ref.extractall(folder_to_extract)\n",
" return zip_ref.namelist()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2020-10-28T21:45:03.874821Z",
"start_time": "2020-10-28T21:45:03.860859Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[['a.xml'], ['a.xml']]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# calling unzip on each matching file can be squeezed into the list comprehension\n",
"[unzip(folder, f, \"temp\") for f in os.listdir(folder) if f.endswith(\".zip\")]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment