Skip to content

Instantly share code, notes, and snippets.

@wassname
Last active July 21, 2022 13:50
Show Gist options
  • Save wassname/b25471b0f3bb2f9ff81f to your computer and use it in GitHub Desktop.
Save wassname/b25471b0f3bb2f9ff81f to your computer and use it in GitHub Desktop.
Convert .gitignore to .dockerignore: quick and dirty.
"""
Convert .gitignore to .dockerignore: quick and dirty.
This is a quick and dirty script to convert this:
`__pycache__/`
Into this:
```
__pycache__
*/__pycache__
*/*/__pycache__
*/*/*/__pycache__
```
Specifically, [gitignores](https://git-scm.com/docs/gitignore) use a mixture of filename and shell glob matching with lots of rules, whereas dockerignore
files use simple [golang matching](https://golang.org/pkg/path/filepath/#Match).
Compare [this dockerignore file](https://github.com/themattrix/python-pypi-template/blob/master/.dockerignore)
and the [equivalent gitignore file](https://github.com/themattrix/python-pypi-template/blob/master/.gitignore)
"""
def add_subdirs(pattern, max_depth=4):
    """
    Yield ``pattern`` once per directory depth, then a blank separator.

    The pattern is emitted unchanged first, then with one more leading
    ``*/`` for every further level, ``max_depth`` entries in total:
    ``*.pyc`` becomes ``*.pyc``, ``*/*.pyc``, ``*/*/*.pyc``, etc.
    An empty string is always yielded last so that consecutive groups
    are separated by a blank line in the generated file.
    """
    for depth in range(max_depth):
        # depth 0 is the bare pattern; each deeper level adds one '*/'
        yield '*/' * depth + pattern
    # blank entry that separates this group from the next one
    yield ""
def generate_dockerignore(gitignore, max_depth=5, prefix_lines=None, comment=True):
    """Convert gitignore lines into dockerignore lines.

    Args:
        gitignore: iterable of gitignore lines (for example the result of
            ``readlines()`` or of splitting the file text on newlines).
        max_depth: number of directory levels every pattern is expanded to,
            both for ``**`` wildcards and for unanchored patterns.
        prefix_lines: optional extra lines that are converted ahead of
            ``gitignore`` (for example ``['.git']``).
        comment: when true, the output starts with a "converted by" header
            comment.

    Returns:
        A list of dockerignore lines. Blank lines and comments are copied
        through, ``**`` patterns are expanded to one line per depth,
        root-anchored patterns (leading ``/``) are kept once, and every
        other pattern is expanded with ``add_subdirs``.
    """
    # assemble the input in a fresh list so no argument is ever mutated
    lines = []
    if comment:
        lines.append('# converted by generate_dockerignore.py')
    lines.extend(prefix_lines or [])
    lines.extend(gitignore)

    di = []  # docker ignore lines
    for line in lines:
        # these slashes are ignored by dockerignore
        # (note: nothing is stripped when the line still ends with a newline)
        line = line.rstrip('/')

        if line.strip() == '' or line.startswith('#'):
            # whitespace and comments are copied through unchanged
            di.append(line)
        elif '**' in line:
            # replace the wildcard glob ** with *, */*, */*/*, etc.
            # Plain str.replace is used so that a literal '{' or '}' in the
            # pattern cannot break the substitution. The pattern is emitted
            # as-is for each depth: prefixing '*/' onto a pattern anchored
            # with a leading '/' would produce entries such as '*//foo'.
            replacement = '*'
            for _ in range(max_depth):
                di.append(line.replace('**', replacement))
                replacement = '*/' + replacement
        elif line.startswith('/'):
            # anchored to the root of the tree: keep it exactly once instead
            # of silently dropping the rule
            di.append(line)
        else:
            # unanchored shell glob: match it at every directory level
            di += add_subdirs(line, max_depth)
    return di
# Sample gitignore input used by the self-test below.
test_gitignore = """
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
""".split('\n')

# Expected conversion of test_gitignore with max_depth=4. Every expanded
# group is followed by the blank separator line that add_subdirs yields,
# so the expected text must contain those blank lines as well.
test_dockerignore = """
# Byte-compiled / optimized / DLL files
__pycache__
*/__pycache__
*/*/__pycache__
*/*/*/__pycache__

*.py[cod]
*/*.py[cod]
*/*/*.py[cod]
*/*/*/*.py[cod]

""".split('\n')
if __name__ == "__main__":
    # Self-test: convert the sample gitignore, show the actual output
    # followed by the expected output, then check that both agree.
    results = generate_dockerignore(
        test_gitignore,
        prefix_lines=[],
        max_depth=4,
        comment=False,
    )
    actual_text = '\n'.join(results)
    expected_text = '\n'.join(test_dockerignore)
    print(actual_text)
    print(expected_text)
    assert len(test_dockerignore) == len(results)
    assert expected_text == actual_text

    # Usage on a real project:
    # gitignore = open('.gitignore').readlines()
    # results = generate_dockerignore(gitignore, prefix_lines=['.git'])
    # open('.dockerignore','w').writelines(results)
# converted by generate_dockerignore.py
# Created by https://www.gitignore.io/api/python,bower,node,ipythonnotebook,linux
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*/__pycache__/
*/*/__pycache__/
*/*/*/__pycache__/
*.py[cod]
*/*.py[cod]
*/*/*.py[cod]
*/*/*/*.py[cod]
*$py.class
*/*$py.class
*/*/*$py.class
*/*/*/*$py.class
# C extensions
*.so
*/*.so
*/*/*.so
*/*/*/*.so
# Distribution / packaging
.Python
*/.Python
*/*/.Python
*/*/*/.Python
env/
*/env/
*/*/env/
*/*/*/env/
build/
*/build/
*/*/build/
*/*/*/build/
develop-eggs/
*/develop-eggs/
*/*/develop-eggs/
*/*/*/develop-eggs/
dist/
*/dist/
*/*/dist/
*/*/*/dist/
downloads/
*/downloads/
*/*/downloads/
*/*/*/downloads/
eggs/
*/eggs/
*/*/eggs/
*/*/*/eggs/
.eggs/
*/.eggs/
*/*/.eggs/
*/*/*/.eggs/
lib/
*/lib/
*/*/lib/
*/*/*/lib/
lib64/
*/lib64/
*/*/lib64/
*/*/*/lib64/
parts/
*/parts/
*/*/parts/
*/*/*/parts/
sdist/
*/sdist/
*/*/sdist/
*/*/*/sdist/
var/
*/var/
*/*/var/
*/*/*/var/
*.egg-info/
*/*.egg-info/
*/*/*.egg-info/
*/*/*/*.egg-info/
.installed.cfg
*/.installed.cfg
*/*/.installed.cfg
*/*/*/.installed.cfg
*.egg
*/*.egg
*/*/*.egg
*/*/*/*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*/*.manifest
*/*/*.manifest
*/*/*/*.manifest
*.spec
*/*.spec
*/*/*.spec
*/*/*/*.spec
# Installer logs
pip-log.txt
*/pip-log.txt
*/*/pip-log.txt
*/*/*/pip-log.txt
pip-delete-this-directory.txt
*/pip-delete-this-directory.txt
*/*/pip-delete-this-directory.txt
*/*/*/pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
*/htmlcov/
*/*/htmlcov/
*/*/*/htmlcov/
.tox/
*/.tox/
*/*/.tox/
*/*/*/.tox/
.coverage
*/.coverage
*/*/.coverage
*/*/*/.coverage
.coverage.*
*/.coverage.*
*/*/.coverage.*
*/*/*/.coverage.*
.cache
*/.cache
*/*/.cache
*/*/*/.cache
nosetests.xml
*/nosetests.xml
*/*/nosetests.xml
*/*/*/nosetests.xml
coverage.xml
*/coverage.xml
*/*/coverage.xml
*/*/*/coverage.xml
*,cover
*/*,cover
*/*/*,cover
*/*/*/*,cover
.hypothesis/
*/.hypothesis/
*/*/.hypothesis/
*/*/*/.hypothesis/
# Translations
*.mo
*/*.mo
*/*/*.mo
*/*/*/*.mo
*.pot
*/*.pot
*/*/*.pot
*/*/*/*.pot
# Django stuff:
*.log
*/*.log
*/*/*.log
*/*/*/*.log
# Sphinx documentation
docs/_build/
*/docs/_build/
*/*/docs/_build/
*/*/*/docs/_build/
# PyBuilder
target/
*/target/
*/*/target/
*/*/*/target/
### Bower ###
bower_components
*/bower_components
*/*/bower_components
*/*/*/bower_components
.bower-cache
*/.bower-cache
*/*/.bower-cache
*/*/*/.bower-cache
.bower-registry
*/.bower-registry
*/*/.bower-registry
*/*/*/.bower-registry
.bower-tmp
*/.bower-tmp
*/*/.bower-tmp
*/*/*/.bower-tmp
### Node ###
# Logs
logs
*/logs
*/*/logs
*/*/*/logs
*.log
*/*.log
*/*/*.log
*/*/*/*.log
npm-debug.log*
*/npm-debug.log*
*/*/npm-debug.log*
*/*/*/npm-debug.log*
# Runtime data
pids
*/pids
*/*/pids
*/*/*/pids
*.pid
*/*.pid
*/*/*.pid
*/*/*/*.pid
*.seed
*/*.seed
*/*/*.seed
*/*/*/*.seed
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
*/lib-cov
*/*/lib-cov
*/*/*/lib-cov
# Coverage directory used by tools like istanbul
coverage
*/coverage
*/*/coverage
*/*/*/coverage
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
*/.grunt
*/*/.grunt
*/*/*/.grunt
# node-waf configuration
.lock-wscript
*/.lock-wscript
*/*/.lock-wscript
*/*/*/.lock-wscript
# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release
*/build/Release
*/*/build/Release
*/*/*/build/Release
# Dependency directory
# https://docs.npmjs.com/misc/faq#should-i-check-my-node-modules-folder-into-git
node_modules
*/node_modules
*/*/node_modules
*/*/*/node_modules
# Optional npm cache directory
.npm
*/.npm
*/*/.npm
*/*/*/.npm
# Optional REPL history
.node_repl_history
*/.node_repl_history
*/*/.node_repl_history
*/*/*/.node_repl_history
### IPythonNotebook ###
# Temporary data
.ipynb_checkpoints/
*/.ipynb_checkpoints/
*/*/.ipynb_checkpoints/
*/*/*/.ipynb_checkpoints/
### Linux ###
*~
*/*~
*/*/*~
*/*/*/*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
*/.fuse_hidden*
*/*/.fuse_hidden*
*/*/*/.fuse_hidden*
# KDE directory preferences
.directory
*/.directory
*/*/.directory
*/*/*/.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
*/.Trash-*
*/*/.Trash-*
*/*/*/.Trash-*
# Created by https://www.gitignore.io/api/python,bower,node,ipythonnotebook,linux
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
### Bower ###
bower_components
.bower-cache
.bower-registry
.bower-tmp
### Node ###
# Logs
logs
*.log
npm-debug.log*
# Runtime data
pids
*.pid
*.seed
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# node-waf configuration
.lock-wscript
# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release
# Dependency directory
# https://docs.npmjs.com/misc/faq#should-i-check-my-node-modules-folder-into-git
node_modules
# Optional npm cache directory
.npm
# Optional REPL history
.node_repl_history
### IPythonNotebook ###
# Temporary data
.ipynb_checkpoints/
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
@kwerle
Copy link

kwerle commented Jun 28, 2017

I think you'd be interested to learn about **/__pycache__

@HitLuca
Copy link

HitLuca commented Mar 5, 2020

@kwerle gets it

@wassname
Copy link
Author

wassname commented Mar 5, 2020

Thanks, but I did know about that behavior, or at least I used it in gitignore. At the time I made this, the dockerignore syntax did not support it, if I remember correctly. It looks like it does now, which is news to me, yay :)

So no one needs to use this gist.

@HitLuca
Copy link

HitLuca commented Mar 6, 2020

no worries

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment