- Build the environment on Linux with Mambaforge
- Set cluster management and the like aside; just put together a minimal environment for learning
- python3.11
- pyspark3.4.0
- openjdk17
- pandas2.0.1
  - to check interoperability with pandas
- plus extras such as Jupyter
# use mambaforge
mamba env create -f pyspark_env.yaml
pandas and similar tools work well for processing and analyzing small to medium-sized datasets, but they cannot handle data that does not fit in memory. In addition, because of the GIL they basically run on a single thread, which makes it hard to fully exploit a machine's resources.

PySpark offers lazy evaluation and parallel, distributed processing, so it can be used to process and analyze so-called big-data-scale tabular data, and it also interoperates and combines well with pandas, which is why it is introduced here.

For details, see the PySpark Documentation and related material.

A Linux environment with Mambaforge already installed is assumed.

Prepare a YAML file for creating the conda virtual environment, like the following:
# pyspark_env.yaml
name: pyspark-intro
channels:
- conda-forge
dependencies:
- python=3.11
- pyspark>=3.4
- openjdk=17
- pandas>=2.0.1
- pyarrow
# for testdata
- numpy
- seaborn
# jupyter
- jupyterlab

※ pandas 2.0 was released recently, and the removal of the deprecated iteritems broke part of the pandas-to-PySpark DataFrame conversion (with older PySpark you had to stay on the 1.5 series for the time being). This has been fixed in pyspark>=3.4.0, so pandas 2.x is used here.
Create the virtual environment with the mamba command (conda's dependency resolution is slow, so using mamba is recommended):
mamba env create -f pyspark_env.yaml
# Activate the resulting environment (pyspark-intro)
conda activate pyspark-intro
# or `mamba activate pyspark-intro`

Let's take a quick look at basic usage.
Data-processing details are omitted here, but since Spark's DataFrame API maps onto Spark SQL, you can think of it as being able to do roughly whatever SQL can do (indeed, the queries can also be written in SQL).
# Set up the directory for the test data (invoking Linux commands from the notebook)
!rm -rf testdata && mkdir testdata

from pyspark.sql.session import SparkSession

# Start a Spark session
spark = (SparkSession.builder
.master("local[*]")
.appName("LocalTest")
.config("spark.sql.execution.arrow.pyspark.enabled", "true")
.config("spark.executor.memory", "4g") # TMP (実際はハードコートはあまり良くない)
.getOrCreate()
)
spark
your 131072x1 screen size is bogus. expect trouble
23/04/25 00:12:59 WARN Utils: Your hostname, XXXXXXX-XXXXXXX resolves to a loopback address: 127.0.1.1; using 172.24.210.1 instead (on interface eth0)
23/04/25 00:12:59 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/04/25 00:12:59 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
SparkSession - in-memory

SparkContext

Spark UI: http://172.24.210.1:4040

Version: v3.4.0
Master: local[*]
AppName: LocalTest
↑ About the config section: the settings are adjusted for interoperability with pandas (pyarrow must be installed).

Reference: https://learn.microsoft.com/ja-jp/azure/databricks/pandas/pyspark-pandas-conversion

Detailed and environment-specific settings are better kept in a separate config file, e.g. $SPARK_HOME/conf/spark-defaults.conf.

Config reference: https://spark.apache.org/docs/latest/configuration.html

As for the SPARK_HOME environment variable, which is needed to locate the config file and so on, see here for how to set it when Spark is installed via conda.
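As an illustration only (the values below are placeholders rather than tuned recommendations), a minimal spark-defaults.conf could look like this:

# $SPARK_HOME/conf/spark-defaults.conf (illustrative values)
spark.master                                  local[*]
spark.executor.memory                         4g
spark.sql.execution.arrow.pyspark.enabled     true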
When a Spark session is started, the Spark UI comes up at http://localhost:4040 by default, where you can monitor execution state (handy for debugging as well).
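The UI address can also be read off the running session; a quick check (the port shifts to 4041, 4042, ... if 4040 is already taken):

# Print the URL of the Spark UI for this session
print(spark.sparkContext.uiWebUrl)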
# Create the test data
import seaborn as sns
iris = sns.load_dataset("iris")
display(iris)
iris.to_csv("testdata/iris.csv", index=False)
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
# Read the CSV with PySpark
df_iris = spark.read.csv(
"testdata/iris.csv",
header=True,
inferSchema=True,
)
# Because evaluation is lazy, only the column schema is displayed
display(df_iris)
DataFrame[sepal_length: double, sepal_width: double, petal_length: double, petal_width: double, species: string]
(↑ Since the data comes from a CSV file, options are passed to indicate that a header row is present and to infer the schema automatically.)
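If you want to skip the extra pass over the data that inferSchema triggers, an explicit schema can be supplied instead; a minimal sketch (column names follow the iris CSV used here):

from pyspark.sql.types import StructType, StructField, DoubleType, StringType

# Explicit schema instead of inferSchema=True (avoids an extra scan of the CSV)
iris_schema = StructType([
    StructField("sepal_length", DoubleType()),
    StructField("sepal_width", DoubleType()),
    StructField("petal_length", DoubleType()),
    StructField("petal_width", DoubleType()),
    StructField("species", StringType()),
])
df_iris_typed = spark.read.csv("testdata/iris.csv", header=True, schema=iris_schema)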
※ (Supplementary note):
# Evaluation only actually happens when an action such as show() is called
df_iris.show(5)  # show the first 5 rows
+------------+-----------+------------+-----------+-------+
|sepal_length|sepal_width|petal_length|petal_width|species|
+------------+-----------+------------+-----------+-------+
| 5.1| 3.5| 1.4| 0.2| setosa|
| 4.9| 3.0| 1.4| 0.2| setosa|
| 4.7| 3.2| 1.3| 0.2| setosa|
| 4.6| 3.1| 1.5| 0.2| setosa|
| 5.0| 3.6| 1.4| 0.2| setosa|
+------------+-----------+------------+-----------+-------+
only showing top 5 rows
# Selecting columns with select
(df_iris
.select(["sepal_length", "sepal_width"])
.show(3)
)
+------------+-----------+
|sepal_length|sepal_width|
+------------+-----------+
| 5.1| 3.5|
| 4.9| 3.0|
| 4.7| 3.2|
+------------+-----------+
only showing top 3 rows
# A simple query using filter
(df_iris
.filter(df_iris.species == "virginica")
.filter(df_iris.sepal_length > 5.0)
.show(3)
)
+------------+-----------+------------+-----------+---------+
|sepal_length|sepal_width|petal_length|petal_width| species|
+------------+-----------+------------+-----------+---------+
| 6.3| 3.3| 6.0| 2.5|virginica|
| 5.8| 2.7| 5.1| 1.9|virginica|
| 7.1| 3.0| 5.9| 2.1|virginica|
+------------+-----------+------------+-----------+---------+
only showing top 3 rows
# Aggregation with groupBy (1)
(df_iris
.groupBy('species')
.count()
.show()
)
+----------+-----+
| species|count|
+----------+-----+
| virginica| 50|
|versicolor| 50|
| setosa| 50|
+----------+-----+
# Aggregation with groupBy (2)
(df_iris
.groupBy('species')
.agg({
'sepal_length': 'mean',
'sepal_width': 'mean',
'petal_length': 'mean',
'petal_width': 'mean',
})
.show()
)
+----------+------------------+------------------+-----------------+------------------+
| species| avg(sepal_width)| avg(petal_width)|avg(sepal_length)| avg(petal_length)|
+----------+------------------+------------------+-----------------+------------------+
| virginica|2.9739999999999998| 2.026|6.587999999999998| 5.552|
|versicolor|2.7700000000000005|1.3259999999999998| 5.936| 4.26|
| setosa| 3.428000000000001|0.2459999999999999|5.005999999999999|1.4620000000000002|
+----------+------------------+------------------+-----------------+------------------+
# Summary of the DataFrame
df_iris.describe().show()
23/04/25 00:13:09 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
+-------+------------------+-------------------+------------------+------------------+---------+
|summary| sepal_length| sepal_width| petal_length| petal_width| species|
+-------+------------------+-------------------+------------------+------------------+---------+
| count| 150| 150| 150| 150| 150|
| mean| 5.843333333333335| 3.057333333333334|3.7580000000000027| 1.199333333333334| null|
| stddev|0.8280661279778637|0.43586628493669793|1.7652982332594662|0.7622376689603467| null|
| min| 4.3| 2.0| 1.0| 0.1| setosa|
| max| 7.9| 4.4| 6.9| 2.5|virginica|
+-------+------------------+-------------------+------------------+------------------+---------+
# Queries can also be written in SQL
df_iris.createOrReplaceTempView('iris')
spark.sql("show tables").show()
tmp = spark.sql("SELECT * FROM iris WHERE species = 'setosa'")
tmp.show()
+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
| | iris| true|
+---------+---------+-----------+
+------------+-----------+------------+-----------+-------+
|sepal_length|sepal_width|petal_length|petal_width|species|
+------------+-----------+------------+-----------+-------+
| 5.1| 3.5| 1.4| 0.2| setosa|
| 4.9| 3.0| 1.4| 0.2| setosa|
| 4.7| 3.2| 1.3| 0.2| setosa|
| 4.6| 3.1| 1.5| 0.2| setosa|
| 5.0| 3.6| 1.4| 0.2| setosa|
| 5.4| 3.9| 1.7| 0.4| setosa|
| 4.6| 3.4| 1.4| 0.3| setosa|
| 5.0| 3.4| 1.5| 0.2| setosa|
| 4.4| 2.9| 1.4| 0.2| setosa|
| 4.9| 3.1| 1.5| 0.1| setosa|
| 5.4| 3.7| 1.5| 0.2| setosa|
| 4.8| 3.4| 1.6| 0.2| setosa|
| 4.8| 3.0| 1.4| 0.1| setosa|
| 4.3| 3.0| 1.1| 0.1| setosa|
| 5.8| 4.0| 1.2| 0.2| setosa|
| 5.7| 4.4| 1.5| 0.4| setosa|
| 5.4| 3.9| 1.3| 0.4| setosa|
| 5.1| 3.5| 1.4| 0.3| setosa|
| 5.7| 3.8| 1.7| 0.3| setosa|
| 5.1| 3.8| 1.5| 0.3| setosa|
+------------+-----------+------------+-----------+-------+
only showing top 20 rows
For details, see the User Guide, the API Reference, and the Spark SQL reference.

Next, let's look at reading and writing data.

# Earlier we read from CSV for convenience, but type information gets lost that way; also, to look at pandas interop, convert a pandas DataFrame directly into a PySpark one
display(iris.__class__)  # iris is a pandas DataFrame
df = spark.createDataFrame(iris)
display(df)
df.show()
pandas.core.frame.DataFrame
DataFrame[sepal_length: double, sepal_width: double, petal_length: double, petal_width: double, species: string]
+------------+-----------+------------+-----------+-------+
|sepal_length|sepal_width|petal_length|petal_width|species|
+------------+-----------+------------+-----------+-------+
| 5.1| 3.5| 1.4| 0.2| setosa|
| 4.9| 3.0| 1.4| 0.2| setosa|
| 4.7| 3.2| 1.3| 0.2| setosa|
| 4.6| 3.1| 1.5| 0.2| setosa|
| 5.0| 3.6| 1.4| 0.2| setosa|
| 5.4| 3.9| 1.7| 0.4| setosa|
| 4.6| 3.4| 1.4| 0.3| setosa|
| 5.0| 3.4| 1.5| 0.2| setosa|
| 4.4| 2.9| 1.4| 0.2| setosa|
| 4.9| 3.1| 1.5| 0.1| setosa|
| 5.4| 3.7| 1.5| 0.2| setosa|
| 4.8| 3.4| 1.6| 0.2| setosa|
| 4.8| 3.0| 1.4| 0.1| setosa|
| 4.3| 3.0| 1.1| 0.1| setosa|
| 5.8| 4.0| 1.2| 0.2| setosa|
| 5.7| 4.4| 1.5| 0.4| setosa|
| 5.4| 3.9| 1.3| 0.4| setosa|
| 5.1| 3.5| 1.4| 0.3| setosa|
| 5.7| 3.8| 1.7| 0.3| setosa|
| 5.1| 3.8| 1.5| 0.3| setosa|
+------------+-----------+------------+-----------+-------+
only showing top 20 rows
# Convert a Spark DataFrame to a pandas DataFrame
display(df.__class__) # pyspark.sql.dataframe.DataFrame
pdf = df.toPandas()
display(pdf)
pyspark.sql.dataframe.DataFrame
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
↑ A typical workflow: use PySpark to process large-to-medium data down to a smaller size, and once it fits in memory, call toPandas to convert it into the easier-to-handle pandas.
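As a minimal sketch of that workflow using the df defined above:

# Reduce the data with PySpark first, then bring the small aggregated result into pandas
summary_pdf = (df
    .groupBy("species")
    .agg({"sepal_length": "mean", "petal_length": "mean"})
    .toPandas()
)
display(summary_pdf)  # a small (3-row) pandas DataFrame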
Finally, let's write and read data in a variety of formats.

As mentioned earlier, the storage format has a direct impact on Spark's processing performance, so handling it well is important.
# df is a Spark DataFrame
# csv
df.write.save("testdata/iris_csv", format="csv")
# Instead of the format option, this can also be written as:
# df.write.csv("testdata/iris_csv", compression="gzip")  # with compression this yields csv.gz
!ls testdata/iris_csv
_SUCCESS
part-00000-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00001-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00002-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00003-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00004-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00005-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00006-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
part-00007-15ac8e26-2201-46b1-9e77-23eca0cb0744-c000.csv
!head testdata/iris_csv/part-00000-*.csv
5.1,3.5,1.4,0.2,setosa
4.9,3.0,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5.0,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa
4.6,3.4,1.4,0.3,setosa
5.0,3.4,1.5,0.2,setosa
4.4,2.9,1.4,0.2,setosa
4.9,3.1,1.5,0.1,setosa
↑ Unlike pandas, you don't point at a single file; you specify a directory to save into (the data is written as multiple split files).

When reading, you can also point at individual files (e.g. files produced outside Spark), but normally you specify the directory you saved into.
df_csv = spark.read.load("testdata/iris_csv", format="csv")
# Instead of specifying format, this can also be written as:
# df_csv = spark.read.csv("testdata/iris_csv")
df_csv.show(3)
+---+---+---+---+----------+
|_c0|_c1|_c2|_c3| _c4|
+---+---+---+---+----------+
|6.3|3.3|4.7|1.6|versicolor|
|4.9|2.4|3.3|1.0|versicolor|
|6.6|2.9|4.6|1.3|versicolor|
+---+---+---+---+----------+
only showing top 3 rows
(↑ With CSV, information such as column names tends to get lost, which makes it awkward to work with.)
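One way to work around this, as a small sketch: rename the columns after reading (alternatively, pass an explicit schema= to spark.read.csv):

# Restore meaningful column names on the header-less CSV data (types stay as read)
df_csv_named = df_csv.toDF("sepal_length", "sepal_width", "petal_length", "petal_width", "species")
df_csv_named.show(3)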
# For text formats, json and others can be used besides csv
# The compression option selects the compression codec
df.write.json("testdata/iris_json", compression='gzip')
!ls testdata/iris_json
_SUCCESS
part-00000-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00001-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00002-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00003-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00004-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00005-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00006-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
part-00007-8f677572-eb1f-4e24-9861-d72e16706c35-c000.json.gz
# Reading works the same way
df_json = spark.read.json("testdata/iris_json")
# or df_json = spark.read.load("testdata/iris_json", format="json")
display(df_json)
df_json.show(3)
DataFrame[petal_length: double, petal_width: double, sepal_length: double, sepal_width: double, species: string]
+------------+-----------+------------+-----------+----------+
|petal_length|petal_width|sepal_length|sepal_width| species|
+------------+-----------+------------+-----------+----------+
| 3.3| 1.0| 5.0| 2.3|versicolor|
| 4.2| 1.3| 5.6| 2.7|versicolor|
| 4.2| 1.2| 5.7| 3.0|versicolor|
+------------+-----------+------------+-----------+----------+
only showing top 3 rows
(↑ JSON preserves type information better than CSV does.)

# Formats better suited to data analysis/processing, such as snappy-compressed parquet and zlib-compressed orc
df.write.parquet("testdata/iris_parquet", compression="snappy")
!ls testdata/iris_parquet
df.write.orc("testdata/iris_orc", compression="zlib")
!ls testdata/iris_orc
23/04/25 00:13:16 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
_SUCCESS
part-00000-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00001-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00002-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00003-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00004-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00005-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00006-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
part-00007-8d6652c2-fdb4-460e-a4a0-193c3e5c62cd-c000.snappy.parquet
_SUCCESS
part-00000-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00001-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00002-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00003-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00004-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00005-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00006-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
part-00007-7f758941-8b8d-498b-86bf-d55b85dbf8d8-c000.zlib.orc
# Reading
# parquet
print("parquet:")
df_parquet = spark.read.parquet("testdata/iris_parquet")
display(df_parquet)
df_parquet.show(3)
print("\n------------------\n")
# orc
print("orc:")
df_orc = spark.read.orc("testdata/iris_orc")
display(df_orc)
df_orc.show(3)
parquet:
DataFrame[sepal_length: double, sepal_width: double, petal_length: double, petal_width: double, species: string]
+------------+-----------+------------+-----------+----------+
|sepal_length|sepal_width|petal_length|petal_width| species|
+------------+-----------+------------+-----------+----------+
| 5.0| 2.3| 3.3| 1.0|versicolor|
| 5.6| 2.7| 4.2| 1.3|versicolor|
| 5.7| 3.0| 4.2| 1.2|versicolor|
+------------+-----------+------------+-----------+----------+
only showing top 3 rows
------------------
orc:
DataFrame[sepal_length: double, sepal_width: double, petal_length: double, petal_width: double, species: string]
+------------+-----------+------------+-----------+----------+
|sepal_length|sepal_width|petal_length|petal_width| species|
+------------+-----------+------------+-----------+----------+
| 5.0| 2.3| 3.3| 1.0|versicolor|
| 5.6| 2.7| 4.2| 1.3|versicolor|
| 5.7| 3.0| 4.2| 1.2|versicolor|
+------------+-----------+------------+-----------+----------+
only showing top 3 rows
# Data can also be partitioned when saving
df.write.save(
"testdata/iris_with_partition",
format="parquet",
compression="snappy",
partitionBy="species"
)
!cd testdata/iris_with_partition && tree
.
├── _SUCCESS
├── species=setosa
│ ├── part-00000-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
│ ├── part-00001-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
│ └── part-00002-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
├── species=versicolor
│ ├── part-00002-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
│ ├── part-00003-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
│ ├── part-00004-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
│ └── part-00005-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
└── species=virginica
├── part-00005-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
├── part-00006-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
└── part-00007-2ec171c3-f1ae-485e-b57c-194f20ea425a.c000.snappy.parquet
4 directories, 11 files
A separate directory is created for each value of the "species" column, and the data is stored under it.

As with indexes in an RDB, choosing partitions to match how the data will be accessed improves performance (and a poor choice degrades it).

In the example above, if analyses target a specific species, only the species of interest has to be read, which limits the amount of data accessed. If, on the other hand, most queries span multiple species, no improvement can be expected and performance may even get worse.
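To see the benefit concretely: a filter on the partition column lets Spark prune partitions, so only the matching directory is scanned. A small sketch (the exact plan output depends on the Spark version):

# Only testdata/iris_with_partition/species=setosa should be scanned here
df_setosa = (spark.read
    .parquet("testdata/iris_with_partition")
    .filter("species = 'setosa'")
)
df_setosa.explain()  # the physical plan should list PartitionFilters on species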
# Reading works the same way even when the data is partitioned
df_partition = spark.read.load("testdata/iris_with_partition")
display(df_partition)
df_partition.show(3)
DataFrame[sepal_length: double, sepal_width: double, petal_length: double, petal_width: double, species: string]
+------------+-----------+------------+-----------+---------+
|sepal_length|sepal_width|petal_length|petal_width| species|
+------------+-----------+------------+-----------+---------+
| 7.9| 3.8| 6.4| 2.0|virginica|
| 6.4| 2.8| 5.6| 2.2|virginica|
| 6.3| 2.8| 5.1| 1.5|virginica|
+------------+-----------+------------+-----------+---------+
only showing top 3 rows
You can also write the data out as a persisted table with saveAsTable.

As listed under Data Sources, many other storage backends are supported as well, such as relational databases (via JDBC) and Hive tables.
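As a minimal sketch of saveAsTable (the table name iris_table is arbitrary; in this local setup the table data ends up under spark.sql.warehouse.dir, ./spark-warehouse by default):

# Persist df as a managed table and query it by name
df.write.mode("overwrite").format("parquet").saveAsTable("iris_table")
spark.sql("SELECT species, COUNT(*) AS n FROM iris_table GROUP BY species").show()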
import pyspark.pandas as ps
/home/wsl-user/LocalApps/Mambaforge/envs/pyspark-intro/lib/python3.11/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.
warnings.warn(
Reference: https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/index.html

Recently, an API that lets you drive Spark with a pandas-like interface has been maturing.

There used to be a separate tool called Koalas that made Spark usable in a pandas-like way; judging from here and elsewhere, it appears to have been officially folded into PySpark.
import pyspark.pandas as ps
# Create a pandas-on-Spark DataFrame
psdf = ps.read_csv("testdata/iris.csv")
display(psdf.__class__) # pyspark.pandas.frame.DataFrame
psdf.head(5)  # head shows the first rows, just like a pandas DataFrame
/home/wsl-user/LocalApps/Mambaforge/envs/pyspark-intro/lib/python3.11/site-packages/pyspark/pandas/utils.py:975: PandasAPIOnSparkAdviceWarning: If `index_col` is not specified for `read_csv`, the default index is attached which can cause additional overhead.
warnings.warn(message, PandasAPIOnSparkAdviceWarning)
pyspark.pandas.frame.DataFrame
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Filtering and display also work like regular pandas
psdf[psdf['sepal_length'] > 7.5]
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 105 | 7.6 | 3.0 | 6.6 | 2.1 | virginica |
| 117 | 7.7 | 3.8 | 6.7 | 2.2 | virginica |
| 118 | 7.7 | 2.6 | 6.9 | 2.3 | virginica |
| 122 | 7.7 | 2.8 | 6.7 | 2.0 | virginica |
| 131 | 7.9 | 3.8 | 6.4 | 2.0 | virginica |
| 135 | 7.7 | 3.0 | 6.1 | 2.3 | virginica |
# Convert to a PySpark DataFrame
sdf = psdf.to_spark(index_col=["index"])
sdf.show(5)
+-----+------------+-----------+------------+-----------+-------+
|index|sepal_length|sepal_width|petal_length|petal_width|species|
+-----+------------+-----------+------------+-----------+-------+
| 0| 5.1| 3.5| 1.4| 0.2| setosa|
| 1| 4.9| 3.0| 1.4| 0.2| setosa|
| 2| 4.7| 3.2| 1.3| 0.2| setosa|
| 3| 4.6| 3.1| 1.5| 0.2| setosa|
| 4| 5.0| 3.6| 1.4| 0.2| setosa|
+-----+------------+-----------+------------+-----------+-------+
only showing top 5 rows
# Convert to a regular pandas DataFrame
pdf = psdf.to_pandas()
display(pdf.__class__) # pandas.core.frame.DataFrame
pdf.head()
/home/wsl-user/LocalApps/Mambaforge/envs/pyspark-intro/lib/python3.11/site-packages/pyspark/pandas/utils.py:975: PandasAPIOnSparkAdviceWarning: `to_pandas` loads all data into the driver's memory. It should only be used if the resulting pandas DataFrame is expected to be small.
warnings.warn(message, PandasAPIOnSparkAdviceWarning)
pandas.core.frame.DataFrame
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
dask is a library with an API highly compatible with pandas and numpy that also offers lazy evaluation and parallel, distributed processing.

For example, you can do almost exactly what we did with PySpark here: process a large table with a dask DataFrame using lazy evaluation and parallel, distributed execution, then convert it to a pandas DataFrame once the data has become small enough.
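A rough sketch of that dask workflow, assuming dask is installed (it is not part of the environment defined above):

# Hypothetical dask equivalent of the "shrink, then convert to pandas" pattern
import dask.dataframe as dd

ddf = dd.read_csv("testdata/iris.csv")                 # lazy dask DataFrame
means = ddf.groupby("species")["sepal_length"].mean()  # still lazy
pdf = means.compute()                                  # evaluation happens here; the result is a pandas object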
Compared with dask, PySpark...