Skip to content

Instantly share code, notes, and snippets.

@jsierles
Last active February 12, 2018 18:43
Show Gist options
  • Save jsierles/17db7c8f06d3c5890c2f72e3d0080962 to your computer and use it in GitHub Desktop.
Save jsierles/17db7c8f06d3c5890c2f72e3d0080962 to your computer and use it in GitHub Desktop.
Custom modules for apache-arrow and feather support in Guix
(define-module (nextjournal packages python)
#:use-module (guix download)
#:use-module (guix git-download)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix packages)
#:use-module (guix utils)
#:use-module (guix build-system cmake)
#:use-module (guix build-system gnu)
#:use-module (guix build-system python)
#:use-module (nextjournal packages compression)
#:use-module (gnu packages)
#:use-module (gnu packages boost)
#:use-module (gnu packages check)
#:use-module (gnu packages cmake)
#:use-module (gnu packages compression)
#:use-module (gnu packages jemalloc)
#:use-module (gnu packages package-management)
#:use-module (gnu packages python)
#:use-module (gnu packages web))
;; Apache Arrow C++ libraries, built from the "cpp" subdirectory of the
;; upstream Git repository with the Python bindings (-DARROW_PYTHON=ON)
;; enabled.
(define-public apache-arrow
  (package
    (name "apache-arrow")
    (version "0.5.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/apache/arrow")
             ;; The tag fetched is "apache-arrow-" followed by the version,
             ;; so derive it from VERSION instead of repeating the number.
             (commit (string-append "apache-arrow-" version))))
       ;; Name the checkout in the store after the package and version.
       (file-name (string-append name "-" version "-checkout"))
       (patches (search-patches "arrow-fix-build.patch"))
       (sha256
        (base32
         "104dqiz3kfianb85m2pr0xwzviggplnfhgr0hy9y0phf3h4sv4j7"))))
    (build-system cmake-build-system)
    (inputs
     `(("boost" ,boost)
       ("rapidjson" ,rapidjson)
       ("brotli" ,google-brotli)
       ("flatbuffers" ,flatbuffers)
       ("jemalloc" ,jemalloc)
       ;; The two inputs below are needed for -DARROW_PYTHON.
       ("python-3" ,python)
       ("python-numpy" ,python-numpy)))
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; The CMake project lives in the "cpp" subdirectory of the checkout.
         (add-before 'configure 'enter-source-directory
           (lambda _ (chdir "cpp") #t))
         ;; Point the build at each dependency through the *_HOME/*_ROOT
         ;; environment variables.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "BOOST_ROOT" (assoc-ref %build-inputs "boost")) ; necessary?
             (setenv "BROTLI_HOME" (assoc-ref %build-inputs "brotli"))
             (setenv "FLATBUFFERS_HOME" (assoc-ref %build-inputs "flatbuffers"))
             (setenv "JEMALLOC_HOME" (assoc-ref %build-inputs "jemalloc"))
             (setenv "RAPIDJSON_HOME" (assoc-ref %build-inputs "rapidjson"))
             #t)))
       #:configure-flags (list "-DCMAKE_BUILD_TYPE=Release"
                               "-DARROW_WITH_ZSTD=OFF"
                               "-DARROW_WITH_LZ4=OFF"
                               ;; Install to PREFIX/lib (the default is
                               ;; PREFIX/lib64).
                               (string-append "-DCMAKE_INSTALL_LIBDIR="
                                              (assoc-ref %outputs "out")
                                              "/lib")
                               "-DARROW_PYTHON=ON"
                               ;; XXX These Guix packages offer static
                               ;; libraries that are not position independent,
                               ;; and ld fails to link them into the arrow .so
                               "-DARROW_WITH_SNAPPY=OFF"
                               "-DARROW_WITH_ZLIB=OFF"
                               ;; Building the tests forces on all the
                               ;; optional features and the use of static
                               ;; libraries.
                               "-DARROW_BUILD_TESTS=OFF"
                               "-DARROW_BUILD_STATIC=OFF")))
    (home-page "https://arrow.apache.org/")
    (synopsis "Columnar in-memory analytics")
    (description "Apache Arrow is a columnar in-memory analytics layer
designed to accelerate big data. It houses a set of canonical in-memory
representations of flat and hierarchical data along with multiple
language-bindings for structure manipulation. It also provides IPC and common
algorithm implementations.")
    (license license:asl2.0)))
;; Python bindings for Apache Arrow, built from the "python" subdirectory of
;; the same upstream checkout that apache-arrow uses.
(define-public python-pyarrow
  (package
    (name "python-pyarrow")
    (version "0.5.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/apache/arrow")
             ;; The tag fetched is "apache-arrow-" followed by the version.
             (commit (string-append "apache-arrow-" version))))
       ;; Name the checkout in the store after the package and version.
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "104dqiz3kfianb85m2pr0xwzviggplnfhgr0hy9y0phf3h4sv4j7"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f ; XXX Test failures related to missing libhdfs, libhdfs3,
                   ; and "Unsupported numpy type 22".
       #:phases
       (modify-phases %standard-phases
         ;; The Python package lives in the "python" subdirectory.
         (add-after 'unpack 'enter-source-directory
           (lambda _ (chdir "python") #t))
         (add-after 'unpack 'set-env
           (lambda _
             ;; Fails if using pkg-config instead.
             (setenv "ARROW_HOME" (assoc-ref %build-inputs "apache-arrow"))
             ;; Return #t explicitly, like the other phases in this file,
             ;; rather than the unspecified value of 'setenv'.
             #t)))))
    (propagated-inputs
     `(("apache-arrow" ,apache-arrow)
       ("python-six" ,python-six)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("cmake" ,cmake)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://arrow.apache.org/docs/python/")
    (synopsis "Python library for Apache Arrow")
    (description "This library provides a Pythonic API wrapper for the reference
Arrow C++ implementation, along with tools for interoperability with pandas,
NumPy, and other traditional Python scientific computing packages.")
    (license license:asl2.0)))
;; Python 2 variant of python-pyarrow.
;; XXX: apache-arrow is configured with -DARROW_PYTHON=ON against its
;; "python-3" input, so libarrow_python.so is built using Python 3.  Does
;; that really not matter here?
(define-public python2-pyarrow
  (package-with-python2
   python-pyarrow))
;; FlatBuffers serialization library, built with CMake from the upstream
;; GitHub release archive.
(define-public flatbuffers
  (package
    (name "flatbuffers")
    (version "1.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/google/flatbuffers/archive/v"
                           version ".tar.gz"))
       ;; The archive is served as "vVERSION.tar.gz"; store it under a name
       ;; that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "129hzbx8ag7fvv4606wgaydkk11pdaagkr4pdxsk985bpg7wv38g"))))
    (build-system cmake-build-system)
    (home-page "https://google.github.io/flatbuffers/")
    (synopsis "Memory-efficient serialization library")
    (description "FlatBuffers is a cross platform serialization library for C++,
C#, C, Go, Java, JavaScript, PHP, and Python. It was originally created at
Google for game development and other performance-critical applications.")
    (license license:asl2.0)))
;; Python wrapper for the Feather file format, fetched from PyPI.
(define-public python-feather-format
  (package
    (name "python-feather-format")
    (version "0.4.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "feather-format" version))
              (sha256
               (base32
                "1adivm5w5ji4qv7hq7942vqlk8l2wgw87bdlsia771z14z3zp857"))))
    (build-system python-build-system)
    ;; Propagated inputs: pandas and pyarrow.
    (propagated-inputs
     (list (list "python-pandas" python-pandas)
           (list "python-pyarrow" python-pyarrow)))
    (synopsis "Wrapper library to the Feather file format")
    (description "This package provides a Python wrapper library to the
Apache Arrow-based Feather file format.")
    (home-page "https://github.com/wesm/feather")
    (license license:asl2.0)))
;; Python 2 variant of python-feather-format.
(define-public python2-feather-format
  (package-with-python2
   python-feather-format))
;; nbformat pinned at version 4.3.0; used as the "python-nbformat" input of
;; python-plotly in this module.
(define-public python-nbformat-4.3
  (package
    (name "python-nbformat")
    (version "4.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "nbformat" version))
              (sha256
               (base32
                "12s7j4qja8b5bs1kyw5dzmrqbjxxj8wk52cyasbiqbv7fblcrssz"))))
    (build-system python-build-system)
    ;; There is no test target, so the check phase is disabled.
    (arguments (list #:tests? #f))
    (propagated-inputs
     (list (list "python-ipython-genutils" python-ipython-genutils)
           (list "python-jsonschema" python-jsonschema)
           (list "python-jupyter-core" python-jupyter-core)
           (list "python-traitlets" python-traitlets)))
    (synopsis "Jupyter Notebook format")
    (description "This package provides the reference implementation of the
Jupyter Notebook format and Python APIs for working with notebooks.")
    (home-page "http://jupyter.org")
    (license license:bsd-3)))
;; Python 2 variant of python-nbformat-4.3.  It must wrap the 4.3 package
;; defined in this module, not the unversioned 'python-nbformat' variable,
;; which is not defined here.
(define-public python2-nbformat-4.3
  (package-with-python2 python-nbformat-4.3))
;; Plotly's Python graphing library, fetched from PyPI.  It uses the
;; python-nbformat-4.3 package from this module as its "python-nbformat"
;; input.
(define-public python-plotly
  (package
    (name "python-plotly")
    (version "2.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plotly" version))
       (sha256
        (base32
         "038x99hss8g9wvvnq1dpncw5bdsqkiffkhmbpjhiq3mf2ysdmhnf"))))
    (build-system python-build-system)
    (arguments
     '(;; Tests fail as shown below. Are they even included in the PyPI release?
       ;; TypeError: clientresp() missing 1 required positional argument: 'data'
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-HOME
           ;; The test suite wants to write to $HOME.
           (lambda _
             (setenv "HOME" "/tmp")
             ;; Return #t explicitly, like the other phases in this file,
             ;; rather than the unspecified value of 'setenv'.
             #t)))))
    (propagated-inputs
     `(("python-decorator" ,python-decorator)
       ("python-nbformat" ,python-nbformat-4.3)
       ("python-pytz" ,python-pytz)
       ("python-requests" ,python-requests)
       ("python-six" ,python-six)))
    (home-page "https://plot.ly/python/")
    (synopsis "Interactive plotting library for Python")
    (description "Plotly's Python graphing library makes interactive,
publication-quality graphs online. Examples of how to make line plots, scatter
plots, area charts, bar charts, error bars, box plots, histograms, heatmaps,
subplots, multiple-axes, polar charts, and bubble charts.")
    (license license:expat)))
;; Python 2 variant of python-plotly.
(define-public python2-plotly
  (package-with-python2
   python-plotly))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment