seibert · August 7, 2015 14:50
diff --git a/engine_hsa.py b/engine_hsa.py
 from pandas.computation import engines
 from numba import vectorize


 class HSAEngine(engines.AbstractEngine):
    """Evaluate a pandas ``eval`` expression through a Numba-compiled ufunc.

    The expression is rendered as the body of a small Python function whose
    parameters are the string names found in ``expr.names``.  That function
    is compiled with ``numba.vectorize(nopython=True)`` and later applied to
    the operands resolved from the expression's environment.

    Compiled ufuncs are memoized in the class-level ``_func_cache`` dict,
    keyed by the generated source text, so they are shared by every instance.
    """
    has_neg_frac = False
    # Generated function source -> compiled ufunc.  Shared by all instances;
    # entries are never evicted.
    _func_cache = {}

    def __init__(self, expr):
        """Build, or fetch from the cache, the ufunc that evaluates ``expr``.

        Parameters
        ----------
        expr
            Expression object supplied by pandas.  This class reads its
            ``names`` attribute, its ``str()`` form and (at evaluation time)
            its ``env`` attribute.
        """
        super(HSAEngine, self).__init__(expr)

        # ``expr.names`` can also contain non-string entries (e.g. the numeric
        # literal 2.0), which cannot be used as parameter names.  The names
        # are de-duplicated and sorted so the generated signature -- and
        # therefore the cache key -- does not depend on iteration order.
        # NOTE(review): ``names`` appears to be an unordered set in pandas;
        # confirm against the pandas version in use.
        self._args = sorted({n for n in expr.names if isinstance(n, str)})

        function_name = '__numba_pandas_eval_ufunc'
        function_str = 'def %s(%s):\n    return %s\n' % (
            function_name, ', '.join(self._args), str(expr))

        ufunc = HSAEngine._func_cache.get(function_str)
        if ufunc is None:
            # SECURITY NOTE: the generated source embeds the expression text
            # and is executed with ``exec``.  Only use this engine with
            # expression strings from a trusted origin.
            scope = {}
            exec(function_str, scope)
            ufunc = vectorize(nopython=True)(scope[function_name])
            HSAEngine._func_cache[function_str] = ufunc
        self._ufunc = ufunc

    def _evaluate(self):
        """Resolve every argument name and apply the compiled ufunc.

        Returns
        -------
        Whatever the ufunc returns for the resolved operands.
        """
        env = self.expr.env
        # The second positional argument is passed as ``False`` exactly as
        # before -- presumably the ``is_local`` flag; TODO confirm.
        call_args = [env.resolve(name, False) for name in self._args]
        return self._ufunc(*call_args)

 # Register the engine under the key 'hsa' in the ``_engines`` mapping of the
 # imported ``engines`` module, so it can be requested with ``engine='hsa'``.
 # NOTE: ``_engines`` is a private (underscore-prefixed) attribute.
 engines._engines['hsa'] = HSAEngine
diff --git a/test_engine_hsa.py b/test_engine_hsa.py
 import engine_hsa  # register new engine

 import pandas as pd

 # Small two-column frame used as input for the smoke test.
 frame = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [2, 4, 6, 8]})

 print('Input:', type(frame), '\n', frame)

 # The same expression is evaluated twice on purpose -- presumably so the
 # second pass reuses the already-compiled function; only the last result
 # is kept.
 for _ in range(2):
     result = frame.eval('x + y', engine='hsa')

 print('Output:', type(result), '\n', result)

diff --git a/TimingTest.ipynb b/TimingTest.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import engine_hsa\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n = 1000000\n",
    "big_df = pd.DataFrame(\n",
    "    dict(x=np.random.uniform(size=n).astype(np.float32), \n",
    "         y=np.random.uniform(size=n).astype(np.float32)))\n",
    "#big_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 loops, best of 3: 22 ms per loop\n",
      "100 loops, best of 3: 7.39 ms per loop\n",
      "100 loops, best of 3: 8.74 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit big_df.eval('(x**2 + y**2)**0.5', engine='python')\n",
    "%timeit big_df.eval('(x**2 + y**2)**0.5', engine='numexpr')\n",
    "%timeit big_df.eval('(x**2 + y**2)**0.5', engine='hsa')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> \u001b[0;32m/work/continuum/clients/amd/pandas_hsa/engine_hsa.py\u001b[0m(19)\u001b[0;36m__init__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m     18 \u001b[0;31m            \u001b[0;32mreturn\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0m\u001b[0;32m---> 19 \u001b[0;31m        ''' % (function_name, ','.join(self._args), str(expr))\n",
      "\u001b[0m\u001b[0;32m     20 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0m\n",
      "ipdb> self._args\n",
      "['x', 2.0, 'y']\n",
      "ipdb> q\n"
     ]
    }
   ],
   "source": [
    "%debug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	from pandas.computation import engines
	from numba import vectorize


	class HSAEngine(engines.AbstractEngine):
	    """Evaluate a pandas ``eval`` expression through a Numba-compiled ufunc.

	    The expression is rendered as the body of a small Python function whose
	    parameters are the string names found in ``expr.names``.  That function
	    is compiled with ``numba.vectorize(nopython=True)`` and later applied to
	    the operands resolved from the expression's environment.

	    Compiled ufuncs are memoized in the class-level ``_func_cache`` dict,
	    keyed by the generated source text, so they are shared by every instance.
	    """
	    has_neg_frac = False
	    # Generated function source -> compiled ufunc.  Shared by all instances;
	    # entries are never evicted.
	    _func_cache = {}

	    def __init__(self, expr):
	        """Build, or fetch from the cache, the ufunc that evaluates ``expr``.

	        Parameters
	        ----------
	        expr
	            Expression object supplied by pandas.  This class reads its
	            ``names`` attribute, its ``str()`` form and (at evaluation time)
	            its ``env`` attribute.
	        """
	        super(HSAEngine, self).__init__(expr)

	        # ``expr.names`` can also contain non-string entries (e.g. the numeric
	        # literal 2.0), which cannot be used as parameter names.  The names
	        # are de-duplicated and sorted so the generated signature -- and
	        # therefore the cache key -- does not depend on iteration order.
	        # NOTE(review): ``names`` appears to be an unordered set in pandas;
	        # confirm against the pandas version in use.
	        self._args = sorted({n for n in expr.names if isinstance(n, str)})

	        function_name = '__numba_pandas_eval_ufunc'
	        function_str = 'def %s(%s):\n    return %s\n' % (
	            function_name, ', '.join(self._args), str(expr))

	        ufunc = HSAEngine._func_cache.get(function_str)
	        if ufunc is None:
	            # SECURITY NOTE: the generated source embeds the expression text
	            # and is executed with ``exec``.  Only use this engine with
	            # expression strings from a trusted origin.
	            scope = {}
	            exec(function_str, scope)
	            ufunc = vectorize(nopython=True)(scope[function_name])
	            HSAEngine._func_cache[function_str] = ufunc
	        self._ufunc = ufunc

	    def _evaluate(self):
	        """Resolve every argument name and apply the compiled ufunc.

	        Returns
	        -------
	        Whatever the ufunc returns for the resolved operands.
	        """
	        env = self.expr.env
	        # The second positional argument is passed as ``False`` exactly as
	        # before -- presumably the ``is_local`` flag; TODO confirm.
	        call_args = [env.resolve(name, False) for name in self._args]
	        return self._ufunc(*call_args)

	# Register the engine under the key 'hsa' in the ``_engines`` mapping of the
	# imported ``engines`` module, so it can be requested with ``engine='hsa'``.
	# NOTE: ``_engines`` is a private (underscore-prefixed) attribute.
	engines._engines['hsa'] = HSAEngine
	import engine_hsa # register new engine

	import pandas as pd

	# Small two-column frame used as input for the smoke test.
	frame = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [2, 4, 6, 8]})

	print('Input:', type(frame), '\n', frame)

	# The same expression is evaluated twice on purpose -- presumably so the
	# second pass reuses the already-compiled function; only the last result
	# is kept.
	for _ in range(2):
	    result = frame.eval('x + y', engine='hsa')

	print('Output:', type(result), '\n', result)
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"import engine_hsa\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"import math"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"n = 1000000\n",
	"big_df = pd.DataFrame(\n",
	"    dict(x=np.random.uniform(size=n).astype(np.float32), \n",
	"         y=np.random.uniform(size=n).astype(np.float32)))\n",
	"#big_df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": false,
	"scrolled": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"10 loops, best of 3: 22 ms per loop\n",
	"100 loops, best of 3: 7.39 ms per loop\n",
	"100 loops, best of 3: 8.74 ms per loop\n"
	]
	}
	],
	"source": [
	"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='python')\n",
	"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='numexpr')\n",
	"%timeit big_df.eval('(x**2 + y**2)**0.5', engine='hsa')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"> \u001b[0;32m/work/continuum/clients/amd/pandas_hsa/engine_hsa.py\u001b[0m(19)\u001b[0;36m__init__\u001b[0;34m()\u001b[0m\n",
	"\u001b[0;32m 18 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0m\u001b[0;32m---> 19 \u001b[0;31m ''' % (function_name, ','.join(self._args), str(expr))\n",
	"\u001b[0m\u001b[0;32m 20 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0m\n",
	"ipdb> self._args\n",
	"['x', 2.0, 'y']\n",
	"ipdb> q\n"
	]
	}
	],
	"source": [
	"%debug"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.4.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}