Created
October 14, 2019 08:04
-
-
Save kif/f1f36da22ead5e781af71d7da799350a to your computer and use it in GitHub Desktop.
Python implementation of IBM's vector instructions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Altivec programming in Python\n", | |
| "\n", | |
| "This notebook contains a few functions which manipulate 128-bit vectors as described in\n", | |
| "https://www.ibm.com/support/knowledgecenter/en/SSLTBW_2.3.0/com.ibm.zos.v2r3.cbcpx01/vectorbltin.htm\n", | |
| "\n", | |
| "The most interesting ones for data filtering are related to:\n", | |
| "* Load and Store\n", | |
| "* Gather and Scatter\n", | |
| "* Generate mask\n", | |
| "\n", | |
| "\n", | |
| "The work to perform is in 3 stages:\n", | |
| "1. Implement a few vector operations in Python according to the docstring provided in the documentation\n", | |
| "2. Use those functions to implement the bitwise transposition for objects of size:\n", | |
| " * 8 bits (easy)\n", | |
| " * 16 bits (as 8 bits)\n", | |
| " * 32 bits (more tricky)\n", | |
| " * 64 bits (as 32 bits)\n", | |
| " * 128 bits (even more tricky)\n", | |
| "3. Implement the reverse procedure. Mind that ... (TODO: this note was left unfinished)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_ld(a, b):\n", | |
| "    \"\"\"Vector load\n", | |
| "    d = vec_xl(a, b)\n", | |
| "\n", | |
| "    Loads a 16-byte vector from the memory address that is specified by the displacement a and the pointer b.\n", | |
| "    This function adds the displacement and the pointer R-value to obtain the address for the load operation.\n", | |
| "    Addresses are always aligned on 16-byte boundaries (i.e. the 4 lower bits of the address are set to 0).\n", | |
| "\n", | |
| "    Not implemented yet: currently returns a placeholder list of 16 None values.\n", | |
| "    \"\"\"\n", | |
| "    # TODO: implement the load once the memory model (what a pointer is in Python) is defined\n", | |
| "    return [None] * 16" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_st(a, b, c):\n", | |
| "    \"\"\"Vector store\n", | |
| "    d = vec_xst(a, b, c)\n", | |
| "\n", | |
| "    Stores the elements of the 16-byte vector a to the effective address\n", | |
| "    that is obtained by adding the displacement b to the address c.\n", | |
| "\n", | |
| "    Not implemented yet: currently does nothing and returns None.\n", | |
| "    \"\"\"\n", | |
| "    # TODO: implement the store once the memory model (what an address is in Python) is defined\n", | |
| "    return None" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_insert(a, b, c):\n", | |
| "    \"\"\"d = vec_insert(a, b, c)\n", | |
| "    Returns a copy of the vector b with the value of its element c replaced by a.\n", | |
| "    This function uses modulo arithmetic on c to determine the element number.\n", | |
| "    For example, if c is out of range, c modulo the number of elements in the\n", | |
| "    vector is used to determine the element position.\n", | |
| "\n", | |
| "    :param a: value to insert\n", | |
| "    :param b: 16-element vector; it is left unmodified\n", | |
| "    :param c: index of the element to replace, taken modulo 16\n", | |
| "    :return: new 16-element list\n", | |
| "    \"\"\"\n", | |
| "    assert len(b) == 16\n", | |
| "    # list(b) makes a real copy; b[...] fails on a list and is only a view on a numpy array\n", | |
| "    d = list(b)\n", | |
| "    d[c % 16] = a\n", | |
| "    return d" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_perm(a, b, c):\n", | |
| "    \"\"\"d = vec_perm(a, b, c)\n", | |
| "    Returns a vector that contains some elements of two vectors,\n", | |
| "    in the order specified by a third vector.\n", | |
| "    Each byte of the result is selected by using the least\n", | |
| "    significant 5 bits of the corresponding byte of c as an index\n", | |
| "    into the concatenated bytes of a and b.\n", | |
| "\n", | |
| "    :param a: 16-element vector, addressed by indices 0..15\n", | |
| "    :param b: 16-element vector, addressed by indices 16..31\n", | |
| "    :param c: 16-element vector of selectors; only the 5 lower bits of each are used\n", | |
| "    :return: new 16-element list\n", | |
| "    \"\"\"\n", | |
| "    assert len(a) == 16\n", | |
| "    assert len(b) == 16\n", | |
| "    assert len(c) == 16\n", | |
| "    ab = list(a) + list(b)\n", | |
| "    # 0x1F keeps the 5 least significant bits, so every selector lands in 0..31\n", | |
| "    d = [ab[i & 0x1F] for i in c]\n", | |
| "    return d" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_sel(a, b, c):\n", | |
| "    \"\"\"d = vec_sel(a, b, c)\n", | |
| "    Returns a vector containing the value of either a or b depending on the value of c.\n", | |
| "    Each bit of the result vector has the value of the corresponding bit of a if the corresponding bit of c is 0,\n", | |
| "    or the value of the corresponding bit of b otherwise.\n", | |
| "\n", | |
| "    :param a: 16-element vector of bytes, used where the mask bit is 0\n", | |
| "    :param b: 16-element vector of bytes, used where the mask bit is 1\n", | |
| "    :param c: 16-element vector of byte masks\n", | |
| "    :return: new 16-element list of bytes\n", | |
| "    \"\"\"\n", | |
| "    assert len(a) == 16\n", | |
| "    assert len(b) == 16\n", | |
| "    assert len(c) == 16\n", | |
| "    # the final & 0xFF keeps each element within one byte\n", | |
| "    return [((x & ~m) | (y & m)) & 0xFF for x, y, m in zip(a, b, c)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def vec_bperm_u128(a, b):\n", | |
| "    \"\"\"\n", | |
| "    d = vec_bperm_u128(a, b)\n", | |
| "\n", | |
| "    Gathers up to 16 1-bit values from a quadword in the specified order,\n", | |
| "    and places them in the specified order in bits 48 - 63 of the result vector register,\n", | |
| "    with the rest of the result zeroed.\n", | |
| "\n", | |
| "    For each i (0 <= i < 16), let index denote the byte value of the i-th element of b.\n", | |
| "    If index is greater than or equal to 128, bit 48+i of the result is set to 0.\n", | |
| "\n", | |
| "    If index is smaller than 128, bit 48+i of the result is set to the value of the index-th bit of input a.\n", | |
| "\n", | |
| "    All other bits are set to 0.\n", | |
| "    Bits are numbered big-endian: bit 0 is the most significant bit of byte 0.\n", | |
| "    For example:\n", | |
| "\n", | |
| "    vector unsigned char a = (vector unsigned char) (65,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1);\n", | |
| "    vector unsigned char b = (vector unsigned char)(0,0,0,0,1,1,1,1,128,128,128,128,255,255,255,255);\n", | |
| "    vector unsigned long long d = vec_bperm_u128(a, b); //d[0]=0xF00, d[1]=0\n", | |
| "\n", | |
| "    :param a: 16-element vector of bytes, the 128-bit source\n", | |
| "    :param b: 16-element vector of bit indices\n", | |
| "    :return: 16-element list of bytes; bits 48-63 are bytes 6 and 7, i.e. the\n", | |
| "             example above yields [0, 0, 0, 0, 0, 0, 0x0F, 0x00] + [0] * 8\n", | |
| "    \"\"\"\n", | |
| "    assert len(a) == 16\n", | |
| "    assert len(b) == 16\n", | |
| "    gathered = 0\n", | |
| "    for index in b:\n", | |
| "        bit = 0\n", | |
| "        if index < 128:\n", | |
| "            # byte index >> 3, then count the bit from the most significant side\n", | |
| "            bit = (a[index >> 3] >> (7 - (index & 7))) & 1\n", | |
| "        # element 0 of b ends up in bit 48, the top of the 16-bit field\n", | |
| "        gathered = (gathered << 1) | bit\n", | |
| "    d = [0] * 16\n", | |
| "    d[6] = gathered >> 8\n", | |
| "    d[7] = gathered & 0xFF\n", | |
| "    return d\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1048576\n", | |
| "[588189 807523 628987 654570 372329 160504 881778 40554]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Example of usage: consider an array of 8 int32 values\n", | |
| "import numpy\n", | |
| "import bitshuffle\n", | |
| "\n", | |
| "size = 8  # number of elements in the test array\n", | |
| "dtype = \"int32\"\n", | |
| "valid_bits = 20  # the random values below are drawn from [0, 2**valid_bits)\n", | |
| "maxi = 1<<valid_bits\n", | |
| "print(maxi)\n", | |
| "# NOTE(review): no random seed is set, so the values printed below change on every run\n", | |
| "raw = numpy.random.randint(0, maxi, size=size).astype(dtype)\n", | |
| "print(raw)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([-1123955177, 771685989, -1646743039, -1653900817, 1330782300,\n", | |
| " 0, 0, 0], dtype=int32)" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Result from the bitshuffle package for the array built in the previous cell\n", | |
| "# (presumably the bit-transposed data the vec_* functions should reproduce - TODO confirm)\n", | |
| "bitshuffle.bitshuffle(raw)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "#" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment