Today, a great deal of data is geolocated (meaning that it has a position in space). Such data is known as GIS data.
It is common to need to perform operations on this data, such as aggregations, and many optimisations exist for doing so.
Today, a great deal of data is geolocated (meaning that it has a position in space). Such data is known as GIS data.
It is common to need to perform operations on this data, such as aggregations, and many optimisations exist for doing so.
| # A simple cheat sheet of Spark Dataframe syntax | |
| # Current for Spark 1.6.1 | |
| # import statements | |
| from pyspark.sql import SQLContext | |
| from pyspark.sql.types import * | |
| from pyspark.sql.functions import * | |
| #creating dataframes | |
| df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"]) # from manual data |
| from threading import Thread | |
| from time import sleep | |
| import uuid | |
| from dask.distributed import LocalCluster, Client | |
| import dask.dataframe as dd | |
| import pandas as pd | |
| import pyspark | |
| Host * | |
| ControlPath ~/.ssh/control/%C | |
| ControlMaster auto |
| { | |
| "__inputs": [], | |
| "__requires": [ | |
| { | |
| "type": "grafana", | |
| "id": "grafana", | |
| "name": "Grafana", | |
| "version": "4.6.3" | |
| }, | |
| { |
| --[[ | |
| Youtube playlist importer for VLC media player 1.1 and 2.0 | |
| Copyright 2012 Guillaume Le Maout | |
| Authors: Guillaume Le Maout | |
| Contact: http://addons.videolan.org/messages/?action=newmessage&username=exebetche | |
| This program is free software; you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License as published by | |
| the Free Software Foundation; either version 2 of the License, or |
| import batchspawner | |
| # The port for this process | |
| c.JupyterHub.hub_port = 8081 | |
| # The ip for this process | |
| c.JupyterHub.hub_ip = '127.0.0.1' | |
| class SlurmSpawnerNoLocalUsers(batchspawner.SlurmSpawner): | |
| """Slurm Spawner that does not need local Unix users on the Hub server""" |
| FROM python:2-alpine | |
| RUN pip install \ | |
| beautifulsoup4 \ | |
| requests | |
| COPY papers.py /usr/local/bin/ | |
| RUN chmod +x /usr/local/bin/papers.py | |
| WORKDIR /root |
| # Go to https://developer.github.com/v4/explorer/ and enter the GraphQL query with the query variable: | |
| # {"queryString": "your-githubuser-name"} | |
| cat results.json | | |
| jq '.data.user.repositories.edges[] | { Count: .node.collaborators.totalCount, Repo: .node.name} | select(.Count > 2)' |