Skip to content

Instantly share code, notes, and snippets.

View geoHeil's full-sized avatar
💭
🚀

geoHeil geoHeil

💭
🚀
View GitHub Profile
MATCH p = (source:Person)-[:call|text]-(destination:Person)
RETURN
source.name as Vertex,
source.known_terrorist as known_terrorist,
apoc.coll.avg(COLLECT(
CASE WHEN ALL(r in relationships(p) where type(r)='call') THEN destination.known_terrorist ELSE NULL END
)) as type_undir_1_call,
apoc.coll.avg(COLLECT(
CASE WHEN ALL(r in relationships(p) where type(r)='text') THEN destination.known_terrorist ELSE NULL END
)) as type_undir_1_text,
╒═════════════╤════════════════════════╤══════════════════════════════════╕
│"source.name"│"source.known_terrorist"│"avg(destination.known_terrorist)"│
╞═════════════╪════════════════════════╪══════════════════════════════════╡
│"Gabby" │0 │0.13333333333333336 │
├─────────────┼────────────────────────┼──────────────────────────────────┤
│"Esther" │0 │0.30434782608695654 │
├─────────────┼────────────────────────┼──────────────────────────────────┤
│"Charlie" │0 │0.3333333333333333 │
├─────────────┼────────────────────────┼──────────────────────────────────┤
│"David" │0 │0.3589743589743589 │
MATCH (source:Person)-[:call|text*1..3]-(destination:Person)
RETURN source.name, source.known_terrorist, avg(destination.known_terrorist)
@geoHeil
geoHeil / encoding.py
Created August 6, 2017 10:26
Scikit-learn multi-label encoder with automatic most-frequent imputation of unseen labels
class EncodeCategorical(TransformerMixin):
"""
Encodes a specified list of columns or all columns if None. Impute unseen labels with most frequent value per column
"""
def __init__(self, columns=None):
self.columns = columns
self.encoders = None
self.fillNewLabels = None
@geoHeil
geoHeil / profile_dump.prof
Created July 31, 2017 15:59
python profiling
k
types | # objects | total size
===================================== | =========== | ============
<class 'list | 88245 | 39.45 MB
<class 'str | 155697 | 16.03 MB
<class 'set | 2087 | 8.95 MB
<class 'int | 221725 | 6.69 MB
<class 'dict | 11138 | 5.12 MB
<class 'code | 20045 | 2.77 MB
<class 'pandas.core.frame.DataFrame | 11 | 2.52 MB
<class 'type | 2293 | 2.33 MB
@geoHeil
geoHeil / sparkEncoder.scala
Created July 10, 2017 14:32
Spark: wrong encoder for non-Product class
import org.apache.spark.SparkConf
import org.apache.spark.sql.{ Encoder, Encoders, SparkSession }
class SomeOtherClass(foo: Int)
case class FooWithSomeOtherClass(a: Int, b: String, bar: SomeOtherClass)
case class FooWithoutOtherClass(a: Int, b: String, bar: Int)
case class Foo(a: Int)
object EncoderSpark extends App {
@geoHeil
geoHeil / f.txt
Created July 5, 2017 15:32
catch all exceptions
17/07/05 17:57:10 ERROR Executor: Exception in task 7.0 in stage 1.0 (TID 66)
java.lang.IllegalStateException: Can't overwrite cause with java.sql.SQLException: ORA-01461: can bind a LONG value only for insert into a LONG column
at java.lang.Throwable.initCause(Throwable.java:457)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:612)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:670)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:670)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1951)
@geoHeil
geoHeil / errors.txt
Created July 4, 2017 04:49
XGBoost Spark Rabit networking issues on OS X
Last login: Tue Jul 4 06:28:29 on ttys001
[geoheil@Georgs-MacBook-Pro:~/Dropbox/masterThesis/thesis/researchCode/python/TMA/0evaluation on master]
% cd ~/development/xgboost/ ✹ ✚ ✭
[geoheil@Georgs-MacBook-Pro:~/development/xgboost on master]
% unset CC ✭
[geoheil@Georgs-MacBook-Pro:~/development/xgboost on master]
% unset CXX ✭
[geoheil@Georgs-MacBook-Pro:~/development/xgboost on master]
% make clean ✭
rm -f -rf build build_plugin lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o xgboost
@geoHeil
geoHeil / stats.txt
Created June 29, 2017 11:17
EVRPTW stats
######## c103_21.txt ########
./data/instances/c103_21.txt testing heuristic evrptw.ea.geneticAlgorithm
0.37420715768045437 seconds for evrptw.ea.geneticAlgorithm with algorithm eax and parameters {'alpha': 10, 'beta': 5, 'gamma': 15, 'numberOfGenerations': 1, 'initialPopSize': 1, 'parentsPopSize': 1, 'kTournament': 2, 'crossoverProbability': 0.95, 'mutationProbability': 0.0, 'noOffspringUseParent': True, 'randomNeighborParents': True, 'initialSweep': 0.5}.
2816.1965718078386, 2816.1965718078386, 2816.1965718078386
median: 002816.197, mean: 002816.197, stddev: 000000.000
47.6415086123161 seconds for evrptw.ea.geneticAlgorithm with algorithm eax and parameters {'alpha': 10, 'beta': 5, 'gamma': 15, 'numberOfGenerations': 50, 'initialPopSize': 20, 'parentsPopSize': 20, 'kTournament': 2, 'crossoverProbability': 0.95, 'mutationProbability': 0.0, 'noOffspringUseParent': True, 'randomNeighborParents': True, 'initialSweep': 0.5}.
2762.1839634638295, 2453.7455024654359, 2593.3262788668226