mh0w’s gists

mh0w / flatten list of lists to a list.py

Created October 21, 2022 15:30

flatten list of lists to a list

	# Flatten a list of lists into a single flat list.
	# The nested list must be bound to the same name the comprehension
	# iterates over (`ll`); binding it to `l` raised a NameError.
	ll = [[0, 1, 3, 4, 5],
	      [0, 1, 2, 3],
	      [0, 3, 4, 5, 6]]

	# Outer loop walks the sub-lists, inner loop walks each sub-list's elements
	print([e for sub in ll for e in sub]) # [0, 1, 3, 4, 5, 0, 1, 2, 3, 0, 3, 4, 5, 6]

	# Key terms: list comprehension

mh0w / create random int or float in a given range.py

Last active July 27, 2023 15:29

create random int or float in a given range

	import random

	import numpy as np

	# Set a seed number for replicable results
	# (this seeds the stdlib `random` module only, not NumPy's generator)
	random.seed(3210)

	random.uniform(1.5, 1.9) # 1.795802587856317
	random.uniform(1.5, 1.9) # another float between 1.5 and 1.9; each call advances the generator, so the value differs
	random.randint(1, 2) # 1 or 2 (both endpoints are inclusive)

	# NumPy's upper bound is exclusive, so only 1 and 2 can be drawn
	np.random.randint(1, 3, 100) # array of 100 ints, each 1 or 2

mh0w / remove all occurrences of an element in a list.py

Created October 21, 2022 15:33

remove all occurrences of an element in a list

	# Drop every occurrence of the value 2 by keeping only the other elements
	l = [1, 2, 3, 2, 2, 2, 3, 4]
	print([item for item in l if item != 2]) # [1, 3, 3, 4]

mh0w / copying lists with and without linking them.py

Created October 21, 2022 15:35

copying lists with and without linking them

	# Here, updating l2 will also update l
	# (plain assignment makes both names refer to the same list object)
	l = [1,2,3]
	l2 = l
	l2[0] += 1
	print(l) # [2,2,3]
	print(l2) # [2,2,3]

	# Here, updating l2 will not update l
	# (list.copy() builds a new list; it is a shallow copy, so nested
	# mutable elements would still be shared)
	l = [1,2,3]
	l2 = l.copy()
	l2[0] += 1
	print(l) # [1,2,3]
	print(l2) # [2,2,3]

mh0w / print dictionary (dict) in long thin format.py

Created October 21, 2022 15:39

print dictionary (dict) in long thin format rather than as one line

	import pprint

	d = {'math': 99, 'english': 80, 'chemistry': 67, 'biology': 88, 'physics': 93}

	print(d) # {'math': 99, 'english': 80, 'chemistry': 67, 'biology': 88, 'physics': 93}

	# width=1 is narrower than any entry, so each key/value pair goes on its
	# own line; pprint sorts dict keys by default, hence the alphabetical order
	pprint.pprint(d, width=1)
	"""{'biology': 88,
	 'chemistry': 67,
	 'english': 80,
	 'math': 99,
	 'physics': 93}
	"""

mh0w / random sample from a dictionary (dict).py

Created October 21, 2022 15:41

random sample from a dictionary (dict)

	import random

	# Set seed for replicable results
	random.seed(3210)

	d = {'math': 99, 'english': 80, 'chemistry': 67, 'biology': 88, 'physics': 93}

	# random.sample needs a sequence: passing the dict view directly raises
	# TypeError on Python 3.11+, so materialise the (key, value) pairs first.
	# dict(...) then rebuilds a 2-item dictionary from the sampled pairs.
	dict(random.sample(list(d.items()), 2))

mh0w / print formatted float (limiting decimal points).py

Created October 21, 2022 15:42

print formatted float (limiting decimal points)

mh0w / Pyspark or Pydoop - json (read and write) locally or on hdfs.py

Last active October 19, 2023 14:21

Pyspark or Pydoop - json (read and write) locally or on hdfs

	from pyspark.sql import SparkSession
	import databricks.koalas as ks
	import json
	import pydoop.hdfs as hdfs

	# Obtain a Spark session (reusing an existing one if present) with Hive support enabled
	session_builder = SparkSession.builder.enableHiveSupport()
	spark = session_builder.getOrCreate()

	# Sample records stored column-wise: one list of values per field
	my_data = {
		"name": ["John", "Mary", "Kevin"],
		"area": ["London", "Munich", "Berlin"],
		"age": [33, 56, 44],
	}

mh0w / pandas: create dataframe with only column names.py

Created October 21, 2022 15:44

pandas: create dataframe with only column names

	import pandas as pd

	# Build a frame that has seven named columns and no rows
	column_names = list('ABCDEFG')
	df = pd.DataFrame(columns=column_names)

	print(df)
	"""
	Empty DataFrame
	Columns: [A, B, C, D, E, F, G]
	Index: []
	"""

Created October 21, 2022 15:48

pandas: dataframes: count rows or columns

	import pandas as pd

	# Empty frame with seven named columns, used to show the counting calls
	labels = list('ABCDEFG')
	df = pd.DataFrame(columns=labels)

	# rows
	row_index = df.index
	print(len(row_index)) # 0 - slightly faster in large dfs
	n_rows = df.shape[0]
	print(n_rows) # 0

	# columns
	print(len(df.columns)) # 7

Matthew Hawkes_ONS mh0w