Skip to content

Instantly share code, notes, and snippets.

@canimus
canimus / format_genbank.py
Created January 14, 2024 19:31
Format a DNA sequence in GenBank style
def format_sequence(sequence: str):
"""Format a DNA sequence as genbank file"""
counter = 1
print(f"{counter}".rjust(4, " "), end=" ")
for a,b in zip(range(0,len(sequence)+1, 10), range(10, len(sequence)+1, 10)):
print(sequence[a:b], end="")
if ((counter % 6) == 0) and (b < (len(sequence)-1)):
print("")
print(f'{((counter*10) + 1)}'.rjust(4, " "), end=" ")
else:
@canimus
canimus / cpu-frequency.sh
Created September 25, 2023 10:18
CPU Frequency
# Chart the clock speeds reported in /proc/cpuinfo with termgraph.
# Pipeline stages, in order:
#   1. keep only the lines containing "MHz" and take the text after the ":"
#   2. strip spaces, then replace every newline with a comma (one long row)
#   3. delete any character that is not a digit, a dot, or a comma
#   4. drop the trailing comma left by step 2 (reverse, cut char 1, reverse)
#   5. prepend the "cpu," label and pass the resulting row to termgraph
cat /proc/cpuinfo | grep MHz | cut -d":" -f2 | tr -d " " | tr "\n" "," | sed 's/[^0-9\.,]//g' | rev | cut -c2- | rev | (echo -n "cpu," && cat) | termgraph
@canimus
canimus / sparker-s3.py
Created March 22, 2023 15:15
An initializer for PySpark reading from S3
from pyspark.sql import SparkSession
from pyspark import SparkConf
# Spark configuration for a local session named "Connect AWS".
# The hadoop-aws artifact is requested through spark.jars.packages
# (presumably to enable S3 access, per the gist description).
conf = (
    SparkConf()
    .setAppName("Connect AWS")
    .setMaster("local[*]")  # run locally, using all available cores
    .set("spark.jars.packages", "org.apache.hadoop:hadoop-aws:3.3.2")
)
@canimus
canimus / file_section.sh
Created February 9, 2023 14:20
Capture gene sections on extract
# Copy selected sections of GSE27219_family.soft into out2.txt.
# With -n, sed prints only what the `p` command selects: every range that
# starts at a line beginning with a literal "^SAMPLE" (the first ^ is the
# anchor, the escaped \^ is the character itself) and ends at the next line
# containing "sample_table_end", both boundary lines included.
sed -n '/^\^SAMPLE/,/sample_table_end/p' GSE27219_family.soft > out2.txt
@canimus
canimus / gen_sample.py
Created February 9, 2023 14:10
Conversion of gene to frame
series = []
d = {}
for row in raw:
try:
if len(row) == 0:
continue
if row.startswith("!sample_table_end"):
for k in arr:
if k in d:
d[k] = [d[k]]
@canimus
canimus / asyncio-semaphore.py
Created November 9, 2022 19:29
asyncio semaphore implementation
import asyncio
from random import randint
async def download(code):
    """Simulate downloading ``code`` by waiting a random 1-3 seconds.

    Prints one message before the wait and one after it. Returns ``None``.
    """
    delay = randint(1, 3)
    start_msg = 'downloading {} will take {} second(s)'.format(code, delay)
    print(start_msg)
    # Stand-in for real I/O: awaiting the sleep hands control back to the
    # event loop so other tasks can run in the meantime.
    await asyncio.sleep(delay)
    done_msg = 'downloaded {}'.format(code)
    print(done_msg)
@canimus
canimus / cert_and_key.sh
Created July 1, 2022 16:59
Create local server certificate
# Create a self-signed certificate for a local server.
#   -x509             emit a self-signed certificate instead of a signing request
#   -newkey rsa:4096  generate a new 4096-bit RSA private key alongside it
#   -keyout / -out    write the key to key.pem and the certificate to cert.pem
#   -days 365         certificate validity period in days
#   -nodes            leave the private key unencrypted (no passphrase prompt)
openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes
@canimus
canimus / pyspark_df_full_show.py
Created June 17, 2022 21:36
Allow full display of data frame in Jupyter Lab
# Inject a CSS rule into the notebook so <pre> blocks keep their whitespace
# and are not wrapped. Per the gist description this is meant to let wide
# data-frame output show in full in Jupyter Lab.
#
# `display` is imported explicitly from the public IPython.display module
# rather than relying on the notebook kernel having injected the name, and
# `HTML` comes from the same public module instead of IPython.core.display.
from IPython.display import HTML, display

display(HTML("<style>pre { white-space: pre !important; }</style>"))
@canimus
canimus / map_null_count.py
Created May 19, 2022 18:17
PySpark Map Transform for Null Counts
df.select(
F.array(
F.create_map(
F.lit("k1"), F.col("c1"), F.lit("k2"), F.col("c2"), F.lit("k3"), F.col("c3")
)
).alias("losing_bids")
).select(
F.transform(
"losing_bids",
lambda m: F.transform_values(m, lambda k, v: v.isNull().cast("integer")),
@canimus
canimus / forward_fill.py
Created December 21, 2021 22:42
PySpark FFill Implementation
import pyspark.sql.functions as F
from pyspark.sql import DataFrame
from pyspark.sql import Window as W
from pyspark.sql.window import WindowSpec
__all__ = ["forward_fill"]
def _window_all_previous_rows(partition, order) -> WindowSpec:
"""Select the window on which values are filled in a forward manner."""