JoeThunyathep / download_textbooks.py
Last active August 2, 2020 04:29
Python Script to Download Springer Textbooks
import os
import requests, wget
import pandas as pd

df = pd.read_excel("Free+English+textbooks.xlsx")
os.makedirs("download", exist_ok=True)  # wget does not create the target folder
for index, row in df.iterrows():
    # loop through the excel list, one book per row
    file_name = f"{row.loc['Book Title']}_{row.loc['Edition']}".replace('/', '-').replace(':', '-')
    url = f"{row.loc['OpenURL']}"
    r = requests.get(url)  # follow the OpenURL redirect to the book's landing page
    download_url = f"{r.url.replace('book', 'content/pdf')}.pdf"
    wget.download(download_url, f"./download/{file_name}.pdf")
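The spreadsheet is assumed to be Springer's "Free English textbooks" catalog, one row per book with Book Title, Edition, and OpenURL columns; each OpenURL redirects to a landing page of the form https://link.springer.com/book/<DOI>, which the book -> content/pdf rewrite turns into the direct PDF link.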
JoeThunyathep / resizeImages.py
Last active April 10, 2024 20:46
Resize images in a folder using Pillow
from PIL import Image
import pathlib

maxsize = (512, 512)
pathlib.Path("output").mkdir(exist_ok=True)  # im.save needs the folder to exist
for input_img_path in pathlib.Path("input").iterdir():
    output_img_path = str(input_img_path).replace("input", "output")
    with Image.open(input_img_path) as im:
        im.thumbnail(maxsize)  # downscale in place, keeping the aspect ratio
        im.save(output_img_path, "JPEG", dpi=(300, 300))
    print(f"processing file {input_img_path} done...")
JoeThunyathep / covid19scraper.py
Scrape global COVID-19 case numbers from Worldometers
import requests, datetime
from bs4 import BeautifulSoup

def scrapeGlobalCase():
    try:
        url = "https://www.worldometers.info/coronavirus/"
        req = requests.get(url)
        bsObj = BeautifulSoup(req.text, "html.parser")
        data = bsObj.find_all("div", class_="maincounter-number")
        NumConfirmed = int(data[0].text.strip().replace(',', ''))
        NumDeaths = int(data[1].text.strip().replace(',', ''))
        # assumed completion of the truncated preview; the Lambda gist
        # below only relies on the 'date' key
        return {"date": datetime.datetime.utcnow().isoformat(),
                "confirmed": NumConfirmed,
                "deaths": NumDeaths}
    except Exception as err:
        print(f"[ERROR] scraping failed: {err}")
        return None
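The scraper is tied to Worldometers' markup: the headline counters are div elements with class maincounter-number, confirmed cases first and deaths second, so a page redesign silently shifts the indexing.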
JoeThunyathep / lambda_function.py
Created May 8, 2020 11:54
Lambda function for scraping COVID-19 data and save to S3 bucket
import json
import boto3
from covid19scraper import scrapeGlobalCase

def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    print("[INFO] Request COVID-19 data...")
    update_covid_cases = scrapeGlobalCase()
    BUCKET_NAME = "hourlycovid19"
    DATE = f"{update_covid_cases['date']}"
    OUTPUT_NAME = f"dataKeyTest{DATE}.json"
    # assumed completion of the truncated preview: upload the scraped
    # snapshot as a JSON object and report success
    s3.Object(BUCKET_NAME, OUTPUT_NAME).put(Body=json.dumps(update_covid_cases))
    return {"statusCode": 200, "body": f"saved {OUTPUT_NAME}"}
JoeThunyathep / JSONsToDataframe.py
Last active May 8, 2020 15:54
read several JSONs to Dataframe
import pandas as pd
import glob, os

path_to_json = 'path_to_your_json_folder'
json_pattern = os.path.join(path_to_json, '*.json')
file_list = glob.glob(json_pattern)

# collect one frame per file, then concatenate once
frames = [pd.read_json(file, lines=True) for file in file_list]
df = pd.concat(frames)
df = df.set_index('date')
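DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; collecting the frames in a list and concatenating once is the supported replacement, and it is also faster, since appending inside a loop copies the accumulated frame on every iteration.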
JoeThunyathep / covid19aggregator.py
Last active November 14, 2020 01:40
COVID 19 Time Series Aggregator
import pandas as pd
import sys

# Example: '$ python TimeSeriesAnalysis.py 3 M'
number_of_countries = int(sys.argv[1])  # 3
aggregation_time_interval = sys.argv[2]  # 'M'

def covid19analysis(number_of_countries, aggregation_time_interval):
    df = pd.read_csv('time_series_covid19_confirmed_global.csv')
    df = df.drop(columns=['Province/State', 'Lat', 'Long'])
    df = df.groupby('Country/Region').agg('sum')
    dfT = df.T
    # assumed completion of the truncated preview: after transposing,
    # the dates are the index and can drive a time-based resample
    dfT.index = pd.to_datetime(dfT.index)
    resampled = dfT.resample(aggregation_time_interval).agg('sum')
    top = resampled.iloc[-1].nlargest(number_of_countries).index
    return resampled[top].to_json()

print(covid19analysis(number_of_countries, aggregation_time_interval))
Example output, aggregated monthly (keys are epoch milliseconds):
{
  "US": {
    "1580428800000": 38,
    "1582934400000": 622,
    "1585612800000": 1086688,
    "1588204800000": 19518863,
    "1590883200000": 10858599
  },
  "Spain": {
    "1580428800000": 0,
JoeThunyathep / server.js
Created May 11, 2020 21:46
NodeJS server with Spawning Python Process
//Part1: Express Web Server
var express = require('express');
var spawn = require('child_process').spawn;
var app = express();
var port = 3000;
app.listen(port, function () {
  console.log(`server running on http://localhost:${port}`);
  console.log(`Try Getting Aggregated COVID19 data at http://localhost:${port}/covid_19_timeseries?numberOfCountries=3&aggregationInterval=W`);
})
//Part2: Express Get Request for Covid-19 Time Series data
app.get('/covid_19_timeseries', function (req, res) {
  // assumed completion of the truncated preview: pass the query string
  // on to the Python aggregator above and return whatever it prints
  var py = spawn('python', ['covid19aggregator.py',
    req.query.numberOfCountries, req.query.aggregationInterval]);
  var output = '';
  py.stdout.on('data', function (chunk) { output += chunk; });
  py.on('close', function () { res.send(output); });
})
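A quick way to exercise the endpoint, assuming the server is running locally (this client is a sketch, not part of the gist):

import requests

resp = requests.get(
    "http://localhost:3000/covid_19_timeseries",
    params={"numberOfCountries": 3, "aggregationInterval": "W"},
)
print(resp.json())  # per-country series keyed by epoch milliseconds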
Query a public COVID-19 dataset on Google BigQuery
import os
import pandas as pd
from google.cloud import bigquery

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "Path to your Service Account ~ JSON key"
client = bigquery.Client()  # Start the BigQuery Client
# Input your Query Syntax here; You may try it first at https://console.cloud.google.com/bigquery
QUERY = (
    'SELECT * FROM `bigquery-public-data.covid19_nyt.us_counties` '
    'ORDER BY date DESC, confirmed_cases DESC '
    'LIMIT 20')
# assumed completion of the truncated preview: run the query and load
# the result into a DataFrame
df = client.query(QUERY).to_dataframe()
print(df.head())
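Running the query requires the google-cloud-bigquery package (recent versions also need db-dtypes for to_dataframe) and a service account with BigQuery access; the dataset is public, but the bytes a query scans are billed to the caller's project beyond the monthly free tier.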
Read the Worldometers tables straight into DataFrames with pandas.read_html
import requests, pandas as pd

r = requests.get('http://www.worldometers.info/coronavirus/')
dfs = pd.read_html(r.text)  # one DataFrame per <table> on the page
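pandas.read_html needs an HTML parser such as lxml or html5lib installed and returns one DataFrame per table, so the main per-country table still has to be picked out of the list, usually dfs[0] on this page.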