JoeThunyathep / download_textbooks.py
Last active August 2, 2020 04:29
Python Script to Download Springer Textbooks
import os
import requests, wget
import pandas as pd

df = pd.read_excel("Free+English+textbooks.xlsx")
os.makedirs("download", exist_ok=True)  # wget does not create the target folder
for index, row in df.iterrows():
    # loop through the excel list, one book per row
    file_name = f"{row.loc['Book Title']}_{row.loc['Edition']}".replace('/', '-').replace(':', '-')
    url = f"{row.loc['OpenURL']}"
    r = requests.get(url)  # follow the OpenURL redirect to the book's landing page
    download_url = f"{r.url.replace('book', 'content/pdf')}.pdf"
    wget.download(download_url, f"./download/{file_name}.pdf")
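The spreadsheet is assumed to be Springer's "Free English textbooks" catalog, one row per book with Book Title, Edition, and OpenURL columns; each OpenURL redirects to a landing page of the form https://link.springer.com/book/<DOI>, which the book -> content/pdf rewrite turns into the direct PDF link.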
JoeThunyathep / resizeImages.py
Last active April 10, 2024 20:46
Resize images in a folder using Pillow
from PIL import Image
import pathlib

maxsize = (512, 512)
pathlib.Path("output").mkdir(exist_ok=True)  # im.save needs the folder to exist
for input_img_path in pathlib.Path("input").iterdir():
    output_img_path = str(input_img_path).replace("input", "output")
    with Image.open(input_img_path) as im:
        im.thumbnail(maxsize)  # downscale in place, keeping the aspect ratio
        im.save(output_img_path, "JPEG", dpi=(300, 300))
    print(f"processing file {input_img_path} done...")
JoeThunyathep / covid19scraper.py
Scrape global COVID-19 case numbers from Worldometers
import requests, datetime
from bs4 import BeautifulSoup

def scrapeGlobalCase():
    try:
        url = "https://www.worldometers.info/coronavirus/"
        req = requests.get(url)
        bsObj = BeautifulSoup(req.text, "html.parser")
        data = bsObj.find_all("div", class_="maincounter-number")
        NumConfirmed = int(data[0].text.strip().replace(',', ''))
        NumDeaths = int(data[1].text.strip().replace(',', ''))
        # assumed completion of the truncated preview; the Lambda gist
        # below only relies on the 'date' key
        return {"date": datetime.datetime.utcnow().isoformat(),
                "confirmed": NumConfirmed,
                "deaths": NumDeaths}
    except Exception as err:
        print(f"[ERROR] scraping failed: {err}")
        return None
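The scraper is tied to Worldometers' markup: the headline counters are div elements with class maincounter-number, confirmed cases first and deaths second, so a page redesign silently shifts the indexing.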
JoeThunyathep / lambda_function.py
Created May 8, 2020 11:54
Lambda function for scraping COVID-19 data and save to S3 bucket
import json
import boto3
from covid19scraper import scrapeGlobalCase

def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    print("[INFO] Request COVID-19 data...")
    update_covid_cases = scrapeGlobalCase()
    BUCKET_NAME = "hourlycovid19"
    DATE = f"{update_covid_cases['date']}"
    OUTPUT_NAME = f"dataKeyTest{DATE}.json"
    # assumed completion of the truncated preview: upload the scraped
    # snapshot as a JSON object and report success
    s3.Object(BUCKET_NAME, OUTPUT_NAME).put(Body=json.dumps(update_covid_cases))
    return {"statusCode": 200, "body": f"saved {OUTPUT_NAME}"}
JoeThunyathep / JSONsToDataframe.py
Last active May 8, 2020 15:54
read several JSONs to Dataframe
import pandas as pd
import glob, os

path_to_json = 'path_to_your_json_folder'
json_pattern = os.path.join(path_to_json, '*.json')
file_list = glob.glob(json_pattern)

# collect one frame per file, then concatenate once
frames = [pd.read_json(file, lines=True) for file in file_list]
df = pd.concat(frames)
df = df.set_index('date')
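DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; collecting the frames in a list and concatenating once is the supported replacement, and it is also faster, since appending inside a loop copies the accumulated frame on every iteration.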
JoeThunyathep / covid19aggregator.py
Last active November 14, 2020 01:40
COVID 19 Time Series Aggregator
import pandas as pd
import sys

# Example: '$ python TimeSeriesAnalysis.py 3 M'
number_of_countries = int(sys.argv[1])  # 3
aggregation_time_interval = sys.argv[2]  # 'M'

def covid19analysis(number_of_countries, aggregation_time_interval):
    df = pd.read_csv('time_series_covid19_confirmed_global.csv')
    df = df.drop(columns=['Province/State', 'Lat', 'Long'])
    df = df.groupby('Country/Region').agg('sum')
    dfT = df.T
    # assumed completion of the truncated preview: after transposing,
    # the dates are the index and can drive a time-based resample
    dfT.index = pd.to_datetime(dfT.index)
    resampled = dfT.resample(aggregation_time_interval).agg('sum')
    top = resampled.iloc[-1].nlargest(number_of_countries).index
    return resampled[top].to_json()

print(covid19analysis(number_of_countries, aggregation_time_interval))
Example output, aggregated monthly (keys are epoch milliseconds):
{
  "US": {
    "1580428800000": 38,
    "1582934400000": 622,
    "1585612800000": 1086688,
    "1588204800000": 19518863,
    "1590883200000": 10858599
  },
  "Spain": {
    "1580428800000": 0,
JoeThunyathep / server.js
Created May 11, 2020 21:46
NodeJS server with Spawning Python Process
//Part1: Express Web Server
var express = require('express');
var spawn = require('child_process').spawn;
var app = express();
var port = 3000;
app.listen(port, function () {
  console.log(`server running on http://localhost:${port}`);
  console.log(`Try Getting Aggregated COVID19 data at http://localhost:${port}/covid_19_timeseries?numberOfCountries=3&aggregationInterval=W`);
})
//Part2: Express Get Request for Covid-19 Time Series data
app.get('/covid_19_timeseries', function (req, res) {
  // assumed completion of the truncated preview: pass the query string
  // on to the Python aggregator above and return whatever it prints
  var py = spawn('python', ['covid19aggregator.py',
    req.query.numberOfCountries, req.query.aggregationInterval]);
  var output = '';
  py.stdout.on('data', function (chunk) { output += chunk; });
  py.on('close', function () { res.send(output); });
})
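A quick way to exercise the endpoint, assuming the server is running locally (this client is a sketch, not part of the gist):

import requests

resp = requests.get(
    "http://localhost:3000/covid_19_timeseries",
    params={"numberOfCountries": 3, "aggregationInterval": "W"},
)
print(resp.json())  # per-country series keyed by epoch milliseconds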
Query a public COVID-19 dataset on Google BigQuery
import os
import pandas as pd
from google.cloud import bigquery

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "Path to your Service Account ~ JSON key"
client = bigquery.Client()  # Start the BigQuery Client
# Input your Query Syntax here; You may try it first at https://console.cloud.google.com/bigquery
QUERY = (
    'SELECT * FROM `bigquery-public-data.covid19_nyt.us_counties` '
    'ORDER BY date DESC, confirmed_cases DESC '
    'LIMIT 20')
# assumed completion of the truncated preview: run the query and load
# the result into a DataFrame
df = client.query(QUERY).to_dataframe()
print(df.head())
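Running the query requires the google-cloud-bigquery package (recent versions also need db-dtypes for to_dataframe) and a service account with BigQuery access; the dataset is public, but the bytes a query scans are billed to the caller's project beyond the monthly free tier.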
Read the Worldometers tables straight into DataFrames with pandas.read_html
import requests, pandas as pd

r = requests.get('http://www.worldometers.info/coronavirus/')
dfs = pd.read_html(r.text)  # one DataFrame per <table> on the page
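pandas.read_html needs an HTML parser such as lxml or html5lib installed and returns one DataFrame per table, so the main per-country table still has to be picked out of the list, usually dfs[0] on this page.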