Created
March 3, 2025 06:56
-
-
Save ZediWards/230811f1f26825c8a069db239155b8f8 to your computer and use it in GitHub Desktop.
Python / Pandas read write
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import gzip
import json
import sys
from io import BytesIO, StringIO

import pandas as pd
import requests
# Extend the module search path so imports resolve when this file is run in
# the VS Code interactive window.
playground_dir = '/home/<user>/Code/python_playground'
sys.path.append(playground_dir)
print(sys.path)

# A display width of None lets pandas auto-detect the terminal width.
pd.set_option('display.width', None)
# MIME types (type/subtype) by file extension
# .json   application/json
# .gz     application/gzip
# .csv    text/csv
# .tsv    text/tab-separated-values
# .txt    text/plain
# .html   text/html
# .xls    application/vnd.ms-excel
# .xlsx   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet

# Each format section below covers:
# - write file from response
# - df from response
# - df from file
# - read file
# ------------------------------------------------- http request returning json
# The timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() stops early instead of saving an HTTP error body to disk.
response = requests.get('https://jsonplaceholder.typicode.com/todos/1', timeout=10)
response.raise_for_status()

# Bare expressions: evaluated only so the interactive window displays them.
response.headers['Content-Type']  # application/json; charset=utf-8
response.content  # raw body as bytes
response.text  # body decoded to str

# ------------------------------------------------ writing json files
# validates json and parses it into Python objects (a dict for this endpoint)
data = response.json()

# One path shared by the write and both reads below, so they all use the same file.
json_path = 'data.json'

# write json to file
with open(json_path, 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

# create df from the parsed response
# A single flat object holds only scalar values, which pd.DataFrame(data)
# rejects ("If using all scalar values, you must pass an index"). Wrapping it
# in a list makes it one row; a list of objects is already row-shaped.
records = data if isinstance(data, list) else [data]
df = pd.DataFrame(records)

# create df from json file
# typ='series' reads the single object as key -> value, and to_frame().T turns
# that into a one-row frame. (A file holding a list of objects can be read
# with plain pd.read_json(json_path).)
df = pd.read_json(json_path, typ='series').to_frame().T

# reading json file
with open(json_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
print(data)
# ------------------------------------- writing csv files
# ------------------------------------ mock csv from requests.text
mem_csv = """name,age,score
Alice,28,87.5
Bob,34,92.1
Charlie,19,78.6"""
# mock of response.content for a gzip-compressed csv download (bytes, not str)
mem_csv_gz = gzip.compress(mem_csv.encode('utf-8'))

# ------------------- writing file from response
# csv.reader takes any iterable that yields one line of text at a time
data = mem_csv.splitlines()  # data = response.text.splitlines()
# returns an iterator of rows, each row being a list of string fields
csv_reader = csv.reader(data, delimiter=',')
# newline='' turns off Python's newline translation so the csv module
# controls the line endings itself
with open('csv_data.csv', 'w', newline='', encoding='utf-8') as file:
    csv_writer = csv.writer(file)
    csv_writer.writerows(csv_reader)
# a compressed payload is bytes, so it is written in binary mode
with open('csv_data.csv.gz', 'wb') as file:
    file.write(mem_csv_gz)  # file.write(response.content)

# ------------------- df from api response
# StringIO wraps str in a file-like object; BytesIO does the same for bytes.
# Compressed content must go through BytesIO because StringIO rejects bytes.
df = pd.read_csv(StringIO(mem_csv))  # StringIO(response.text)
df = pd.read_csv(BytesIO(mem_csv_gz), compression='gzip')  # BytesIO(response.content)

# ------------------- df from file
# df from csv file
df = pd.read_csv('csv_data.csv')
df = pd.read_csv('csv_data.csv.gz', compression='gzip')

# ------------------- read file with csv module
with open('csv_data.csv', 'r', newline='', encoding='utf-8') as file:
    csv_file = csv.reader(file, delimiter=',')
    for row in csv_file:
        print(row)

# ------------------- bonus -- read csv file and convert to dict in memory using csv module
# DictReader uses the first row as keys and yields one dict per following row
with open('csv_data.csv', 'r', newline='', encoding='utf-8') as file:
    csv_file = csv.DictReader(file, delimiter=',')
    for line in csv_file:
        print(line)
# ------------------------------------ writing tsv files
# ------------------------------- writing file from response
# mock tsv from requests.text; the tabs are written as \t escapes so they
# survive editors and copy/paste that turn tab characters into spaces
mem_tsv = (
    'Name\tAge\tCountry\n'
    'John\t34\tUSA\n'
    'Alice\t28\tCanada\n'
    'Bob\t45\tUK'
)
# mock of response.content for a gzip-compressed tsv download (bytes, not str)
mem_tsv_gz = gzip.compress(mem_tsv.encode('utf-8'))

# iterable for csv reader
data = mem_tsv.splitlines()  # data = response.text.splitlines()
# returns an iterator of rows, each row being a list of string fields
tsv_reader = csv.reader(data, delimiter='\t')
# writing response to a tsv file
with open('tsv_data.tsv', 'w', newline='', encoding='utf-8') as file:
    tsv_writer = csv.writer(file, delimiter='\t')
    tsv_writer.writerows(tsv_reader)
# a compressed payload is bytes, so it is written in binary mode
with open('tsv_data.tsv.gz', 'wb') as file:
    file.write(mem_tsv_gz)  # file.write(response.content)

# ----------------------------- df from api response
# StringIO wraps str in a file-like object; BytesIO does the same for bytes.
# Compressed content must go through BytesIO because StringIO rejects bytes.
df = pd.read_csv(StringIO(mem_tsv), delimiter='\t')  # StringIO(response.text)
df = pd.read_csv(BytesIO(mem_tsv_gz), delimiter='\t', compression='gzip')  # BytesIO(response.content)

# ----------------------------- df from file
df = pd.read_csv('tsv_data.tsv', delimiter='\t')
# compression='gzip' belongs with the .gz file; the plain .tsv is not compressed
df = pd.read_csv('tsv_data.tsv.gz', delimiter='\t', compression='gzip')

# ----------------------------- read file with csv module
with open('tsv_data.tsv', 'r', newline='', encoding='utf-8') as file:
    tsv_file = csv.reader(file, delimiter='\t')
    for row in tsv_file:
        print(row)

# ---------------------------- bonus -- read tsv and convert to dict in memory
# The delimiter must be passed here too: DictReader defaults to commas, which
# would leave every tab-separated line as a single field.
with open('tsv_data.tsv', 'r', newline='', encoding='utf-8') as file:
    tsv_file = csv.DictReader(file, delimiter='\t')
    for line in tsv_file:
        print(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment