Skip to content

Instantly share code, notes, and snippets.

@Databracket9
Created July 7, 2024 09:51
Show Gist options
  • Save Databracket9/f6507607048697fc403e0753d64e1bf4 to your computer and use it in GitHub Desktop.
Save Databracket9/f6507607048697fc403e0753d64e1bf4 to your computer and use it in GitHub Desktop.
End-to-End Data Engineering with Pandas
import pandas as pd
import requests
import configparser
import psycopg2
# raw_data = requests.get("https://storage.googleapis.com/generall-shared-data/startups_demo.json")
# with open("raw_files/raw_data.json", "w") as f:
# f.write(raw_data.text)
raw_df = pd.read_json("raw_files/raw_data.json",lines=True)
secure_startups_df = raw_df.loc[(raw_df['link'].str.contains("https")) & (raw_df['city'] == "New York")].sort_values("name").reset_index(drop=True)
config = configparser.ConfigParser()
config.read('secrets.ini')
user = config['POSTGRES']['USER']
password = config['POSTGRES']['PASSWORD']
port = config['POSTGRES']['PORT']
database = config['POSTGRES']['DATABASE']
pg_conn = psycopg2.connect(
user=user,
password=password,
host="localhost",
port=port,
database=database
)
cur = pg_conn.cursor()
cur.execute("CREATE TABLE IF NOT EXISTS SECURE_STARTUPS (id bigserial PRIMARY KEY, name varchar(400), images varchar(400), alt varchar(1000), description varchar(4000), link varchar(400), city varchar(100))")
for row in secure_startups_df.itertuples():
cur.execute("INSERT INTO SECURE_STARTUPS (NAME, IMAGES, ALT, DESCRIPTION, LINK, CITY) VALUES ('{}', '{}', '{}', '{}', '{}', '{}')".format(row.name.replace("'","''"), row.images, row.alt.replace("'","''"), row.description.replace("'","''"), row.link, row.city))
pg_conn.commit()
cur.close()
pg_conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment