Skip to content

Instantly share code, notes, and snippets.

@naufalso
Created November 8, 2024 10:03
Show Gist options
  • Save naufalso/4a5befbb589ffa10166143a889baf0cb to your computer and use it in GitHub Desktop.
Save naufalso/4a5befbb589ffa10166143a889baf0cb to your computer and use it in GitHub Desktop.
Export chrome history
import sqlite3
import pandas as pd
from datetime import datetime, timedelta
# Path to your Chrome history database [Select one based on your os and chrome history path]
history_path = "C:\\Users\\[USER_NAME]\\AppData\\Local\\Google\\Chrome\\User Data\\Default\\History" # Windows
history_path = "~/Library/Application Support/Google/Chrome/Default/History" # MAC
history_path = "~/.config/google-chrome/Default/History" # Linux
# Connect to the database
conn = sqlite3.connect(history_path)
cursor = conn.cursor()
# Get the start of today in microseconds since epoch
start_of_today = datetime.combine(datetime.today(), datetime.min.time())
start_timestamp = int((start_of_today - datetime(1601, 1, 1)).total_seconds() * 1000000)
# Query to get all available fields by joining `urls` and `visits` tables
query = """
SELECT
urls.url AS URL,
urls.title AS Title,
urls.visit_count AS Visit_Count,
urls.typed_count AS Typed_Count,
urls.last_visit_time AS Last_Visit_Time,
visits.visit_time AS Visit_Time,
visits.from_visit AS Referrer_URL_ID,
visits.transition AS Transition_Type
FROM urls
JOIN visits ON urls.id = visits.url
WHERE urls.last_visit_time >= ?
ORDER BY urls.last_visit_time DESC;
"""
# Execute query
cursor.execute(query, (start_timestamp,))
rows = cursor.fetchall()
# Convert timestamp and load data into a DataFrame
data = []
epoch_start = datetime(1601, 1, 1)
for row in rows:
url, title, visit_count, typed_count, last_visit_time, visit_time, referrer_url_id, transition_type = row
last_visit_timestamp = epoch_start + timedelta(microseconds=last_visit_time)
visit_timestamp = epoch_start + timedelta(microseconds=visit_time)
data.append((url, title, visit_count, typed_count, last_visit_timestamp, visit_timestamp, referrer_url_id, transition_type))
# Create DataFrame
df = pd.DataFrame(data, columns=[
'URL', 'Title', 'Visit Count', 'Typed Count',
'Last Visit Time', 'Visit Time', 'Referrer URL ID', 'Transition Type'
])
# Closing the connection
conn.close()
# Analyze or save your data
print(df.head())
# Save the df to CSV
df.to_csv('output.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment