Skip to content

Instantly share code, notes, and snippets.

@curiousleo
Last active March 4, 2019 23:33
Show Gist options
  • Save curiousleo/0a54c23db6a218bce12ce01f154bc3d6 to your computer and use it in GitHub Desktop.
Save curiousleo/0a54c23db6a218bce12ce01f154bc3d6 to your computer and use it in GitHub Desktop.
Convert Goodreads CSV export to CSV with only relevant fields
import pandas as pd
# Columns loaded from the Goodreads export; `process` passes this list to
# `pd.read_csv(..., usecols=...)`, so every other export column is ignored.
INPUT_FIELDS = [
    # Identification
    "Book Id",
    "Title",
    "Author",
    "Author l-f",
    # Edition details
    "Binding",
    "Original Publication Year",
    # Shelving / ownership
    "Bookshelves",
    "Owned Copies",
    "Condition",
]

# Columns of the frame returned by `process`, in output order.
# "Goodreads Page" is not read from the export; `process` derives it from
# "Book Id".
OUTPUT_FIELDS = [
    "Title",
    "Author",
    "Binding",
    "Condition",
    "Goodreads Page",
]
def process(goodreads_export):
    """Select the owned "to-sell" books from a Goodreads CSV export.

    Args:
        goodreads_export: Anything `pandas.read_csv` accepts as its first
            argument (a path or a file-like object such as `sys.stdin`).
            Only the columns named in `INPUT_FIELDS` are read.

    Returns:
        A `pandas.DataFrame` containing the `OUTPUT_FIELDS` columns, in that
        order, for every book that is on the "to-sell" shelf and has at least
        one owned copy. Rows are sorted by "Author l-f" and then by
        "Original Publication Year".
    """
    books = pd.read_csv(goodreads_export, usecols=INPUT_FIELDS)

    # "Bookshelves" holds a comma-separated list of shelf names.  Match
    # "to-sell" only as a complete entry of that list: a plain substring test
    # would also select shelves such as "not-to-sell" or "to-sell-later".
    # Rows with no shelves at all (NaN) never match.
    on_to_sell_shelf = books["Bookshelves"].str.contains(
        r"(?:^|,)\s*to-sell\s*(?:,|$)", na=False, regex=True
    )
    has_owned_copy = books["Owned Copies"] > 0
    books = books[on_to_sell_shelf & has_owned_copy]

    # Sort by "Author l-f" and "Original Publication Year"
    books = books.sort_values(by=["Author l-f", "Original Publication Year"])

    # Add Goodreads links
    book_ids = books["Book Id"].astype(str)
    books["Goodreads Page"] = "https://goodreads.com/book/show/" + book_ids

    # Only keep certain columns (in the specified order)
    return books[OUTPUT_FIELDS]
if __name__ == "__main__":
    import sys

    # Command-line use: read the Goodreads export from stdin and write the
    # selected rows to stdout as CSV, without the DataFrame index column.
    selected_books = process(sys.stdin)
    selected_books.to_csv(sys.stdout, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment