/*
Dataset source: https://sites.google.com/eng.ucsd.edu/ucsdbookgraph/home

Citation:
@inproceedings{DBLP:conf/recsys/WanM18,
author = {Mengting Wan and
Julian J. McAuley},
editor = {Sole Pera and
Michael D. Ekstrand and
Xavier Amatriain and
John O'Donovan},
title = {Item recommendation on monotonic behavior chains},
booktitle = {Proceedings of the 12th {ACM} Conference on Recommender Systems, RecSys
2018, Vancouver, BC, Canada, October 2-7, 2018},
pages = {86--94},
publisher = {{ACM}},
year = {2018},
url = {https://doi.org/10.1145/3240323.3240369},
doi = {10.1145/3240323.3240369},
timestamp = {Mon, 22 Jul 2019 19:11:02 +0200},
biburl = {https://dblp.org/rec/conf/recsys/WanM18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
*/
-- Load goodreads_reviews_dedup.json into the goodreads_reviews table, keeping
-- every column that read_json_auto detects.
-- ignore_errors='true' is passed so that records the reader cannot parse do
-- not abort the load (NOTE(review): confirm exact semantics for the DuckDB
-- version in use).
-- lines='true' presumably marks the input as newline-delimited JSON, one
-- object per line - TODO confirm against the DuckDB version in use.
CREATE TABLE goodreads_reviews AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_reviews_dedup.json',
    ignore_errors = 'true',
    lines = 'true'
);
-- Load goodreads_books.json into the goodreads table, keeping every column
-- that read_json_auto detects.
-- lines='true' presumably marks the input as newline-delimited JSON, one
-- object per line - TODO confirm against the DuckDB version in use.
CREATE TABLE goodreads AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_books.json',
    lines = 'true'
);
-- Load goodreads_book_authors.json into the goodreads_authors table, keeping
-- every column that read_json_auto detects.
-- lines='true' presumably marks the input as newline-delimited JSON, one
-- object per line - TODO confirm against the DuckDB version in use.
CREATE TABLE goodreads_authors AS
SELECT *
FROM read_json_auto(
    '/Users/vicki/viberary/data/goodreads_book_authors.json',
    lines = 'true'
);
-- Build authorid: one row per row of goodreads, carrying title, description,
-- average_rating and book_id plus a single extracted author_id.
--
-- author_id is the first run of digits found in the text form of authors
-- (presumably the id of the first listed author - verify against the data).
-- REGEXP_EXTRACT yields '' when the text contains no digits.
--
-- authors is cast to VARCHAR explicitly: read_json_auto may infer a nested
-- type for it, and DuckDB releases from 0.10.0 on no longer cast non-string
-- arguments to VARCHAR implicitly, so the bare column reference would fail to
-- bind there. On older releases the explicit cast gives the same text the
-- implicit one did.
CREATE TABLE authorid AS
SELECT
    REGEXP_EXTRACT(CAST(authors AS VARCHAR), '[0-9]+') AS author_id,
    title,
    description,
    average_rating,
    book_id
FROM goodreads;
-- Export the training set as Parquet to the relative path
-- 20230630_training.parquet. Each output row is a review matched to its book
-- (by book_id) and to the author whose id was extracted for that book
-- (by author_id).
COPY (
    SELECT
        goodreads_reviews.review_text,
        authorid.title,
        authorid.description,
        authorid.average_rating,
        goodreads_authors.name AS author
    FROM authorid
    INNER JOIN goodreads_reviews
        ON authorid.book_id = goodreads_reviews.book_id
    INNER JOIN goodreads_authors
        ON authorid.author_id = goodreads_authors.author_id
    -- Keep only books for which a non-empty author id was extracted.
    WHERE authorid.author_id <> ''
) TO '20230630_training.parquet' (FORMAT PARQUET);