Skip to content

Instantly share code, notes, and snippets.

View JasonKessler's full-sized avatar

Jason S. Kessler JasonKessler

View GitHub Profile
import pandas as pd
import scattertext as st
# Load the ICLR 2018 review table from a bz2-compressed CSV hosted on GitHub.
reviews_df = pd.read_csv(
    'https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2'
)

# Parse each review's text with scattertext's whitespace-based parser and
# store the parsed documents in a new 'parse' column.
reviews_df['parse'] = reviews_df['review'].apply(
    st.whitespace_nlp_with_sentences
)

# Build a corpus whose categories come from the 'decision' column, then
# discard the 'Workshop' category so it is excluded from later comparisons.
corpus = (
    st.CorpusFromParsedDocuments(
        reviews_df,
        category_col='decision',
        parsed_col='parse',
    )
    .build()
    .remove_categories(['Workshop'])
)
html = st.produce_scattertext_explorer(corpus,
category='Accept', not_categories=['Reject'],
transform = st.Scalers.dense_rank,
# Unigram-only corpus categorized by the 'category' column, with its
# vocabulary compacted by ClassPercentageCompactor(term_count=1).
# NOTE(review): assumes reviews_df already has 'category' and 'parse'
# columns; 'category' is not created in the visible code -- confirm.
four_square_corpus = (
    st.CorpusFromParsedDocuments(
        reviews_df,
        category_col='category',
        parsed_col='parse',
    )
    .build()
    .get_unigram_corpus()
    .compact(st.ClassPercentageCompactor(term_count=1))
)
four_square_axes = st.FourSquareAxes(four_square_corpus,
left_categories=['Accept, Positive'],
right_categories=['Accept, Negative'],
top_categories=['Reject, Positive'],
bottom_categories=['Reject, Negative'],
labels = {'a': 'Positive',
# Corpus categorized by the 'category' column whose features are produced by
# PhraseMachinePhrases instead of the default feature extractor; the
# vocabulary is then compacted by ClassPercentageCompactor(term_count=1).
# NOTE(review): assumes reviews_df already has 'category' and 'parse'
# columns; 'category' is not created in the visible code -- confirm.
four_square_corpus_phrases = (
    st.CorpusFromParsedDocuments(
        reviews_df,
        category_col='category',
        parsed_col='parse',
        feats_from_spacy_doc=st.PhraseMachinePhrases(),
    )
    .build()
    .compact(st.ClassPercentageCompactor(term_count=1))
)
four_square_axes = st.FourSquareAxes(four_square_corpus_phrases,
left_categories=['Accept, Positive'],
right_categories=['Accept, Negative'],
top_categories=['Reject, Positive'],
bottom_categories=['Reject, Negative'],
labels = {'a': 'Positive',
'b': 'Review that was Contrary to Accpetance Decision',