Skip to content

Instantly share code, notes, and snippets.

@devin-petersohn
Last active March 17, 2021 00:35
Show Gist options
  • Save devin-petersohn/b680d0c167ada97baa33e0e93a4d8d2d to your computer and use it in GitHub Desktop.
Save devin-petersohn/b680d0c167ada97baa33e0e93a4d8d2d to your computer and use it in GitHub Desktop.
Modin SQL API Demo
import modin.pandas as pd
gstore_apps_df = pd.read_csv("https://tinyurl.com/googleplaystorecsv")
"SELECT App, Category, Rating FROM gstore_apps WHERE Price = '0'"
# Select columns
result_df1 = gstore_apps_df.loc[,: ['App', 'Category', 'Rating']]
# From the resulting dataframe filter where Price = 0
result_df = result_df1['Price'] == 0
import modin.sql as mdsql
sql_str = "SELECT App,Category,Rating FROM gstore_apps WHERE Price = '0'"
result_df = mdsql.query(sql_str, gstore_apps=gstore_apps_df)
query_str = "SELECT App, Category, Rating WHERE Price = '0'"
result_df = mdsql.query(query_str, from=gstore_apps_df)
gstore_reviews_df = pd.read_csv("https://tinyurl.com/gstorereviewscsv")
sql_str = (
"""SELECT category,
avg(sentiment_polarity) as avg_sentiment_polarity,
avg(sentiment_subjectivity) as avg_sentiment_subjectivity
FROM (
SELECT category, sentiment, sentiment_polarity
FROM google_items_df INNER JOIN gstore_reviews_df
ON google_apps_df.app = gstore_reviews_df.app
)
GROUP BY category
HAVING CAST(avg_sentiment_subjectivity as float) < 0.5
ORDER BY avg_sentiment_polarity DESC
LIMIT 10"""
)
result_df = mdsql.query(
sql_str,
google_apps_df=google_apps_df,
gstore_reviews_df=gstore_reviews_df
)
# join the items and reviews
result_df = mdsql.query(
"""SELECT category, sentiment, sentiment_polarity
FROM google_items_df INNER JOIN gstore_reviews_df
ON google_apps_df.app = gstore_reviews_df.app""",
google_apps_df=google_apps_df,
gstore_reviews_df=gstore_reviews_df
)
# group by category and calculate averages
result_df = mdsql.query(
"""SELECT category,
avg(sentiment_polarity) as avg_sentiment_polarity,
avg(sentiment_subjectivity) as avg_sentiment_subjectivity
GROUP BY category
HAVING CAST(avg_sentiment_subjectivity as float) < 0.5
ORDER BY avg_sentiment_polarity DESC
LIMIT 10""",
from=result_df
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment