Last active
April 4, 2021 01:45
-
-
Save shadabshaukat/71f375f415d4de6a05c4fad7943bb8df to your computer and use it in GitHub Desktop.
SQL Code for Tweets OCI Sentiment Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| CREATE TABLE "ADMIN"."TWEET_LIVE_STREAM" | |
| ( "ID" NUMBER, | |
| "USERNAME" VARCHAR2(500 BYTE), | |
| "TIMEWHYUPUNISHME" TIMESTAMP (6) DEFAULT systimestamp, | |
| "TWEET_JSON" CLOB, | |
| CONSTRAINT "ENSURE_JSON" CHECK (TWEET_JSON is JSON) ENABLE | |
| ) | |
| CREATE OR REPLACE FORCE EDITIONABLE VIEW "ADMIN"."V_LIVE_TWITTER_FEED" ("CST_DATE", "UTC_DATE", "UTC_HOUR", "UTC_MINUTE", "ID", "CREATED_ON", "SCREEN_NAME", "LOCATION", "FOLLOWERS_CNT", "FRIENDS_CNT", "LISTED_CNT", "FAVOURITES_CNT", "STATUSES_CNT", "RETWEET_CNT", "FAVOURITE_CNT", "URL", "PROFILE_IMAGE_URL", "BANNER_IMAGE_URL", "HASHTAGS", "TWEET", "EMT_TIMEWHYUPUNISHME") AS | |
| SELECT cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') at Time zone 'CST' as date) CST_DATE, | |
| cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date) UTC_DATE, | |
| cast(to_char(cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date),'HH24') as number) UTC_HOUR, | |
| cast(to_char(cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date),'MI') as number) UTC_MINUTE, | |
| a."ID",b."CREATED_ON",a."USERNAME",b."LOCATION",b."FOLLOWERS_CNT",b."FRIENDS_CNT",b."LISTED_CNT",b."FAVOURITES_CNT",b."STATUSES_CNT", | |
| b."RETWEET_CNT",b."FAVOURITE_CNT",b."URL",b."PROFILE_IMAGE_URL",b."BANNER_IMAGE_URL",b."HASHTAGS",b."TWEET", | |
| a.TIMEWHYUPUNISHME | |
| FROM TWEET_LIVE_STREAM a, | |
| json_table(tweet_json, | |
| '$' columns( | |
| id varchar(50) path '$.id', | |
| created_on varchar2(100) path '$.created_at', | |
| screen_name varchar2(200) path '$."user".screen_name', | |
| location varchar2(250) path '$."user"."location"', | |
| followers_cnt number path '$."user".followers_count', | |
| friends_cnt number path '$."user".friends_count', | |
| listed_cnt number path '$."user".listed_count', | |
| favourites_cnt number path '$."user".favourites_count', | |
| statuses_cnt number path '$."user".statuses_count', | |
| retweet_cnt number path '$.retweet_count', | |
| favourite_cnt number path '$.favorite_count', | |
| url varchar2(250) path '$."user"."url"', | |
| profile_image_url varchar2(500) path '$."user".profile_image_url', | |
| banner_image_url varchar2(500) path '$."user".profile_banner_url', | |
| hashtags varchar2(500) format json with wrapper path '$.entities.hashtags[*].text', | |
| tweet varchar2(250) path '$.text' | |
| -- nested path '$.entities.hashtags[*]' columns (ind_hashtag varchar2(30) path '$.text' ) | |
| ) | |
| ) b | |
| CREATE OR REPLACE VIEW CLEAN_TWEETS AS select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED; | |
| -- SQL Queries -- | |
| select tweet from V_LIVE_TWITTER_FEED; | |
| select count(*) from V_LIVE_TWITTER_FEED; | |
| select * from TWEET_LIVE_STREAM; | |
| -- Remove Punctuation & URL -- | |
| select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|(http[s]?://(.*?)(\s|$)))', '', 1, 0) "TWEETS" from V_LIVE_TWITTER_FEED; | |
| -- Remove RT -- | |
| SELECT regexp_replace("TWEET", 'RT', '') "TWEET" FROM V_LIVE_TWITTER_FEED | |
| -- Remove Punctuation, URL's & RT -- | |
| select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED; | |
| CREATE OR REPLACE VIEW CLEAN_TWEETS AS select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED; | |
| SELECT * FROM CLEAN_TWEETS | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment