Skip to content

Instantly share code, notes, and snippets.

@shadabshaukat
Last active April 4, 2021 01:45
Show Gist options
  • Select an option

  • Save shadabshaukat/71f375f415d4de6a05c4fad7943bb8df to your computer and use it in GitHub Desktop.

Select an option

Save shadabshaukat/71f375f415d4de6a05c4fad7943bb8df to your computer and use it in GitHub Desktop.
SQL Code for Tweets OCI Sentiment Analysis
CREATE TABLE "ADMIN"."TWEET_LIVE_STREAM"
( "ID" NUMBER,
"USERNAME" VARCHAR2(500 BYTE),
"TIMEWHYUPUNISHME" TIMESTAMP (6) DEFAULT systimestamp,
"TWEET_JSON" CLOB,
CONSTRAINT "ENSURE_JSON" CHECK (TWEET_JSON is JSON) ENABLE
)
CREATE OR REPLACE FORCE EDITIONABLE VIEW "ADMIN"."V_LIVE_TWITTER_FEED" ("CST_DATE", "UTC_DATE", "UTC_HOUR", "UTC_MINUTE", "ID", "CREATED_ON", "SCREEN_NAME", "LOCATION", "FOLLOWERS_CNT", "FRIENDS_CNT", "LISTED_CNT", "FAVOURITES_CNT", "STATUSES_CNT", "RETWEET_CNT", "FAVOURITE_CNT", "URL", "PROFILE_IMAGE_URL", "BANNER_IMAGE_URL", "HASHTAGS", "TWEET", "EMT_TIMEWHYUPUNISHME") AS
SELECT cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') at Time zone 'CST' as date) CST_DATE,
cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date) UTC_DATE,
cast(to_char(cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date),'HH24') as number) UTC_HOUR,
cast(to_char(cast(TO_TIMESTAMP_TZ(REPLACE(upper(b.created_on),'+0000','-00:00'),'DY MON DD HH24:MI:SS TZH:TZM YYYY') as date),'MI') as number) UTC_MINUTE,
a."ID",b."CREATED_ON",a."USERNAME",b."LOCATION",b."FOLLOWERS_CNT",b."FRIENDS_CNT",b."LISTED_CNT",b."FAVOURITES_CNT",b."STATUSES_CNT",
b."RETWEET_CNT",b."FAVOURITE_CNT",b."URL",b."PROFILE_IMAGE_URL",b."BANNER_IMAGE_URL",b."HASHTAGS",b."TWEET",
a.TIMEWHYUPUNISHME
FROM TWEET_LIVE_STREAM a,
json_table(tweet_json,
'$' columns(
id varchar(50) path '$.id',
created_on varchar2(100) path '$.created_at',
screen_name varchar2(200) path '$."user".screen_name',
location varchar2(250) path '$."user"."location"',
followers_cnt number path '$."user".followers_count',
friends_cnt number path '$."user".friends_count',
listed_cnt number path '$."user".listed_count',
favourites_cnt number path '$."user".favourites_count',
statuses_cnt number path '$."user".statuses_count',
retweet_cnt number path '$.retweet_count',
favourite_cnt number path '$.favorite_count',
url varchar2(250) path '$."user"."url"',
profile_image_url varchar2(500) path '$."user".profile_image_url',
banner_image_url varchar2(500) path '$."user".profile_banner_url',
hashtags varchar2(500) format json with wrapper path '$.entities.hashtags[*].text',
tweet varchar2(250) path '$.text'
-- nested path '$.entities.hashtags[*]' columns (ind_hashtag varchar2(30) path '$.text' )
)
) b
CREATE OR REPLACE VIEW CLEAN_TWEETS AS select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED;
-- SQL Queries --
select tweet from V_LIVE_TWITTER_FEED;
select count(*) from V_LIVE_TWITTER_FEED;
select * from TWEET_LIVE_STREAM;
-- Remove Punctuation & URL --
select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|(http[s]?://(.*?)(\s|$)))', '', 1, 0) "TWEETS" from V_LIVE_TWITTER_FEED;
-- Remove RT --
SELECT regexp_replace("TWEET", 'RT', '') "TWEET" FROM V_LIVE_TWITTER_FEED
-- Remove Punctuation, URL's & RT --
select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED;
CREATE OR REPLACE VIEW CLEAN_TWEETS AS select REGEXP_REPLACE("TWEET", '([[:digit:]*]|[[:punct:]*]|([RT])(.*?)(\s|$)|(http[s]?://(.*?)(\s|$)))', '',1,0) "TWEETS" from V_LIVE_TWITTER_FEED;
SELECT * FROM CLEAN_TWEETS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment