Created
February 26, 2019 16:41
-
-
Save igorbrigadir/20f9d4fdf43fb495d4ea0ac7d1391d32 to your computer and use it in GitHub Desktop.
Extract Tweet ID components
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
# This small script shows how to deconstruct a Twitter tweet id into its
# various components. The tweet_components function accepts a tweet id and
# returns a dict whose keys / values represent the various components of
# the tweet id. The bit-level extraction of every component is done in
# the melt_snowflake function.
def melt_snowflake(snowflake_id, epoch_ms=1288834974657):
    """Split a snowflake id into its bit-packed components.

    Bit layout as decoded here, from the least significant bit up:
    12-bit sequence number, 5-bit worker id, 5-bit datacenter id, and a
    millisecond time offset in every bit above bit 22.

    Args:
        snowflake_id: The id as an ``int``. A ``str`` is not accepted; the
            bit operations raise ``TypeError`` for it.
        epoch_ms: Millisecond Unix timestamp that the id's time offset is
            counted from. The default, 1288834974657, is the epoch used
            for tweet ids. Pass a different value to decode ids that use
            the same bit layout with another starting point.

    Returns:
        The tuple ``(timestamp_ms, datacenter_id, worker_id, sequence_id,
        machine_id)``. ``machine_id`` is the 10-bit field formed by the
        datacenter id (high 5 bits) followed by the worker id (low 5 bits).
    """
    sequence_id = snowflake_id & 0xFFF
    # The 10 bits above the sequence hold datacenter (high) + worker (low).
    machine_id = (snowflake_id >> 12) & 0x3FF
    worker_id = machine_id & 0x1F
    datacenter_id = machine_id >> 5
    timestamp_ms = (snowflake_id >> 22) + epoch_ms
    return (timestamp_ms, datacenter_id, worker_id, sequence_id, machine_id)
def tweet_components(tweet_id):
    """Return the components of a tweet id as a dict.

    The id is decoded by melt_snowflake, and each element of the tuple it
    returns is stored under a descriptive key. Note that the worker id is
    exposed under the key 'server_id'.
    """
    timestamp_ms, datacenter, worker, sequence, machine = melt_snowflake(tweet_id)
    return {
        'creation_time_milli': timestamp_ms,
        'datacenter_id': datacenter,
        'server_id': worker,
        'sequence_id': sequence,
        # worker and datacenter bits combined
        'machine_id': machine,
    }
# Demo: decode one known tweet id and print its components. Guarded so that
# importing this file to reuse its functions does not run (and print) the demo.
if __name__ == "__main__":
    tweet_id = 1100125195476631553  # Must be an int and not str
    data = tweet_components(tweet_id)
    print(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment