Created
November 7, 2015 15:07
-
-
Save swshan/051ff5ccebfcbf0decc1 to your computer and use it in GitHub Desktop.
http://zone.wooyun.org/content/23689, 获取Twitter的位置数据
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=UTF-8 | |
import urllib | |
from anonBrowser import * | |
import json | |
import re | |
import urllib2 | |
class reconPerson:
    """Collect tweets sent by one Twitter handle and mine them for leads.

    The tweets are fetched once, when the object is constructed, and cached
    in ``self.tweets``; ``find_interests`` and ``twitter_locate`` only read
    that cached list.

    Attributes:
        handle: the Twitter handle given to the constructor.
        tweets: list of dicts with the keys ``from_user``, ``geo`` and
            ``tweet``, as built by ``get_tweets``.
    """

    # Patterns are compiled once for the class rather than once per tweet.
    # A link runs from "http" up to the next whitespace or the end of the
    # text.  The earlier pattern '(http.*?)\Z|(http.*?) ' tried the "\Z"
    # alternative first, which lazily expands all the way to the end of the
    # text, so a link in the middle of a tweet swallowed the rest of it.
    _LINK_RE = re.compile(r'http\S*')
    _USER_RE = re.compile(r'(@\w+)')
    _HASHTAG_RE = re.compile(r'(#\w+)')

    def __init__(self, handle):
        """Store ``handle`` and immediately download its tweets.

        Args:
            handle: Twitter handle whose tweets are searched for.
        """
        self.handle = handle
        self.tweets = self.get_tweets()

    def get_tweets(self):
        """Query the Twitter search endpoint for tweets from the handle.

        Returns:
            A list with one dict per search result, holding the sender name
            (``from_user``), the ``geo`` field and the text (``tweet``).
        """
        query = urllib.quote_plus(
            'from:' + self.handle + ' since:2009-01-01 include:retweets')
        # anonBrowser comes from the project's own anonBrowser module.
        browser = anonBrowser()
        browser.anonymize()
        # NOTE(review): this unauthenticated v1 search URL is believed to be
        # retired by Twitter -- confirm it still answers before relying on it.
        response = browser.open(
            'http://search.twitter.com/search.json?q=' + query)
        json_objects = json.load(response)
        return [
            {
                'from_user': result['from_user_name'],
                'geo': result['geo'],
                'tweet': result['text'],
            }
            for result in json_objects['results']
        ]

    def find_interests(self):
        """Extract links, mentioned users and hashtags from the tweets.

        Every link found is opened and the URL reported by the response is
        recorded; links that cannot be opened are skipped.

        Returns:
            A dict with the keys ``links``, ``users`` and ``hashtags``, each
            mapping to a sorted list of strings (duplicates are kept).
        """
        interests = {'links': [], 'users': [], 'hashtags': []}
        for tweet in self.tweets:
            text = tweet['tweet']
            for link in self._LINK_RE.findall(text):
                try:
                    response = urllib2.urlopen(link)
                except Exception:
                    # Best effort: dead, malformed or unreachable links are
                    # ignored, but Ctrl-C / SystemExit still propagate.
                    continue
                try:
                    interests['links'].append(response.url)
                finally:
                    response.close()
            interests['users'] += self._USER_RE.findall(text)
            interests['hashtags'] += self._HASHTAG_RE.findall(text)
        for found in interests.values():
            found.sort()
        return interests

    def twitter_locate(self, cityFile):
        """List location hints found in the tweets.

        Args:
            cityFile: path of a text file with one city name per line, or
                ``None`` to skip the city-name search.

        Returns:
            A list holding first the non-null ``geo`` value of every tweet
            (in tweet order), then each lower-cased city name from
            ``cityFile`` that occurs in the text of some tweet (in file
            order).
        """
        cities = []
        if cityFile is not None:
            with open(cityFile) as city_lines:
                for line in city_lines:
                    city = line.strip('\n').strip('\r').lower()
                    # A blank line would yield '', which is a substring of
                    # every text and would always be reported.
                    if city:
                        cities.append(city)

        locations = [
            tweet['geo'] for tweet in self.tweets if tweet['geo'] is not None
        ]
        # Join with a newline so that a city name cannot match across the
        # boundary between two unrelated tweets.
        tweets_text = '\n'.join(tweet['tweet'].lower() for tweet in self.tweets)
        locations.extend(city for city in cities if city in tweets_text)
        return locations
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment