Created
October 5, 2014 17:23
-
-
Save jwinterm/b83090d6e0b238e5b8c7 to your computer and use it in GitHub Desktop.
Python script to count number of users on btctalk thread
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter

import requests as req
# Count how many posts each user made in a BitcoinTalk thread and write the
# statistics to a text file.

# URL of the first page of target BTCtalk thread
TARGET_URL = "https://bitcointalk.org/index.php?topic=788916.0"
# The thread URL ends in ".<offset>"; each page advances the offset by this much
POSTS_PER_PAGE = 20
# Text that immediately precedes a username in the page markup
PROFILE_MARKER = 'View the profile of '
# File the statistics are written to
OUTPUT_PATH = "userStats.txt"
# Seconds to wait for the server before giving up on a page
REQUEST_TIMEOUT = 30


def fetch_page(url):
    """Download *url* and return the response body as text.

    Raises a ``requests`` exception on connection problems, timeouts, or a
    non-2xx HTTP status.
    """
    response = req.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def extract_users(html):
    """Return the usernames found in *html*, in order of appearance.

    At most one username is taken per line: the text between the first
    PROFILE_MARKER and the following '">'. Purely numeric matches are
    skipped (kept from the original script; presumably these are not real
    usernames -- TODO confirm against the page markup).
    """
    users = []
    for line in html.split('\n'):
        # Test for the exact marker that is split on, so a near-miss line
        # is skipped instead of raising IndexError.
        if PROFILE_MARKER not in line:
            continue
        user = line.split(PROFILE_MARKER)[1].split('">')[0]
        if not user.isdigit():
            users.append(user)
    return users


def next_page_url(url, step=POSTS_PER_PAGE):
    """Return *url* with its trailing ".<offset>" increased by *step*.

    Raises ValueError if the text after the last '.' is not an integer.
    """
    # Split on the LAST dot only, so the result does not depend on how many
    # dots the host name or path happen to contain.
    base, _, offset = url.rpartition('.')
    return '{0}.{1}'.format(base, int(offset) + step)


def collect_posters(start_url, fetch=None):
    """Walk the thread page by page and return one username per post found.

    Crawling stops when a page yields exactly the same user sequence as the
    previous page (presumably the forum serves the last page again once the
    offset runs past the end of the thread), or when a request fails.

    *fetch* is a callable mapping a URL to page text; it defaults to
    fetch_page and exists so the crawl can be driven without network access.
    """
    if fetch is None:
        fetch = fetch_page
    posters = []
    previous_page = []
    url = start_url
    while True:
        print(url)
        try:
            html = fetch(url)
        except req.RequestException as err:
            # Best effort: keep what was collected so far, but say why the
            # crawl ended instead of stopping silently.
            print("Stopping at {0}: {1}".format(url, err))
            break
        current_page = extract_users(html)
        if current_page == previous_page:
            break
        posters.extend(current_page)
        previous_page = current_page
        url = next_page_url(url)
    return posters


def build_report(posters):
    """Return the statistics report for *posters* as a single string.

    The report has four summary lines (total posts, unique posters, posters
    with more than one post, posters with at least ten posts), a column
    header, and then one line per username in sorted order.
    """
    counts = Counter(posters)
    usernames = sorted(counts)
    repeat_posters = sum(1 for n in counts.values() if n > 1)
    frequent_posters = sum(1 for n in counts.values() if n > 9)

    summary = '{0:30s} {1:5d}\n'
    lines = [
        summary.format("Total number of posts: ", len(posters)),
        summary.format("Total number of unique posters: ", len(usernames)),
        summary.format("Total number of posters more than once: ",
                       repeat_posters),
        summary.format("Total number of posters at least 10x: ",
                       frequent_posters),
        '{0:20s} {1:15s}\n'.format('Username', 'number of posts'),
    ]
    for name in usernames:
        lines.append('{0:20s} {1:5d}\n'.format(name, counts[name]))
    return ''.join(lines)


def main():
    """Crawl the thread, write the report to OUTPUT_PATH, and print "Done"."""
    posters = collect_posters(TARGET_URL)
    # Context manager closes the file even if the write fails
    with open(OUTPUT_PATH, 'w') as out:
        out.write(build_report(posters))
    print("Done")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment