Last active
August 29, 2015 14:00
-
-
Save gerbal/6149be791ab1222320fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
def randomMAC():
    """Return a random MAC address string under the fixed 00:16:3e prefix.

    The fourth octet is drawn from 0x00-0x7f and the last two from
    0x00-0xff.  Each octet is rendered as two lowercase hex digits and the
    six octets are joined with colons.
    """
    octets = [0x00, 0x16, 0x3e]
    octets.append(random.randint(0x00, 0x7f))
    octets.extend(random.randint(0x00, 0xff) for _ in range(2))
    return ":".join("{:02x}".format(octet) for octet in octets)
def randomIP():
    """Return a random dotted-quad address string.

    Every octet is drawn from 1-255.  The first octet is redrawn until it
    is not one of 10, 127, 169, 172 or 192.
    """
    excluded_first_octets = [10, 127, 169, 172, 192]
    while True:
        leading = random.randrange(1, 256)
        if leading not in excluded_first_octets:
            break
    trailing = [random.randrange(1, 256) for _ in range(3)]
    return ".".join(str(octet) for octet in [leading] + trailing)
def strTimeProp(start, end, format, prop):
    """Return the time lying a given proportion of the way through an interval.

    start and end are strings written in the strftime-style `format`;
    together they bound the interval [start, end].  prop is the fraction of
    that interval to advance past start.  The result is rendered with the
    same format, interpreted as local time.
    """
    def to_epoch(stamp):
        # Parse a formatted local-time string into seconds since the epoch.
        return time.mktime(time.strptime(stamp, format))

    begin = to_epoch(start)
    finish = to_epoch(end)
    offset = prop * (finish - begin)
    return time.strftime(format, time.localtime(begin + offset))
def randomDate(start, end, prop):
    """Return the time `prop` of the way from start to end.

    start and end must be written like "4/25/2014 12:00 am"
    ('%m/%d/%Y %I:%M %p'); the result uses the same format.
    """
    date_format = '%m/%d/%Y %I:%M %p'
    return strTimeProp(start, end, format=date_format, prop=prop)
# Fixed pool of 80 random addresses; visit records pick their IP from here.
eightyIPs = [randomIP() for _ in range(80)]
# Lookup tables the generators sample from with random.choice().

# Hosting providers and owners assigned to generated sites.
hosts = ["RunPapa.com", "lucidhost.com",
         "hoststhemost.net", "freedomhostworks.su"]
site_owner = ["megacorp inc", "non-profic llc", "duetches web gbh"]

# A domain name is a `sites` entry followed directly by a `tld` entry, so
# every suffix must carry its own leading dot (".co.uk", not "co.uk").
tld = [".com", ".gov", ".edu", ".org", ".net", ".io", ".hobbies",
       ".icann-messed-up-with-the-new-gTLDs", ".bogus", ".su", ".co.uk"]
sites = ["nile", "brootoople", "alohationary", "pinkinternets", "unc", "plums",
         "apples", "hotpants", "felinesrus", "email", "plantsforsale",
         "newtonianagronomy"]

# A page filename is "<domain>/<page><extension>".
file_extensions = [".exe", ".php", ".html",
                   ".asp", ".xml", ".pdf", ".?=1234", ""]
pages = [
    "search", "about", "index", "billing", "admin",
    "how-do-i-stop-the-burning", "sesamestreet", "courses", "watch", "404",
    "careers", "item", "pies", "shipping", "forks", "magicians-for-hire"]

# Device attributes; repeated entries make that value proportionally more
# likely to be picked.
device = ["mobile", "tablet", "desktop", "desktop", "mobile"]
browser = ["firefox", "chrome", "safari", "ie6", "mosaic"]
OS = ["windows", "iOS", "OSX", "Linux", "Android"]

# Visit attributes.
referral = ["organic", "referral", "direct"]
city = ["springfield", "jonesboro", "paris", "miloud", "send-help",
        "southbend", "unicode-error", "townville", "citytown"]
country = ["USA", "France", "Belize", "The Moon",
           "United Kingdom", "Germany", "Canada"]
def gen_sites(count=11):
    """Build `count` website rows, each with a unique random domain name.

    A domain is a random `sites` entry joined to a random `tld` entry; a
    candidate that was already used is discarded and redrawn.

    Args:
        count: Number of rows to produce.  Defaults to 11, which is what
            the previously hard-coded `len(...) <= 10` loop bound yielded.
            A count of zero or less produces an empty list.

    Returns:
        A list of [domain, owner, host] rows, where owner comes from
        `site_owner` and host from `hosts`.

    Raises:
        ValueError: If `count` exceeds the number of distinct domains that
            `sites` and `tld` can form (the redraw loop could never finish).
    """
    if count <= 0:
        return []

    distinct_domains = {name + suffix for name in sites for suffix in tld}
    if count > len(distinct_domains):
        raise ValueError(
            "cannot generate %d unique domains; only %d combinations exist"
            % (count, len(distinct_domains)))

    return_sites = []
    seen_domains = set()  # O(1) duplicate check
    while len(return_sites) < count:
        random_domain = random.choice(sites) + random.choice(tld)
        if random_domain in seen_domains:
            continue
        seen_domains.add(random_domain)
        return_sites.append(
            [random_domain, random.choice(site_owner), random.choice(hosts)])
    return return_sites
def gen_pages(sites_list):
    """Build ten page rows for every site in `sites_list`.

    Only the first element of each site row (its domain) is read.  For each
    site, page names are drawn from `pages` until ten distinct names have
    been collected; each is paired with a random `file_extensions` entry.

    Returns:
        A flat list of [filename, page_name, domain] rows, where filename
        is "<domain>/<page_name><extension>".
    """
    return_pages = []
    for site_row in sites_list:
        domain = site_row[0]
        site_pages = []
        used_names = []
        while len(site_pages) < 10:
            name = random.choice(pages)
            # The extension is drawn even when the name turns out to be a
            # repeat, so the random stream advances the same way either way.
            extension = random.choice(file_extensions)
            if name in used_names:
                continue
            used_names.append(name)
            site_pages.append([domain + "/" + name + extension, name, domain])
        return_pages.extend(site_pages)
    return return_pages
def gen_devices():
    """Return 50 device rows of [mac_address, device_type, browser, os].

    The MAC address comes from randomMAC(); the other three fields are
    random picks from the `device`, `browser` and `OS` tables.
    """
    return [
        [randomMAC(), random.choice(device),
         random.choice(browser), random.choice(OS)]
        for _ in range(50)
    ]
def gen_visit(devices_list, pages_list, sites_list):
    """Build between 0 and 19 random visit rows for every page.

    Each row is
        [date_time, duration, ip_address, traffic_source, city, country,
         device_id, browser, OS, filename]
    where device_id/browser/OS are elements 0, 2 and 3 of a random device
    row and filename is element 0 of the page row.  `sites_list` is accepted
    but not used.
    """
    window_start = "4/25/2014 12:00 am"
    window_end = "5/2/2014 5:00 pm"

    return_visits = []
    for page_row in pages_list:
        visit_count = random.randrange(20)
        for _ in range(visit_count):
            chosen_device = random.choice(devices_list)
            visited_at = randomDate(window_start, window_end, random.random())
            # Exponentially distributed duration whose mean is itself a
            # random whole number from 1 to 20, truncated to an integer.
            mean_duration = random.randint(1, 20)
            duration = str(int(random.expovariate(1.0 / mean_duration)))
            return_visits.append([
                visited_at,
                duration,
                random.choice(eightyIPs),
                random.choice(referral),
                random.choice(city),
                random.choice(country),
                chosen_device[0],
                chosen_device[2],
                chosen_device[3],
                page_row[0],
            ])
    return return_visits
def output_sql(table_name, meta_array):
    """Render rows as newline-separated SQL INSERT statements.

    Every value is converted with str() and emitted as a single-quoted SQL
    string literal; embedded single quotes are doubled so a value such as
    "O'Brien" cannot terminate its literal early.

    Args:
        table_name: Table to insert into; used verbatim in the statement.
        meta_array: Iterable of rows, each an iterable of column values.

    Returns:
        One "INSERT INTO <table> VALUES('a','b',...);" statement per row,
        joined with newlines, or an empty string when there are no rows.
    """
    def quote(value):
        # Doubling the quote character is the standard SQL literal escape.
        return "'" + str(value).replace("'", "''") + "'"

    prefix = "INSERT INTO " + table_name + " VALUES("
    statements = [
        prefix + ",".join(quote(value) for value in row) + ");"
        for row in meta_array
    ]
    return "\n".join(statements)
# Generate the four tables in dependency order: pages need sites, and
# visits need devices and pages.
db_sites_table = gen_sites()
db_pages_table = gen_pages(db_sites_table)
db_device_table = gen_devices()
db_visit_table = gen_visit(db_device_table, db_pages_table, db_sites_table)

# Render each table as a batch of INSERT statements.
websites_sql = output_sql("Website", db_sites_table)
webpage_sql = output_sql("web_Page", db_pages_table)
device_sql = output_sql("Device", db_device_table)
visit_sql = output_sql("visit", db_visit_table)

# Text mode, because the statements are str and a binary-mode file rejects
# str on Python 3.  The with-block closes the file even if the write fails.
with open("output.txt", "w") as populate_file:
    populate_file.write(
        "\n".join([websites_sql, webpage_sql, device_sql, visit_sql]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment