Created
May 8, 2012 14:42
-
-
Save aniemerg/2635735 to your computer and use it in GitHub Desktop.
Script to retrieve the total number of patents granted as of the end of each year (1980-2012)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to retrieve the total number of patents granted as of the end of each year,
# for a range of years, using the USPTO's search site
import urllib2 | |
import re | |
import datetime | |
import pprint | |
import time | |
# Choose the range of years (the end is exclusive, so this covers 1980-2011)
YEARS = range(1980, 2012)

# Search address and query options; the three %s slots are month, day, year.
# NOTE(review): presumably ISD is the issue date and APT/1 restricts the
# search to one application type -- confirm against the USPTO query syntax.
SEARCH_ADDRESS = "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm"
SEARCH_OPTIONS = "&r=0&p=1&f=S&l=50&Query=+ISD/%s/%s/%s+AND+APT/1&d=PTXT"

# Matches numbers written as "d,ddd,ddd" (exactly seven digits).
PATENT_NUMBER = re.compile(r'(\d),(\d\d\d),(\d\d\d)')

OUTPUT_FILE = 'PatentsGrantedbyYear.csv'

# Seconds to wait between requests, so we don't flood the patent office.
REQUEST_DELAY = 3


def last_tuesday_of_year(year):
    """Return the date of the last Tuesday of *year*."""
    new_years_eve = datetime.date(year, 12, 31)
    # weekday() is 0 for Monday and 1 for Tuesday, so the modulo yields the
    # number of days elapsed since the most recent Tuesday (0 if Dec 31 is one).
    days_back = (new_years_eve.weekday() - 1) % 7
    return new_years_eve - datetime.timedelta(days=days_back)


def build_search_url(issue_date):
    """Return the search URL querying for *issue_date* (a datetime.date)."""
    options = SEARCH_OPTIONS % (issue_date.month, issue_date.day,
                                issue_date.year)
    return SEARCH_ADDRESS + options


def fetch_page(url):
    """Return the body of *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def highest_patent_number(html):
    """Return the largest "d,ddd,ddd" number found in *html*, commas removed.

    Returns None when *html* contains no such number.
    """
    numbers = [''.join(groups) for groups in PATENT_NUMBER.findall(html)]
    if not numbers:
        return None
    # Every match has exactly seven digits, so comparing the strings orders
    # them the same way as comparing their numeric values.
    return max(numbers)


def save_results(results, path):
    """Write (year, highest) pairs to *path*, one "year, number" line each."""
    with open(path, 'w') as outfile:
        for year, highest in results:
            outfile.write("%s, %s\n" % (year, highest))


def main():
    """Query each year in YEARS, print the totals and save them to a CSV."""
    results = []
    try:
        # Loop over years
        for year in YEARS:
            issue_date = last_tuesday_of_year(year)
            # Query USPTO search and extract the highest patent number
            html = fetch_page(build_search_url(issue_date))
            highest = highest_patent_number(html)
            if highest is None:
                # Nothing recognisable on the page: report and move on rather
                # than crashing and discarding the years already collected.
                print("No patent numbers found for %s (searched %s); skipping"
                      % (year, issue_date))
            else:
                results.append((year, highest))
                # Print out result
                print("The Total Number of U.S. Patents issued as of the Year %s is: %s"
                      % (year, highest))
            # Let's not flood the patent office
            time.sleep(REQUEST_DELAY)
    finally:
        # Save to file even if a later request failed, so the work already
        # done is kept; an empty result set never overwrites an existing file.
        if results:
            save_results(results, OUTPUT_FILE)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment