Created
June 10, 2012 01:10
-
-
Save aniemerg/2903364 to your computer and use it in GitHub Desktop.
Counts various statistics of claims, such as total claims, number of independent claims, dependent claims
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ClaimCounts()
# Counts various statistics of claims, such as total claims,
# number of independent claims, and number of dependent claims.
# Files to create the database can be found at:
# https://github.com/aniemerg/Patent-Tools
# Written on May 30, 2012 for a blog post.
import MySQLdb as mdb | |
import sys | |
import datetime | |
import re | |
from pprint import pprint | |
import math | |
def _fetch_claim_rows():
    """Return the claims column of up to 10000 rows of the USPatents table."""
    # mdb.connect('localhost', 'username', 'password', 'database')
    con = mdb.connect('localhost', 'root', 'password', 'database')
    try:
        cur = con.cursor()
        cur.execute("SELECT USPatents.claims from USPatents LIMIT 10000")
        return cur.fetchall()
    finally:
        # Read-only query, so there is nothing to commit; just release the
        # connection even when the query raises.
        con.close()


def _write_histogram(path, histogram):
    """Write one "['<key>', <count>]," line per entry, sorted by key."""
    with open(path, 'w') as outfile:
        for key, count in sorted(histogram.items()):
            outfile.write("['%s', %s],\n" % (key, count))


def ClaimCounts(rows=None, output_dir='.'):
    """Tally per-patent claim counts and write three histogram files.

    For every row the claims text is split into individual claims, and the
    number of total, independent and dependent claims is computed.  Each
    histogram maps "number of claims in a patent" to "number of patents
    with that many claims" and is written to ``Total_Claim_Counts.csv``,
    ``Independent_Claim_Counts.csv`` and ``Dependent_Claim_Counts.csv``.
    Despite the extension, each line has the form ``['<key>', <count>],``.

    Args:
        rows: Optional iterable of sequences whose first element is the
            claims text of one patent (``None`` is treated as empty text).
            When omitted, the rows are read from the USPatents table.
        output_dir: Directory the three files are written to; defaults to
            the current working directory.

    Returns:
        None.  Results are only written to the output files.
    """
    import os  # local import: only this block needs it

    print("Started Counting Claims at Time:")
    print(str(datetime.datetime.now()))

    if rows is None:
        rows = _fetch_claim_rows()

    # Heuristic claim boundary: a single digit followed by ". ".  Any such
    # sequence inside the claim text is also treated as a boundary.
    claim_boundary = re.compile(r"\d\. ")

    totalclaims = {}  # number of claims -> number of patents
    indclaims = {}    # number of independent claims -> number of patents
    depclaims = {}    # number of dependent claims -> number of patents

    for row in rows:
        text = row[0] or ""
        # Everything before the first claim number is preamble; drop it.
        claims = claim_boundary.split(text)[1:]
        total = len(claims)
        # A claim that never mentions "claim" refers to no other claim and
        # is counted as independent.
        independent = sum(1 for claim in claims if 'claim' not in claim)
        dependent = total - independent

        totalclaims[total] = totalclaims.get(total, 0) + 1
        indclaims[independent] = indclaims.get(independent, 0) + 1
        depclaims[dependent] = depclaims.get(dependent, 0) + 1

    _write_histogram(
        os.path.join(output_dir, 'Total_Claim_Counts.csv'), totalclaims)
    _write_histogram(
        os.path.join(output_dir, 'Independent_Claim_Counts.csv'), indclaims)
    _write_histogram(
        os.path.join(output_dir, 'Dependent_Claim_Counts.csv'), depclaims)
# Run the claim count only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    ClaimCounts()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment