Skip to content

Instantly share code, notes, and snippets.

@manichabba
Created August 23, 2016 20:45
Show Gist options
  • Select an option

  • Save manichabba/4564251b5ba18b2de0c6bec72c715238 to your computer and use it in GitHub Desktop.

Select an option

Save manichabba/4564251b5ba18b2de0c6bec72c715238 to your computer and use it in GitHub Desktop.
Counting Organizations This application will read the mailbox data (mbox.txt) count up the number email messages per organization (i.e. domain name of the email address) using a database with the following schema to maintain the counts.
import re
import sqlite3
conn = sqlite3.connect('emaildb.sqlite')
cur = conn.cursor()
cur.execute('''
DROP TABLE IF EXISTS Counts''')
cur.execute('''
CREATE TABLE Counts (org TEXT, count INTEGER)''')
fname = raw_input('Enter file name: ')
if ( len(fname) < 1 ) : fname = 'mbox.txt'
fh = open(fname)
for line in fh:
if not line.startswith('From: ') : continue
pieces = line.split()[1]
org = pieces.split('@')[1]
print org
cur.execute('SELECT count FROM Counts WHERE org = ? ', (org, ))
row = cur.fetchone()
if row is None:
cur.execute('''INSERT INTO Counts (org, count)
VALUES ( ?, 1 )''', ( org, ) )
else :
cur.execute('UPDATE Counts SET count=count+1 WHERE org = ?',
(org, ))
conn.commit()
sqlstr = 'SELECT org, count FROM Counts ORDER BY count DESC LIMIT 10'
print
print "Counts:"
for row in cur.execute(sqlstr) :
print str(row[0]), row[1]
cur.close()
@SomayehNorouzi
Copy link

For those having difficulties in finding sqlite file, you can add the following block to the top of your code and this will set the working directory to your target folder where the generated sqlite file would be saved

Change the working directory to the exercise folder

exercise_folder = 'C:\sqlt\exercise'
os.chdir(exercise_folder)


import sqlite3
import os

Change the working directory to the exercise folder

exercise_folder = 'C:\sqlt\exercise'
os.chdir(exercise_folder)

SqliteCursor = sqlite3.connect('orgcountingdb.sqlite')
pter = SqliteCursor.cursor()

pter.execute('DROP TABLE IF EXISTS Counts')

pter.execute('CREATE TABLE Counts(org TEXT, count INTEGER)')

mboxdb = 'C:\sqlt\exercise\mbox.txt'
mydb = open(mboxdb)
for line in mydb:
if not line.startswith('From: '):
continue
pieces = line.split()
email = pieces[1]
org = email.split('@')[1]
pter.execute('SELECT count FROM Counts WHERE org = ? ', (org,))
row = pter.fetchone()
if row is None:
pter.execute('INSERT INTO Counts (org, count) VALUES (?, 1)', (org,))
else:
pter.execute('UPDATE Counts SET count = count + 1 WHERE org = ?', (org,))
SqliteCursor.commit()

sqlstr = 'SELECT org, count FROM Counts ORDER BY count DESC LIMIT 10'
for row in pter.execute(sqlstr):

print(str(row[0]), row[1])

with open('orgcountingdb.sql', 'w') as f:
for line in SqliteCursor.iterdump():
f.write('%s\n' % line)

pter.close()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment