produnis · June 27, 2012 13:58 · produnis · Jun 28, 2012
diff --git a/_README b/_README
 #------------------------------------------------------------------------------------------
 #----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
 #----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
 #------------------------------------------------------------------------------------------

 This project aims to plot your personal mail-traffic and calendar-events, inspired by Wolfram's 
 blogpost "Analytics of My Life".

 It uses python-scripts to collect your data
 and GNU R (http://www.r-project.org) to plot it.

 This will only work if you 
    a) use Thunderbird as your mail-client, 
    b) manage your calendars with OwnCloud/MySQL or have it stored as *.ics-Files.

 The data we are interested in are the DATETIMEs of mails and events. We will not grep 
 descriptions, titles, authors or anything else, just DATETIMEs.
 The Python-Scripts will create texttable-files, which will contain the DATETIMEs of any mail- or 
 calendar-events as "YYYYMMDD,HHMMSS". Each line represents a single mail or event.

 GNU R can use these texttable-files to import the data. If you haven't installed GNU R yet,
 visit its homepage to do so: http://www.r-project.org
 On Ubuntu, just type in a terminal: "sudo apt-get install r-recommended"


 #### How to proceed ######

 I. Maildata
 #----------
 There are two ways to collect your maildata: 
 a) get all maildata of all imap-(sub)folders at once
 b) get maildata for one imap-folder only

 a) if you would like to create a texttable containing the DATETIMEs of all mails in all IMAP-(sub)folders, 
   use the script "analyzeMails.py" (see the script's head for how to use it).
 b) if you would like to create a texttable with the DATETIMEs of a specific IMAP-folder, use the script 
   "mailscanner.py" (see the script's head for how to use it).

 Import your data into R and plot your stuff. See "My_R-Code_for_Mail.R" for how to do it.

 II. Calendar events
 #------------------
 There are two ways to collect your calendar-events:
 a) you have all Calendars in an OwnCloud-instance using MySQL
 b) you have an *.ics-file for any calendar

 a) if you have an OwnCloud-instance using MySQL, use "owncloudevents.py" to create DATETIME-
   texttables for every calendar. If you use OwnCloud with SQLite, this will *not* work.
 b) if you have your calendars as *.ics-files, use "terminscanner.py" (have a look at the script's 
   head for how to use it) to create your texttable files.


 Import your data into R and plot your stuff. See "The_R-Code_for_Calendar.R" for how to do it.
diff --git a/analyzeMails.py b/analyzeMails.py
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 #-----------------------------------------------------------------------------------------
 #----- Just for fun, along the lines of:
 #----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
 #----- 
 #----- This is an adaptation of 'mailscanner.py':
 #----- 2012-06-28 - Micha (mail@kleinschnitker.com) 
 #----- All mboxes are analyzed recursively 
 #----- Thanks Micha !!!
 #----- Example call: 
 #------ cd ~/.thunderbird/PROFILE.default/ImapMail/imap.produnis.de 
 #-----  /path/to/analyzeMails.py .
 #----- Licence: GPL
 #------------------------------------------------------------------------------------------

 import sys
 import os
 import string
 import sys
 import time
 import datetime
 from mailbox import mbox as MBox
 from email.utils import parsedate


 mBoxes = [] # paths of all mbox files found; filled by findMBoxes(), sorted and read by main()

 def analyzeMBox(dir, file):
     """Write one "YYYYMMDD,HHMMSS" row per message of an mbox file.

     Args:
         dir: Path of the mbox file to read (despite the name it is a file,
             not a directory); passed straight to ``mailbox.mbox``.
         file: Writable text stream that receives the rows.

     Returns:
         The number of messages iterated, including those that produced an
         ``NA,NA`` row.
     """
     noOfMails = 0
     for msg in MBox(dir):
         noOfMails += 1
         try:
             # Parse the header once.  parsedate() returns None for a missing
             # or malformed Date header, which strftime() rejects.
             stamp = parsedate(msg.get("date"))
             row = "%s,%s\n" % (time.strftime('%Y%m%d', stamp),
                                time.strftime('%H%M%S', stamp))
         except Exception:
             # Unparseable date: emit NAs (missing values) for GNU R.  Unlike a
             # bare "except:", this lets Ctrl-C / SystemExit abort the scan.
             row = 'NA,NA\n'
         file.write(row)
     return noOfMails


 def findMBoxes(dir):
     """Recursively collect Thunderbird mbox files below *dir*.

     Every regular file whose name contains no "." is treated as an mbox and
     its path is appended to the module-level ``mBoxes`` list.  Directories
     whose name contains ".sbd" are descended into; all other directories are
     ignored.  Nothing is returned.

     Args:
         dir: Directory to scan.
     """
     for cur in os.listdir(dir):
         pathname = os.path.join(dir, cur)
         if os.path.isdir(pathname):
             # Substring tests replace string.find(), which no longer exists
             # in Python 3; the results are identical.
             if '.sbd' in cur:
                 findMBoxes(pathname)
         elif os.path.isfile(pathname) and '.' not in cur:
             mBoxes.append(pathname)

 def main():
     """Scan the folder given as first argument and write the summary file.

     Collects all mbox files below ``sys.argv[1]``, writes one date/time row
     per mail to "mBox-Summary-<folder>.txt" in the current directory and
     prints progress to stdout.  Exits with status 1 if no folder is given.
     """
     if len(sys.argv) < 2:
         print('usage: analyzeMails.py MAILFOLDER')
         sys.exit(1)
     folder = sys.argv[1]

     findMBoxes(folder)
     mBoxes.sort()

     print('Mailbox enthaelt %d mBoxes' % len(mBoxes))

     # Path separators in the argument would point the output name into a
     # sub-directory that usually does not exist, so flatten them.  A plain
     # "." as in the documented example call is left untouched.
     label = folder
     for sep in (os.sep, os.altsep):
         if sep:
             label = label.replace(sep, '_')
     dest = "mBox-Summary-%s.txt" % label

     numberOfMails = 0
     with open(dest, "w") as file:  # closed even if a mailbox fails to parse
         for i in mBoxes:
             print(i)
             numberOfMails += analyzeMBox(i, file)

     print('%d Mails analysiert' % numberOfMails)


 if __name__ == "__main__":
     main()
 ## END OF FILE
diff --git a/mailscanner.py b/mailscanner.py
 #!/usr/bin/python 
 # mailscanner.py  Version 1
 #------------------------------------------------------------------------------------------
 #----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
 #----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
 #------------------------------------------------------------------------------------------
 # call this script like this:
 # /path/to/mailscanner.py MAILBOX APPENDIX
 # where MAILBOX is the path to your thunderbird mailboxfile, most likely  
 #              ~/.thunderbird/foobar.default/ImapMail/imap.foo.bar/INBOX
 # but may be any other mailfolder-file,
 # and where APPENDIX is the appendix of the outputfile to be created by this script 
 # (and which contains MAILDATE and MAILTIME for each mail in a texttable for GNU R)
 #--------------------------------------------------------------------------------------
 import sys
 import time
 import datetime
 from mailbox import mbox as MBox
 from email.utils import parsedate
 #---------------------------------
 mailboxname = sys.argv[1]  # e.g. "~/.thunderbird/foobar.default/ImapMail/imap.foo.bar/INBOX"
 myfilename = sys.argv[2]  # appendix of the output file name
 mbox = MBox(mailboxname)
 ZIEL = "MyMail-%s.txt" % myfilename  # name of the texttable created by this script
 #-----------------------------------
 # The with-block closes the output file even if reading the mailbox fails.
 with open(ZIEL, "w") as SCHREIB:
     for msg in mbox:
         try:
             # parsedate() returns None for a missing or malformed Date
             # header, which strftime() rejects.
             stamp = parsedate(msg.get("date"))
             maildate = time.strftime('%Y%m%d', stamp)
             mailtime = time.strftime('%H%M%S', stamp)
         except Exception:
             # Unparseable date: write NAs (missing values) for GNU R.  Unlike
             # a bare "except:", Ctrl-C still aborts the scan.
             SCHREIB.write('NA,NA\n')
         else:
             # Prints the same text as the Python 2 statement
             # 'print maildate, ",", mailtime', but also runs on Python 3.
             print("%s , %s" % (maildate, mailtime))
             SCHREIB.write("%s,%s\n" % (maildate, mailtime))
 ## END OF FILE
diff --git a/My_R-Code_for_Mail.R b/My_R-Code_for_Mail.R
 # Read the texttable into R.
 # Set the path in the command below to the text file created with mailscanner.py.
 inbox <- read.table("/Path/to/MyMail-inbox.txt", sep=",", colClasses = "character") # read both columns as characters
 colnames(inbox) <- c("Datum","Uhrzeit") # name the columns: date, time of day
 inbox$Datum <- strptime(inbox$Datum,format="%Y%m%d") # turn into format DATE
 inbox$Uhrzeit <- strptime(inbox$Uhrzeit, format="%H%M%S") # turn into format TIME
 plot(inbox$Datum, inbox$Uhrzeit) # plot time of day against date
 #
 # Add the data of another texttable.
 # Set the path in the command below to your second text file.
 test <- read.table("/Pfad/zu/MyMail-outbox.txt", sep=",", colClasses = "character") # read another texttable
 colnames(test) <- c("Datum","Uhrzeit")
 test$Datum <- strptime(test$Datum,format="%Y%m%d")
 test$Uhrzeit <- strptime(test$Uhrzeit, format="%H%M%S")
 points(test$Datum, test$Uhrzeit) # add data to the existing plot
 #
 #
 #
 # Use the demo table "Produnis" from the internet
 produnis <- read.table("http://www.produnis.de/R/MAILLISTE.TXT", sep=",", colClasses = "character") # read as characters
 colnames(produnis) <- c("Datum","Uhrzeit") # give names to columns
 produnis$Datum <- strptime(produnis$Datum,format="%Y%m%d") # turn into format DATE
 produnis$Uhrzeit <- strptime(produnis$Uhrzeit, format="%H%M%S") # turn into format TIME
 plot(produnis$Datum, produnis$Uhrzeit) # plot it
 produnis <- produnis[-which(produnis$Datum==min(produnis$Datum,na.rm=TRUE)),] # remove the outlier from the 1970s (the earliest date)
 plot(produnis$Datum, produnis$Uhrzeit) # plot it again
 # remove duplicates and plot again
 # (logical index: x[-which(...),] would drop ALL rows if there were no duplicates)
 produnis <- produnis[!duplicated(produnis),]
 plot(produnis$Datum, produnis$Uhrzeit) # plot it again
 #----------------------------
 #
 ### Plot the number of mails per day
 produnis <- read.table("http://www.produnis.de/R/MAILLISTE.TXT", sep=",", colClasses = "character") # read as characters
 colnames(produnis) <- c("Datum","Uhrzeit") # name the columns
 # do not turn the columns into DATE format! this specific plot works with character data only

 produnis <- produnis[-which(produnis$Datum==min(produnis$Datum,na.rm=TRUE)),] # remove the outlier from the 1970s (the earliest date)
 plot(strptime(levels(as.factor(produnis$Datum)),format="%Y%m%d"),as.vector(table(as.factor(produnis$Datum))),type="l")
 # remove duplicates and plot again
 # (logical index: x[-which(...),] would drop ALL rows if there were no duplicates)
 produnis <- produnis[!duplicated(produnis),]
 plot(strptime(levels(as.factor(produnis$Datum)),format="%Y%m%d"),as.vector(table(as.factor(produnis$Datum))),type="l")
 #
 plot(strptime(produnis$Datum,format="%Y%m%d"),strptime(produnis$Uhrzeit,format="%H%M%S"),pch=19,cex=0.2,col=rgb(0,0,0,0.2))


 ## Do stuff with ggplot
 if (!require(ggplot2)) { # install ggplot2 only if it is not available yet
   install.packages("ggplot2",dependencies=TRUE)
   library(ggplot2)
 }
 # as.POSIXct(): the class scale_x_datetime() works with (strptime() returns POSIXlt)
 produnis$Datum <- as.POSIXct(strptime(produnis$Datum,format="%Y%m%d")) # turn into format DATE
 produnis$Uhrzeit <- as.POSIXct(strptime(produnis$Uhrzeit, format="%H%M%S")) # turn into format TIME
 g <- ggplot(produnis,aes(x=Datum,y=Uhrzeit)) # create ggplot-object
 g + geom_point(alpha=0.3) +  # draw points with transparency 0.3
     scale_x_datetime(limits=c(as.POSIXct("2004",format="%Y"),as.POSIXct("2012",format="%Y"))) + # limit x-axis
     stat_density2d(color="#88EEAA") # draw density contours
diff --git a/owncloudevents.py b/owncloudevents.py
 #!/usr/bin/python 
 # owncloudevents.py  Version 1
 #------------------------------------------------------------------------------------------
 #----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
 #----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
 #------------------------------------------------------------------------------------------
 # If you use OwnCloud with MySQL to manage your calendars, you may use this script
 # instead of 'terminscanner.py'
 # WORKS ONLY WITH MYSQL, *NOT* SQLITE
 #
 # call this script like this:
 # /path/to/owncloudevents.py
 # This script will scan all events of all calendars in your OwnCloud-MySQL database.
 # For each calendar, a texttable-file will be created 
 # which will contain DATE and TIME for every calendar-entry for GNU R import)
 #--------------------------------------------------------------------------------------
 import sys
 import MySQLdb
 import os,re

 try:
     conn = MySQLdb.connect(host="localhost",  #
                            user="dbuser",     # change to your mysql user
                            passwd="dbpass",   # and password
                            db="owncloud")     # database
 except MySQLdb.Error as e:  # "as" form is valid on Python 2.6+ and Python 3
     print("Error %d: %s" % (e.args[0], e.args[1]))
     sys.exit(1)
 cursor = conn.cursor(MySQLdb.cursors.DictCursor)
 cursor.execute("SELECT id, displayname FROM oc_calendar_calendars")
 result_set = cursor.fetchall()
 calendar_rowcount = cursor.rowcount
 for row in result_set:
     # Only the first blank of the display name is replaced, as before.
     calendar_label = row["displayname"].replace(" ", "_", 1)
     print("%s, %s" % (row["id"], calendar_label))
     # Let the driver bind the id instead of pasting it into the SQL text.
     cursor.execute("SELECT startdate FROM oc_calendar_objects WHERE calendarid = %s",
                    (row["id"],))
     result_set2 = cursor.fetchall()
     ZIEL = "MyCalendar-%s.txt" % calendar_label  # one texttable per calendar
     with open(ZIEL, "w") as SCHREIB:  # closed even if a row fails
         for row2 in result_set2:
             # startdate is rendered as text and cut at fixed positions:
             # characters 0-9 are the date, 11-18 the time.
             eventdata = "%s" % (row2["startdate"])
             print(eventdata)
             eventdate = eventdata[0:10].replace("-", "", 2)
             print(eventdate)
             eventtime = eventdata[11:19].replace(":", "", 2)
             print(eventtime)
             SCHREIB.write("%s,%s\n" % (eventdate, eventtime))
     print("Number of Events parsed: %d" % cursor.rowcount)
 print("Number of Calendars parsed: %d" % calendar_rowcount)
 cursor.close()
 conn.close()
 ##END OF FILE
diff --git a/terminscanner.py b/terminscanner.py
 #!/usr/bin/python 
 # terminscanner.py  Version 1
 #------------------------------------------------------------------------------------------
 #----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
 #----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
 #------------------------------------------------------------------------------------------
 # call this script like this:
 # /path/to/terminscanner.py CALENDAR APPENDIX
 # where CALENDAR is the path to your ics-file,   
 #              /foo/bar/calendar.ics
 # and where APPENDIX is the appendix of the outputfile to be created by this script 
 # (and which will contain DATE and TIME for each calendar-entry in a texttable for GNU R)
 #--------------------------------------------------------------------------------------
 # IMPORTS #
 import os,re
 import sys
 import time
 kalendername = sys.argv[1] # first argument: path of the ics-file, e.g. "/foo/bar/calendar.ics"
 myfilename  =  sys.argv[2] # second argument: appendix of the output filename, e.g. "Office"

 # Name of the texttable written by this script; change the "MyCalendar-" prefix to your needs.
 ZIEL = ("MyCalendar-%s.txt") % (myfilename) # e.g. "MyCalendar-Office.txt"


 ### nothing to edit from here on, leave it alone ...
 ### My Functions and Routines
 #-- BashReturn requires:  os ---------------------------------------
 def BashReturn(cmd):
     """Run *cmd* through the shell and return everything it wrote to stdout.

     *cmd* is handed to ``os.popen`` unchanged, so the caller is responsible
     for quoting any file names in it.  An empty string is returned when the
     command prints nothing.
     """
     # Read the whole output in one go; the with-block closes the pipe.
     with os.popen(cmd) as pipe:
         return pipe.read()
 #-------------------------------------------------------------------
 #==============End of Functions ========================================

 # Single-quote the file name for the shell so that paths containing blanks
 # or shell metacharacters work.
 quoted = "'" + kalendername.replace("'", "'\\''") + "'"
 cmd = 'less %s|grep DTSTART' % (quoted)
 bla = BashReturn(cmd)
 # NOTE(review): grep also matches DTSTART lines inside VTIMEZONE definitions,
 # presumably the source of the 1970 outliers the R script removes - confirm.
 #
 # A DTSTART value ends in a date (YYYYMMDD), optionally followed by "T" and a
 # time (HHMMSS) and an optional "Z" for UTC.
 stamp = re.compile(r'(\d{8})(?:T(\d{6}))?Z?$')
 with open(ZIEL, "w") as SCHREIB:  # closed even if a line fails
     for line in bla.split('\n'):
         # rstrip() removes the "\r" of CRLF-terminated ics-files, so files
         # with plain "\n" line ends are parsed correctly as well.
         hit = stamp.search(line.rstrip())
         if hit:
             termindate = hit.group(1)
             termintime = hit.group(2) or "000000"  # entry has DATE only
         else:  # is no real entry (e.g. the empty string after the last "\n")
             termindate = "NA"
             termintime = "NA"
         SCHREIB.write("%s,%s\n" % (termindate, termintime))
 ## END OF FILE
diff --git a/The_R-Code_for_Calendar.R b/The_R-Code_for_Calendar.R
 # Read the texttable into R.
 # Set the path in the command below to the text file created with terminscanner.py.
 mycalendar <- read.table("/Path/to/MyCalendar-foobar.txt", sep=",", colClasses = "character") # read both columns as characters
 colnames(mycalendar) <- c("Datum","Uhrzeit") # name the columns: date, time of day
 mycalendar$Datum <- strptime(mycalendar$Datum,format="%Y%m%d") # turn into format DATE
 mycalendar$Uhrzeit <- strptime(mycalendar$Uhrzeit, format="%H%M%S") # turn into format TIME
 plot(mycalendar$Datum, mycalendar$Uhrzeit) # plot time of day against date



 ### demo

 privat <- read.table("http://www.produnis.de/R/CALENDAR1.txt", sep=",", colClasses = "character") # read table as characters
 colnames(privat) <- c("Datum","Uhrzeit") # give names to columns
 privat$Datum <- strptime(privat$Datum,format="%Y%m%d") # turn into format DATE
 privat$Uhrzeit <- strptime(privat$Uhrzeit, format="%H%M%S") # turn into format TIME
 plot(privat, xlim=c(as.POSIXct(strptime("20020801",format="%Y%m%d")), as.POSIXct(strptime("20120701",format="%Y%m%d"))))

 #
 lecture <- read.table("http://www.produnis.de/R/CALENDAR2.txt", sep=",", colClasses = "character") # read table as characters
 colnames(lecture) <- c("Datum","Uhrzeit") # give names to columns
 lecture$Datum <- strptime(lecture$Datum,format="%Y%m%d") # turn into format DATE
 lecture$Uhrzeit <- strptime(lecture$Uhrzeit, format="%H%M%S") # turn into format TIME
 #
 uni <- read.table("http://www.produnis.de/R/CALENDAR3.txt", sep=",", colClasses = "character") # read table as characters
 colnames(uni) <- c("Datum","Uhrzeit") # give names to columns
 uni$Datum <- strptime(uni$Datum,format="%Y%m%d") # turn into format DATE
 uni$Uhrzeit <- strptime(uni$Uhrzeit, format="%H%M%S") # turn into format TIME
 #

 plot(privat, col="darkgreen", xlim=c(as.POSIXct(strptime("20020801",format="%Y%m%d")), as.POSIXct(strptime("20120701",format="%Y%m%d"))))
 points(lecture, col="red")
 points(uni, col="darkblue")
 legend("bottomleft",legend=c("Privat","Lectures","Uni"),fill=c("darkgreen","red","darkblue"),inset=.03)


 ### Number of entries per day
 # read the tables again, and
 # do not turn the columns into DATE format! this specific plot works with character data only
 privat <- read.table("http://www.produnis.de/R/CALENDAR1.txt", sep=",", colClasses = "character") # read table as characters
 colnames(privat) <- c("Datum","Uhrzeit") # give names to columns
 #
 lecture <- read.table("http://www.produnis.de/R/CALENDAR2.txt", sep=",", colClasses = "character") # read table as characters
 colnames(lecture) <- c("Datum","Uhrzeit") # give names to columns
 #
 uni <- read.table("http://www.produnis.de/R/CALENDAR3.txt", sep=",", colClasses = "character") # read table as characters
 colnames(uni) <- c("Datum","Uhrzeit") # give names to columns
 #
 plot(strptime(levels(as.factor(privat$Datum)),format="%Y%m%d"),as.vector(table(as.factor(privat$Datum))),type="l",col="darkgreen")
 points(strptime(levels(as.factor(lecture$Datum)),format="%Y%m%d"),as.vector(table(as.factor(lecture$Datum))),type="l",col="red")
 points(strptime(levels(as.factor(uni$Datum)),format="%Y%m%d"),as.vector(table(as.factor(uni$Datum))),type="l",col="darkblue")
 #
 #
 allcalendar <- rbind(privat,uni,lecture)
 plot(strptime(levels(as.factor(allcalendar$Datum)),format="%Y%m%d"),as.vector(table(as.factor(allcalendar$Datum))),type="l",col="black")
 #
 # remove the outliers of the 1970s from the plot
 plot(strptime(levels(as.factor(allcalendar$Datum)),format="%Y%m%d"),as.vector(table(as.factor(allcalendar$Datum))),type="l",col="black",xlim=c(as.POSIXct(strptime("20020801",format="%Y%m%d")), as.POSIXct(strptime("20120701",format="%Y%m%d"))),xaxt="n")
 axis.POSIXct(1,at=seq(as.Date("20020101",format="%Y%m%d"),as.Date("20130101",format="%Y%m%d"),by="years"),format="%Y",labels=TRUE)

 # alternative:
 # remove the outliers of the 1970s from the dataset
 # (logical index: x[-grep(...),] would drop ALL rows if no date matched)
 allcalendar <- allcalendar[!grepl("^197",allcalendar$Datum),]
 plot(strptime(levels(as.factor(allcalendar$Datum)),format="%Y%m%d"),as.vector(table(as.factor(allcalendar$Datum))),type="l",col="black")

 ## Do stuff with ggplot
 if (!require(ggplot2)) { # install ggplot2 only if it is not available yet
   install.packages("ggplot2",dependencies=TRUE)
   library(ggplot2)
 }
 #
 privat.f <- cbind(privat, "Privat")   # add a column naming the calendar
 lecture.f <- cbind(lecture,"Lecture") # add a column
 uni.f <- cbind(uni, "Uni")            # add a column
 colnames(privat.f) <- c("Datum","Uhrzeit", "Kalender")  # give names to columns
 colnames(lecture.f) <- c("Datum","Uhrzeit", "Kalender") # give names to columns
 colnames(uni.f) <- c("Datum","Uhrzeit", "Kalender")     # give names to columns
 calendars <- data.frame(rbind(privat.f,uni.f,lecture.f)) # create a data.frame
 # as.POSIXct(): the class scale_x_datetime() works with (strptime() returns POSIXlt)
 calendars$Datum <- as.POSIXct(strptime(calendars$Datum,format="%Y%m%d")) # format date
 calendars$Uhrzeit <- as.POSIXct(strptime(calendars$Uhrzeit, format="%H%M%S")) # format time
 #
 g <- ggplot(calendars,aes(x=Datum,y=Uhrzeit,color=Kalender)) # create ggplot-object
 g + geom_point() + # plot data as points
     scale_x_datetime(limits=c(as.POSIXct("2002",format="%Y"),as.POSIXct("2012",format="%Y"))) + # limit x-axis
     stat_density2d() # draw densities
	#------------------------------------------------------------------------------------------
	#----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
	#----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
	#------------------------------------------------------------------------------------------

	This project aims to plot your personal mail-traffic and calendar-events, inspired by Wolfram's
	blogpost "Analytics of My Life".

	It uses python-scripts to collect your data
	and GNU R (http://www.r-project.org) to plot it.

	This will only work if you
	a) use Thunderbird as your mail-client,
	b) manage your calendars with OwnCloud/MySQL or have it stored as *.ics-Files.

	The data we are interested in are the DATETIMEs of mails and events. We will not grep
	descriptions, titles, authors or anything else, just DATETIMEs.
	The Python-Scripts will create texttable-files, which will contain the DATETIMEs of any mail- or
	calendar-events as "YYYYMMDD,HHMMSS". Each line represents a single mail or event.

	GNU R can use these texttable-files to import the data. If you haven't installed GNU R yet,
	visit its homepage to do so: http://www.r-project.org
	On Ubuntu, just type in a terminal: "sudo apt-get install r-recommended"


	#### How to proceed ######

	I. Maildata
	#----------
	There are two ways to collect your maildata:
	a) get all maildata of all imap-(sub)folders at once
	b) get maildata for one imap-folder only

	a) if you would like to create a texttable containing the DATETIMEs of all mails in all IMAP-(sub)folders,
	use the script "analyzeMails.py" (see the script's head for how to use it).
	b) if you would like to create a texttable with the DATETIMEs of a specific IMAP-folder, use the script
	"mailscanner.py" (see the script's head for how to use it).

	Import your data into R and plot your stuff. See "My_R-Code_for_Mail.R" for how to do it.

	II. Calendar events
	#------------------
	There are two ways to collect your calendar-events:
	a) you have all Calendars in an OwnCloud-instance using MySQL
	b) you have an *.ics-file for any calendar

	a) if you have an OwnCloud-instance using MySQL, use "owncloudevents.py" to create DATETIME-
	texttables for every calendar. If you use OwnCloud with SQLite, this will not work.
	b) if you have your calendars as *.ics-files, use "terminscanner.py" (have a look at the script's
	head for how to use it) to create your texttable files.


	Import your data into R and plot your stuff. See "The_R-Code_for_Calendar.R" for how to do it.
	#!/usr/bin/python
	# -*- coding: utf-8 -*-
	#-----------------------------------------------------------------------------------------
	#----- Just for fun, along the lines of:
	#----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
	#-----
	#----- This is an adaptation of 'mailscanner.py':
	#----- 2012-06-28 - Micha (mail@kleinschnitker.com)
	#----- All mboxes are analyzed recursively
	#----- Thanks Micha !!!
	#----- Example call:
	#------ cd ~/.thunderbird/PROFILE.default/ImapMail/imap.produnis.de
	#----- /path/to/analyzeMails.py .
	#----- Licence: GPL
	#------------------------------------------------------------------------------------------

	import sys
	import os
	import string
	import sys
	import time
	import datetime
	from mailbox import mbox as MBox
	from email.utils import parsedate


	mBoxes = [] # paths of all mbox files found; filled by findMBoxes(), sorted and read by main()

	def analyzeMBox(dir, file):
	    """Write one "YYYYMMDD,HHMMSS" row per message of an mbox file.

	    Args:
	        dir: Path of the mbox file to read (despite the name it is a file,
	            not a directory); passed straight to ``mailbox.mbox``.
	        file: Writable text stream that receives the rows.

	    Returns:
	        The number of messages iterated, including those that produced an
	        ``NA,NA`` row.
	    """
	    noOfMails = 0
	    for msg in MBox(dir):
	        noOfMails += 1
	        try:
	            # Parse the header once.  parsedate() returns None for a missing
	            # or malformed Date header, which strftime() rejects.
	            stamp = parsedate(msg.get("date"))
	            row = "%s,%s\n" % (time.strftime('%Y%m%d', stamp),
	                               time.strftime('%H%M%S', stamp))
	        except Exception:
	            # Unparseable date: emit NAs (missing values) for GNU R.  Unlike a
	            # bare "except:", this lets Ctrl-C / SystemExit abort the scan.
	            row = 'NA,NA\n'
	        file.write(row)
	    return noOfMails


	def findMBoxes(dir):
	    """Recursively collect Thunderbird mbox files below *dir*.

	    Every regular file whose name contains no "." is treated as an mbox and
	    its path is appended to the module-level ``mBoxes`` list.  Directories
	    whose name contains ".sbd" are descended into; all other directories are
	    ignored.  Nothing is returned.

	    Args:
	        dir: Directory to scan.
	    """
	    for cur in os.listdir(dir):
	        pathname = os.path.join(dir, cur)
	        if os.path.isdir(pathname):
	            # Substring tests replace string.find(), which no longer exists
	            # in Python 3; the results are identical.
	            if '.sbd' in cur:
	                findMBoxes(pathname)
	        elif os.path.isfile(pathname) and '.' not in cur:
	            mBoxes.append(pathname)

	def main():
	    """Scan the folder given as first argument and write the summary file.

	    Collects all mbox files below ``sys.argv[1]``, writes one date/time row
	    per mail to "mBox-Summary-<folder>.txt" in the current directory and
	    prints progress to stdout.  Exits with status 1 if no folder is given.
	    """
	    if len(sys.argv) < 2:
	        print('usage: analyzeMails.py MAILFOLDER')
	        sys.exit(1)
	    folder = sys.argv[1]

	    findMBoxes(folder)
	    mBoxes.sort()

	    print('Mailbox enthaelt %d mBoxes' % len(mBoxes))

	    # Path separators in the argument would point the output name into a
	    # sub-directory that usually does not exist, so flatten them.  A plain
	    # "." as in the documented example call is left untouched.
	    label = folder
	    for sep in (os.sep, os.altsep):
	        if sep:
	            label = label.replace(sep, '_')
	    dest = "mBox-Summary-%s.txt" % label

	    numberOfMails = 0
	    with open(dest, "w") as file:  # closed even if a mailbox fails to parse
	        for i in mBoxes:
	            print(i)
	            numberOfMails += analyzeMBox(i, file)

	    print('%d Mails analysiert' % numberOfMails)


	if __name__ == "__main__":
	    main()
	## END OF FILE
	#!/usr/bin/python
	# mailscanner.py Version 1
	#------------------------------------------------------------------------------------------
	#----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
	#----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
	#------------------------------------------------------------------------------------------
	# call this script like this:
	# /path/to/mailscanner.py MAILBOX APPENDIX
	# where MAILBOX is the path to your thunderbird mailboxfile, most likely
	# ~/.thunderbird/foobar.default/ImapMail/imap.foo.bar/INBOX
	# but may be any other mailfolder-file,
	# and where APPENDIX is the appendix of the outputfile to be created by this script
	# (and which contains MAILDATE and MAILTIME for each mail in a texttable for GNU R)
	#--------------------------------------------------------------------------------------
	import sys
	import time
	import datetime
	from mailbox import mbox as MBox
	from email.utils import parsedate
	#---------------------------------
	mailboxname = sys.argv[1]  # e.g. "~/.thunderbird/foobar.default/ImapMail/imap.foo.bar/INBOX"
	myfilename = sys.argv[2]  # appendix of the output file name
	mbox = MBox(mailboxname)
	ZIEL = "MyMail-%s.txt" % myfilename  # name of the texttable created by this script
	#-----------------------------------
	# The with-block closes the output file even if reading the mailbox fails.
	with open(ZIEL, "w") as SCHREIB:
	    for msg in mbox:
	        try:
	            # parsedate() returns None for a missing or malformed Date
	            # header, which strftime() rejects.
	            stamp = parsedate(msg.get("date"))
	            maildate = time.strftime('%Y%m%d', stamp)
	            mailtime = time.strftime('%H%M%S', stamp)
	        except Exception:
	            # Unparseable date: write NAs (missing values) for GNU R.  Unlike
	            # a bare "except:", Ctrl-C still aborts the scan.
	            SCHREIB.write('NA,NA\n')
	        else:
	            # Prints the same text as the Python 2 statement
	            # 'print maildate, ",", mailtime', but also runs on Python 3.
	            print("%s , %s" % (maildate, mailtime))
	            SCHREIB.write("%s,%s\n" % (maildate, mailtime))
	## END OF FILE
	# Read the texttable into R.
	# Set the path in the command below to the text file created with mailscanner.py.
	inbox <- read.table("/Path/to/MyMail-inbox.txt", sep=",", colClasses = "character") # read both columns as characters
	colnames(inbox) <- c("Datum","Uhrzeit") # name the columns: date, time of day
	inbox$Datum <- strptime(inbox$Datum,format="%Y%m%d") # turn into format DATE
	inbox$Uhrzeit <- strptime(inbox$Uhrzeit, format="%H%M%S") # turn into format TIME
	plot(inbox$Datum, inbox$Uhrzeit) # plot time of day against date
	#
	# Add the data of another texttable.
	# Set the path in the command below to your second text file.
	test <- read.table("/Pfad/zu/MyMail-outbox.txt", sep=",", colClasses = "character") # read another texttable
	colnames(test) <- c("Datum","Uhrzeit")
	test$Datum <- strptime(test$Datum,format="%Y%m%d")
	test$Uhrzeit <- strptime(test$Uhrzeit, format="%H%M%S")
	points(test$Datum, test$Uhrzeit) # add data to the existing plot
	#
	#
	#
	# Use the demo table "Produnis" from the internet
	produnis <- read.table("http://www.produnis.de/R/MAILLISTE.TXT", sep=",", colClasses = "character") # read as characters
	colnames(produnis) <- c("Datum","Uhrzeit") # give names to columns
	produnis$Datum <- strptime(produnis$Datum,format="%Y%m%d") # turn into format DATE
	produnis$Uhrzeit <- strptime(produnis$Uhrzeit, format="%H%M%S") # turn into format TIME
	plot(produnis$Datum, produnis$Uhrzeit) # plot it
	produnis <- produnis[-which(produnis$Datum==min(produnis$Datum,na.rm=TRUE)),] # remove the outlier from the 1970s (the earliest date)
	plot(produnis$Datum, produnis$Uhrzeit) # plot it again
	# remove duplicates and plot again
	# (logical index: x[-which(...),] would drop ALL rows if there were no duplicates)
	produnis <- produnis[!duplicated(produnis),]
	plot(produnis$Datum, produnis$Uhrzeit) # plot it again
	#----------------------------
	#
	### Plot the number of mails per day
	produnis <- read.table("http://www.produnis.de/R/MAILLISTE.TXT", sep=",", colClasses = "character") # read as characters
	colnames(produnis) <- c("Datum","Uhrzeit") # name the columns
	# do not turn the columns into DATE format! this specific plot works with character data only

	produnis <- produnis[-which(produnis$Datum==min(produnis$Datum,na.rm=TRUE)),] # remove the outlier from the 1970s (the earliest date)
	plot(strptime(levels(as.factor(produnis$Datum)),format="%Y%m%d"),as.vector(table(as.factor(produnis$Datum))),type="l")
	# remove duplicates and plot again
	# (logical index: x[-which(...),] would drop ALL rows if there were no duplicates)
	produnis <- produnis[!duplicated(produnis),]
	plot(strptime(levels(as.factor(produnis$Datum)),format="%Y%m%d"),as.vector(table(as.factor(produnis$Datum))),type="l")
	#
	plot(strptime(produnis$Datum,format="%Y%m%d"),strptime(produnis$Uhrzeit,format="%H%M%S"),pch=19,cex=0.2,col=rgb(0,0,0,0.2))


	## Do stuff with ggplot
	if (!require(ggplot2)) { # install ggplot2 only if it is not available yet
	  install.packages("ggplot2",dependencies=TRUE)
	  library(ggplot2)
	}
	# as.POSIXct(): the class scale_x_datetime() works with (strptime() returns POSIXlt)
	produnis$Datum <- as.POSIXct(strptime(produnis$Datum,format="%Y%m%d")) # turn into format DATE
	produnis$Uhrzeit <- as.POSIXct(strptime(produnis$Uhrzeit, format="%H%M%S")) # turn into format TIME
	g <- ggplot(produnis,aes(x=Datum,y=Uhrzeit)) # create ggplot-object
	g + geom_point(alpha=0.3) +  # draw points with transparency 0.3
	    scale_x_datetime(limits=c(as.POSIXct("2004",format="%Y"),as.POSIXct("2012",format="%Y"))) + # limit x-axis
	    stat_density2d(color="#88EEAA") # draw density contours
	#!/usr/bin/python
	# owncloudevents.py Version 1
	#------------------------------------------------------------------------------------------
	#----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
	#----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
	#------------------------------------------------------------------------------------------
	# If you use OwnCloud with MySQL to manage your calendars, you may use this script
	# instead of 'terminscanner.py'
	# WORKS ONLY WITH MYSQL, NOT SQLITE
	#
	# call this script like this:
	# /path/to/owncloudevents.py
	# This script will scan all events of all calendars in your OwnCloud-MySQL database.
	# For each calendar, a texttable-file will be created
	# which will contain DATE and TIME for every calendar-entry for GNU R import)
	#--------------------------------------------------------------------------------------
	import sys
	import MySQLdb
	import os,re

	try:
	    conn = MySQLdb.connect(host="localhost",  #
	                           user="dbuser",     # change to your mysql user
	                           passwd="dbpass",   # and password
	                           db="owncloud")     # database
	except MySQLdb.Error as e:  # "as" form is valid on Python 2.6+ and Python 3
	    print("Error %d: %s" % (e.args[0], e.args[1]))
	    sys.exit(1)
	cursor = conn.cursor(MySQLdb.cursors.DictCursor)
	cursor.execute("SELECT id, displayname FROM oc_calendar_calendars")
	result_set = cursor.fetchall()
	calendar_rowcount = cursor.rowcount
	for row in result_set:
	    # Only the first blank of the display name is replaced, as before.
	    calendar_label = row["displayname"].replace(" ", "_", 1)
	    print("%s, %s" % (row["id"], calendar_label))
	    # Let the driver bind the id instead of pasting it into the SQL text.
	    cursor.execute("SELECT startdate FROM oc_calendar_objects WHERE calendarid = %s",
	                   (row["id"],))
	    result_set2 = cursor.fetchall()
	    ZIEL = "MyCalendar-%s.txt" % calendar_label  # one texttable per calendar
	    with open(ZIEL, "w") as SCHREIB:  # closed even if a row fails
	        for row2 in result_set2:
	            # startdate is rendered as text and cut at fixed positions:
	            # characters 0-9 are the date, 11-18 the time.
	            eventdata = "%s" % (row2["startdate"])
	            print(eventdata)
	            eventdate = eventdata[0:10].replace("-", "", 2)
	            print(eventdate)
	            eventtime = eventdata[11:19].replace(":", "", 2)
	            print(eventtime)
	            SCHREIB.write("%s,%s\n" % (eventdate, eventtime))
	    print("Number of Events parsed: %d" % cursor.rowcount)
	print("Number of Calendars parsed: %d" % calendar_rowcount)
	cursor.close()
	conn.close()
	##END OF FILE
	#!/usr/bin/python
	# terminscanner.py Version 1
	#------------------------------------------------------------------------------------------
	#----- Inspired by Wolfram's "Analytics of My Life", we try it ourself
	#----- http://blog.stephenwolfram.com/2012/03/the-personal-analytics-of-my-life/
	#------------------------------------------------------------------------------------------
	# call this script like this:
	# /path/to/terminscanner.py CALENDAR APPENDIX
	# where CALENDAR is the path to your ics-file,
	# /foo/bar/calendar.ics
	# and where APPENDIX is the appendix of the outputfile to be created by this script
	# (and which will contain DATE and TIME for each calendar-entry in a texttable for GNU R)
	#--------------------------------------------------------------------------------------
	# IMPORTS #
	import os,re
	import sys
	import time
	kalendername = sys.argv[1] # first argument: path of the ics-file, e.g. "/foo/bar/calendar.ics"
	myfilename = sys.argv[2] # second argument: appendix of the output filename, e.g. "Office"

	# Name of the texttable written by this script; change the "MyCalendar-" prefix to your needs.
	ZIEL = ("MyCalendar-%s.txt") % (myfilename) # e.g. "MyCalendar-Office.txt"


	### nothing to edit from here on, leave it alone ...
	### My Functions and Routines
	#-- BashReturn requires: os ---------------------------------------
	def BashReturn(cmd):
	    """Run *cmd* through the shell and return everything it wrote to stdout.

	    *cmd* is handed to ``os.popen`` unchanged, so the caller is responsible
	    for quoting any file names in it.  An empty string is returned when the
	    command prints nothing.
	    """
	    # Read the whole output in one go; the with-block closes the pipe.
	    with os.popen(cmd) as pipe:
	        return pipe.read()
	#-------------------------------------------------------------------
	#==============End of Functions ========================================

	# Single-quote the file name for the shell so that paths containing blanks
	# or shell metacharacters work.
	quoted = "'" + kalendername.replace("'", "'\\''") + "'"
	cmd = 'less %s|grep DTSTART' % (quoted)
	bla = BashReturn(cmd)
	# NOTE(review): grep also matches DTSTART lines inside VTIMEZONE definitions,
	# presumably the source of the 1970 outliers the R script removes - confirm.
	#
	# A DTSTART value ends in a date (YYYYMMDD), optionally followed by "T" and a
	# time (HHMMSS) and an optional "Z" for UTC.
	stamp = re.compile(r'(\d{8})(?:T(\d{6}))?Z?$')
	with open(ZIEL, "w") as SCHREIB:  # closed even if a line fails
	    for line in bla.split('\n'):
	        # rstrip() removes the "\r" of CRLF-terminated ics-files, so files
	        # with plain "\n" line ends are parsed correctly as well.
	        hit = stamp.search(line.rstrip())
	        if hit:
	            termindate = hit.group(1)
	            termintime = hit.group(2) or "000000"  # entry has DATE only
	        else:  # is no real entry (e.g. the empty string after the last "\n")
	            termindate = "NA"
	            termintime = "NA"
	        SCHREIB.write("%s,%s\n" % (termindate, termintime))
	## END OF FILE