Created
July 28, 2010 16:11
-
-
Save oberhamsi/495058 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
""" | |
EXTRA TASK | |
============== | |
* nur cod zeilen (und nachfolgende) und außerung darüber, alles andere löschen. | |
* alles in ein file, headers per file | |
semantischer inhalt = +C+T | |
ASV === neu: euro zeile | |
------------------------ | |
alle auswertungen für: | |
* nur mutter | |
* nur len | |
nur lan | |
* | |
Verbtypen | |
========= | |
dollar zeilen können unterschieden werden je nach vertyp. es gibt 4 verbtypen: | |
* VDI, VDV (dynamic motion) | |
* VMC, VKM (cause motion) | |
alle auswertungen 3x: | |
* alle dollar zeilen | |
* nur dynamic | |
* nur cause motion | |
--------------------- | |
dollar zeile | |
=============== | |
* beginnt mit $: | |
* endet mit }\n | |
euro special | |
=============== | |
wenn nachfolgende dollar zeile euro enthält, zählt sie zu vorheriger dollar zeile dazu. | |
semantic content var | |
===================== | |
sind folgende großbuchstaben | |
* + davor | |
* T, M, C, A | |
* danach: + oder | | |
Density (dont) | |
========================= | |
wieviele unterschiedliche semantic content vars kommen pro dollar zeile vor | |
V Feld | |
======= | |
feld nach $: bis zum nächsten |, aber: V Feld nicht berücksichtigen wenn ASV in dollar zeile vorkommt. | |
gibt nur eins pro dollar zeile | |
E Feld | |
======== | |
beginnt mit :E endet mit | | |
mehrere pro dollar zeile | |
Other Devices Feld | |
=============== | |
das sind: | |
* alle E Felder | |
AUSWERTUNG Locus 1 | |
======= | |
* welche kombination von semconvars kommt in V Feld vor | |
* welche kombin an semcons kommt in Other Devices Feld vor | |
ZUSATZ: in E feldern kann auch ein +L vorkommen | |
( "" ist auch eine - leere - kombination) | |
pro file: wie oft kommen die kombinationen vor. | |
AUSWERTUNG Locus 2 | |
==================== | |
In wievielen dollarzeilen kommt ein beliebiges semconvar in | |
* V Feld AND in Other Devices, +V+X | |
* in V Feld und NICHT in Other Devices, +V-X | |
* in Other Devices und NICHT in V Feld, -V+X | |
* ... -V-X | |
Focus pro dollar zeile | |
======================= | |
welche kombination an semantic content vars kommt pro dollar zeile vor (z.b.: TMC, TAC,..). | |
AUSWERTUNG Focus pro File
================== | |
wieviele dollar zeilen gibt es für jede semantic content var kombination. | |
( "" ist auch eine - leere - kombination) | |
""" | |
import sys | |
import csv | |
# Semantic-content variable letters; kept sorted so that combination keys
# built from them are always canonical.
SEM_CON_VARS = sorted(['T', 'C', 'M', 'A'])
# Same set plus 'L', used for E-fields, which may additionally carry a +L token.
SEM_CON_VARS_L = sorted(['T', 'C', 'M', 'A', 'L'])
# Token variables counted by the V-/E-field statistics, also kept sorted.
TOKEN_VARS = sorted(['Tbo', 'Tve', 'Tpa', 'Tgo', 'Tso', 'Tdx', 'L', 'M', 'A', 'C'])
def getTokenFrequenceVE(fields):
    """Total number of '+<tokenVar>' occurrences across *fields*, encoded as
    a string of that many '+' characters (the string is used as a table key).
    """
    hits = sum(field.count('+' + tok) for field in fields for tok in TOKEN_VARS)
    return hits * '+'
def getTokenCountsVE(fields):
    """Count, per token variable, how many of *fields* contain '+<tokenVar>'.

    A token is only recognised when it occurs at string index > 1, i.e. past
    the leading field-type character.  NOTE(review): this also skips a token
    at index 0 or 1 -- presumably intentional given the field layout, but
    confirm against real data.

    Returns a dict mapping '+<tokenVar>' -> number of fields containing it.

    Fix: dict.has_key() (Python-2 only, removed in Python 3) was replaced
    with the `in` operator; everything else is unchanged.
    """
    counts = {}
    for field in fields:
        for tokenVar in TOKEN_VARS:
            k = '+' + tokenVar
            if field.find(k) > 1:
                if k not in counts:
                    counts[k] = 0
                counts[k] += 1
    return counts
def getTokenCounts(line):
    """Build the semantic-content combination key for *line*.

    The key concatenates every variable letter repeated by its '+<var>'
    count (e.g. 'CTT' for one +C and two +T; '' when no token occurs).
    Returns {key: 1} so the result can be merged into running totals via
    increaseCounts().

    Fix: dict.has_key() (Python-2 only, removed in Python 3) was replaced
    with the `in` operator; everything else is unchanged.
    """
    counts = {}
    k = ""
    for semConVar in SEM_CON_VARS:
        count = line.count('+' + semConVar)
        if count <= 0:
            continue
        k += semConVar * count
    # one entry per line: the complete combination key, counted once
    if k not in counts:
        counts[k] = 0
    counts[k] += 1
    return counts
def getTokenFrequence(line):
    """Total count of '+<semConVar>' tokens in *line*, returned as a run of
    '+' characters (used as a frequency-table key)."""
    total = sum(line.count('+' + var) for var in SEM_CON_VARS)
    return total * '+'
def getPatternInFields(fields, includeL = False):
    """Return the set of semantic-content variable letters whose '+<var>'
    token occurs (at string index > 1) in any of *fields*.

    When *includeL* is True the extended alphabet (with 'L') is used, as
    E-fields may also carry a +L token.
    """
    alphabet = SEM_CON_VARS_L if includeL is True else SEM_CON_VARS
    return set(var
               for field in fields
               for var in alphabet
               if field.find('+' + var) > 1)
def setToStr(ss):
    """Join the elements of *ss* (a set, or any iterable of strings) into a
    single string in sorted order -- the canonical pattern key."""
    return ''.join(sorted(ss))
def getFields(line, fieldType):
    """Split *line* on ':' and return the fields whose first character is
    *fieldType*; the wildcard '*' returns every field.

    Fix: uses the slice field[:1] instead of field[0], so an empty field
    (produced by '::' in the input) no longer raises IndexError -- it is
    simply skipped (or returned as-is under '*', as before).
    """
    fields = line.split(':')
    return [field for field in fields if fieldType == '*' or field[:1] == fieldType]
#def getOtherDevicePattern(line): | |
# fields = line.split(':') | |
# totalCombi = set() | |
# lookFor = 'E' | |
# if fields[0][0] == '€': | |
# lookFor = 'V' | |
# for f in fields: | |
# if f[0] == lookFor: | |
# totalCombi = totalCombi.union(getPatternInField(f)) | |
# return setToStr(totalCombi) | |
def getAllPattern(line):
    """Return the sorted combination string of semantic-content variables
    occurring anywhere in *line*.

    BUG FIX: the original body called getPatternInField (singular), which is
    not defined anywhere in this file (only a commented-out sketch exists),
    so calling this function always raised NameError.  It now delegates to
    getPatternInFields, which already unions the pattern across all fields.
    """
    return setToStr(getPatternInFields(line.split(':')))
def isCodEnd(line):
    """True when *line* terminates a %cod block: a speaker tier ('*'), an
    '@' header, or any dependent tier ('%...').

    str.startswith with a tuple replaces the chained slice comparisons.  The
    separate '%com' test in the original was redundant (the bare '%' test
    already matched it), and line[0] raised IndexError on an empty line --
    an empty line now simply returns False.
    """
    return line.startswith(('*', '@', '%'))
def isCodStart(line):
    """True when *line* opens a %cod coding tier (case-insensitive)."""
    return line.lower().startswith('%cod')
def isUserLine(line):
    """True when *line* is a speaker tier, e.g. '*MUT:'.

    Fix: str.startswith instead of line[0], so an empty line returns False
    rather than raising IndexError.
    """
    return line.startswith('*')
def codLines(lines, suser):
    """Yield the lines of *lines* that belong to a %cod block attributed to
    speaker *suser* (a three-letter code such as 'LEN', 'MUT', 'JAN').

    Scans the transcript in order while tracking two pieces of state:
    * inCod -- True from a '%cod' tier until the next terminating tier
    * user  -- the current speaker, taken from the last '*XXX:' line seen
    """
    inCod = False
    user = None
    for line in lines:
        if isCodStart(line):
            inCod = True
        elif isCodEnd(line):
            inCod = False
        if isUserLine(line):
            # speaker code is the three characters following the leading '*'
            user = line[1:4]
        if user == suser and inCod:
            yield line
def printSetAsTable(myset):
    """Print each key/count pair of the dict *myset* as one tab-separated row.

    The print statement was rewritten as a single parenthesised expression,
    which behaves identically under Python 2 and is also valid Python 3.
    """
    for key in myset:
        print(key + "\t" + str(myset[key]))
def increase(dct, key):
    """Increment dct[setToStr(key)] in place, creating the entry at 0 first.

    *key* is a pattern set (or dict, whose keys are used); setToStr turns it
    into the canonical sorted-string form used as table key.

    Fix: dict.has_key() (Python-2 only, removed in Python 3) was replaced
    with the `in` operator; everything else is unchanged.
    """
    key = setToStr(key)
    if key not in dct:
        dct[key] = 0
    dct[key] += 1
def increaseSetCount(total, line):
    """Add every count of the dict *line* onto the running totals in *total*
    (modifies *total* in place; returns None)."""
    for key in line:
        total[key] = total.get(key, 0) + line[key]
def increaseCounts(old, new):
    """Merge the counter dict *new* into *old*, summing values per key
    (modifies *old* in place; returns None)."""
    for key, value in new.items():
        old[key] = old.get(key, 0) + value
def increaseFreq(d, k):
    """Increment the frequency counter d[k] in place, starting at zero if
    the key is not present yet."""
    d[k] = d.get(k, 0) + 1
## --- command-line setup ------------------------------------------------
# Usage: script.py [ignoreif] [onlyspontan] [onlycause] file1.cod file2.cod ...
# NOTE(review): crashes with IndexError when invoked without arguments.
codFile = None
paths = sys.argv[1:]
IGNORE_IF = False      # skip dollar lines whose last field contains '{if'
ONLY_SPONTAN = False   # keep only dynamic-motion verbs ($:VMS / $:VMI)
ONLY_CAUSE = False     # keep only cause-motion verbs ($:VMC / $:VK)
if paths[0] == 'ignoreif':
    IGNORE_IF = True
    paths = paths[1:]
if paths[0] == 'onlyspontan':
    ONLY_SPONTAN = True
    paths = paths[1:]
if paths[0] == 'onlycause':
    ONLY_CAUSE = True
    paths = paths[1:]
# Annotated copy of all processed lines is written here.
# NOTE(review): hard-coded Windows path -- adjust per machine.
codOut = open('c:\\evala\\sim_analyse.cod', 'w')
# Per-speaker running totals, accumulated over every input file.
totalConflationVerbCount = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalConflationOtherDevicesCount = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalConflationOverallCount = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalSemanticContent = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalSemConTokens = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalTokenFrequence = {'LEN': {}, 'MUT': {}, 'JAN': {}}
totalTokensV = {'LEN':{}, 'MUT':{}, 'JAN':{}}
totalTokensE = {'LEN':{}, 'MUT':{}, 'JAN':{}}
totalTokenFrequenceV = {'LEN':{}, 'MUT':{}, 'JAN':{}}
totalTokenFrequenceE ={'LEN':{}, 'MUT':{}, 'JAN':{}}
## --- main loop: one pass per file, per speaker, per coded line ----------
for COD_PATH in paths:
    codFile = open(COD_PATH)
    print codFile.name
    codOut.write('%com\t\t=======FILE: ' + codFile.name + '\n')
    lines = codFile.readlines()
    for suser in ['LEN', 'MUT', 'JAN']:
        codOut.write('%com\t\t=======\n')
        codOut.write('%com\t\t======= USER: ' + suser + '\n')
        codOut.write('%com\t\t=======\n')
        print ">>> " + suser
        # per-file, per-speaker tables (the total* dicts above accumulate
        # the same quantities across all files)
        conflationVerbCount = {}
        conflationOtherDevicesCount = {}
        conflationOverallCount = {}
        semanticContent = {}
        semConTokens = {}
        tokenFrequence = {}
        tokensV = {}
        tokensE = {}
        tokenFrequenceV = {}
        tokenFrequenceE = {}
        for cline in codLines(lines, suser):
            ## ignore if's if in IGNORE_IF mode
            iftest = getFields(cline, '*')
            if IGNORE_IF and iftest[-1] and iftest[-1].find('{if') >= 0:
                continue
            # verb-type filters (see module docstring: dynamic vs. cause motion)
            if ONLY_CAUSE and not (cline.count('$:VMC') or cline.count('$:VK')): continue
            if ONLY_SPONTAN and not (cline.count('$:VMS') or cline.count('$:VMI')): continue
            ## locus 1: which variable combinations occur in the V field
            ## and in the Other-Devices (E) fields
            verbPattern = getPatternInFields(getFields(cline, 'V'))
            increase(conflationVerbCount, verbPattern)
            increase(totalConflationVerbCount[suser], verbPattern)
            otherDevicesPattern = getPatternInFields(getFields(cline, 'E'), True)
            increase(conflationOtherDevicesCount, otherDevicesPattern)
            increase(totalConflationOtherDevicesCount[suser], otherDevicesPattern)
            ## focus: variable combination over the whole dollar line
            allFields = getFields(cline, '*')
            overallPattern = getPatternInFields(allFields)
            increase(conflationOverallCount, overallPattern)
            increase(totalConflationOverallCount[suser], overallPattern)
            ## locus 2: classify the line as +V/-V x +E/-E depending on
            ## whether any variable occurs in the V resp. E fields
            verbPatternForLocus = getPatternInFields(getFields(cline, 'V'))
            otherDevicesPatternForLocus = getPatternInFields(getFields(cline, 'E'))
            semCon = ""
            if len(verbPatternForLocus) == 0:
                semCon = "-V"
            else:
                semCon = "+V"
            if len(otherDevicesPatternForLocus) == 0:
                semCon += "-E"
            else:
                semCon += "+E"
            semConPattern = {semCon: 1}
            increase(semanticContent, semConPattern)
            increase(totalSemanticContent[suser], semConPattern)
            # semantic-content token combination of this line
            semConTokenCounts = getTokenCounts(cline)
            increaseCounts(semConTokens, semConTokenCounts)
            increaseCounts(totalSemConTokens[suser], semConTokenCounts)
            # token frequency ('+' run length) of this line
            freq = getTokenFrequence(cline)
            increaseFreq(tokenFrequence, freq)
            increaseFreq(totalTokenFrequence[suser], freq)
            # token counts split by E and V fields
            toksV = getTokenCountsVE(getFields(cline, 'V'))
            increaseSetCount(tokensV, toksV)
            increaseSetCount(totalTokensV[suser], toksV)
            toksE = getTokenCountsVE(getFields(cline, 'E'))
            increaseSetCount(tokensE, toksE)
            increaseSetCount(totalTokensE[suser], toksE)
            # token frequencies split by E and V fields
            freqV = getTokenFrequenceVE(getFields(cline, 'V'))
            increaseFreq(tokenFrequenceV, freqV)
            increaseFreq(totalTokenFrequenceV[suser], freqV)
            freqE = getTokenFrequenceVE(getFields(cline, 'E'))
            increaseFreq(tokenFrequenceE, freqE)
            increaseFreq(totalTokenFrequenceE[suser], freqE)
            ## out file: echo the line followed by its derived patterns
            codOut.write(cline)
            codOut.write('%com\t ' + setToStr(verbPattern) + ' \tverbPattern\n')
            codOut.write('%com\t ' + setToStr(otherDevicesPattern) + ' \totherDevicesPattern\n')
            codOut.write('%com\t ' + setToStr(overallPattern) + ' \toverallPattern\n')
            codOut.write('%com\t ' + setToStr(semConPattern) + ' \tsemConPattern\n')
            codOut.write('%com\t ' + setToStr(semConTokens) + ' \tsemConTokens\n')
            codOut.write('%com\t ' + setToStr(tokenFrequence) + ' \ttokenFrequence\n')
            codOut.write('%com\t ' + setToStr(toksE) + ' \ttokensE\n')
            codOut.write('%com\t ' + setToStr(toksV) + ' \ttokensV\n')
        # per-file, per-speaker report on stdout
        print "conflationVerbCount "
        printSetAsTable (conflationVerbCount)
        print "conflationOtherDevicesCount "
        printSetAsTable(conflationOtherDevicesCount)
        print "conflationOverallCount "
        printSetAsTable(conflationOverallCount)
        print "semanticContentCount "
        printSetAsTable(semanticContent)
        print "semanticContentTokens "
        printSetAsTable(semConTokens)
        print "tokenfrequence "
        printSetAsTable(tokenFrequence)
        print "tokensE"
        printSetAsTable(tokensE)
        print "tokensV"
        printSetAsTable(tokensV)
        print "=============================="
    codFile.close()
codOut.close()
print "========= S U M O F A L L F I L E S =====================" | |
print "IGNORE IF \t" + str(IGNORE_IF) | |
print "ONLY CAUSE \t" + str(ONLY_CAUSE) | |
print "ONLY SPONTAN \t" + str(ONLY_SPONTAN) | |
print "===============================================" | |
for suser in ['LEN', 'MUT', 'JAN']: | |
print suser | |
print "total conflationVerbCount " | |
printSetAsTable(totalConflationVerbCount[suser]) | |
print "total otherDevicesPattern " | |
printSetAsTable(totalConflationOtherDevicesCount[suser]) | |
print "total overallCount " | |
printSetAsTable(totalConflationOverallCount[suser]) | |
print "total semantic content count " | |
printSetAsTable(totalSemanticContent[suser]) | |
print "total semantic content tokens " | |
printSetAsTable(totalSemConTokens[suser]) | |
print "total token frequence " | |
printSetAsTable(totalTokenFrequence[suser]) | |
print "tokensE" | |
printSetAsTable(totalTokensE[suser]) | |
print "tokensV" | |
printSetAsTable(totalTokensV[suser]) | |
print "tokenFrequenceE" | |
printSetAsTable(totalTokenFrequenceE[suser]) | |
print "tokenFrqeuneceV" | |
printSetAsTable(totalTokenFrequenceV[suser]) | |
print "-----------------------------" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment