oberhamsi · June 28, 2010 19:38
diff --git a/evala.py b/evala.py
 """
 +M
 +P
 +C
 +A

 nur t%cod bis zum naechsten *
 sd3: 20, pro user *JAN, *MUT, *LEN

 danach + oder |

 sd3 ist eine utterance mit 3 verschiedenen semcons. wieviele sd3s, sd4s, sd2s, etc gibt es?

 -----------------------------

 pro sem. inhaltszeichen: in wievielen utterances kommt es min. 1x vor

 ----------

 pro ZEILE = von $ bis }

 +V-X: im V-feld min ein +, aber in den E-felder darf kein + sein. 
 +V+X: wenn in V min ein + und in min einem der E-felder min. 1 + ist.
 -V+X: im V-feld kein +, aber in min. 1 E-feld min. ein + sein

 """

 import sys
 import csv

 def utteranceDensity(line):
    """
       Count how many of the sem-con markers P, C, M and A occur in
       ``line``. Each marker counts at most once, so the result is an
       integer between 0 and 4.
    """
    return sum(1 for marker in ('P', 'C', 'M', 'A')
               if informationFocus(line, marker))

 def informationFocus(line, semConVar):
    """
       Tell whether the sem-con marker ``semConVar`` occurs in ``line``.

       A marker counts as present when the text '+' + semConVar is
       directly followed by '+' or by '|'. Returns True or False.
    """
    marker = '+' + semConVar
    # The original compared the second find() against 1 instead of -1 and
    # therefore missed a '+X|' marker sitting at index 0 or 1; a plain
    # substring test treats both terminators alike.
    return (marker + '+') in line or (marker + '|') in line

 def informationLocus(line):
    """
       Classify one coding line by where it carries information.

       The line is split on ':'. Returns the tuple ``(hasV, hasX)``:
       ``hasV`` is true when the third field starts with 'V' and holds a
       '+' at index 2 or later; ``hasX`` is true when one of the sem-con
       markers P, C, M, A is found in fields 5 to 8 (presumably the
       "E" fields mentioned in the module notes). Raises IndexError when
       the line has fewer than three fields or an empty third field.
    """
    fields = line.split(':')
    vField = fields[2]
    # NOTE(review): "> 1" ignores a '+' placed directly after the 'V';
    # confirm whether "> -1" was meant.
    hasV = vField[0] == 'V' and vField.find('+') > 1
    eFields = ':'.join(fields[5:9])
    hasX = any([informationFocus(eFields, marker)
                for marker in ('P', 'C', 'M', 'A')])
    return (hasV, hasX)


 def anal_utterancesWithSemCon():
    """
       For each sem-con marker (P, C, L, M, A) count the utterances from
       utterances() in which it occurs at least once. Returns a dict that
       maps the marker letter to that count.
    """
    tally = dict.fromkeys(['P', 'C', 'L', 'M', 'A'], 0)
    for _speaker, text in utterances():
       for marker in list(tally):
          if informationFocus(text, marker):
             tally[marker] = tally[marker] + 1
    return tally

 def anal_semConDensity():
    """
       Build a histogram of sem-con density: the returned dict maps a
       density (0 to 4 distinct markers, as reported by
       utteranceDensity) to the number of utterances with that density.
    """
    histogram = dict((level, 0) for level in range(5))
    for _speaker, text in utterances():
       histogram[utteranceDensity(text)] += 1
    return histogram

 def anal_informationLocus():
    """
       Count the coding lines delivered by lines() per information
       locus. The returned dict is keyed by the ``(hasV, hasX)`` tuple
       from informationLocus and holds the number of lines per
       combination.
    """
    counts = {}
    for combo in [(True, False), (True, True), (False, True), (False, False)]:
       counts[combo] = 0
    for _speaker, codLine in lines():
       # lines() finishes with a [None, None] sentinel; stop there.
       if not codLine:
          break
       counts[informationLocus(codLine)] += 1
    return counts


 def lines():
    """
       Generate ``[user, text]`` pairs for every line of a %cod tier read
       from the module-level ``file`` handle.

       A line starting with '*' or '@end' closes the current tier and
       sets ``user`` to characters 1 to 3 of that line. A '%cod:' line
       opens a tier; the text after its first tab is yielded when it is
       longer than two characters. Any other '%' line closes the tier.
       All remaining lines are yielded whole while a tier is open.
       The final item is always the sentinel ``[None, None]``.
    """
    # Start with no speaker, as utterances() does; without this a %cod
    # tier ahead of the first '*' line raised UnboundLocalError.
    user = None
    inCod = False
    for line in file.readlines():
       # NOTE(review): echoes every input line to stdout; this looks like
       # leftover debugging output - confirm before removing.
       print(line)
       if line[0] == '*' or line[0:4].lower() == '@end':
          inCod = False
          user = line[1:4]
       elif line[0:5].lower() == '%cod:':
          inCod = True
          parts = line.split('\t')
          if len(parts) > 1 and len(parts[1]) > 2:
             yield [user, parts[1]]
       elif line[0:1] == '%':
          inCod = False
       elif inCod:
          yield [user, line]
    yield [None, None]

 def utterances():
    """
       Generate one ``[user, utterance]`` pair per %cod tier read from
       the module-level ``file`` handle.

       ``user`` is characters 1 to 3 of the most recent line starting
       with '*'. ``utterance`` is the text after the first tab of the
       '%cod:' line plus every following line up to the next line that
       starts with '%', '*' or '@end'. A '%cod:' line without a tab
       gives an empty utterance.
    """
    utterance = ''
    user = None
    inCod = False      # continuation lines currently belong to the %cod tier
    pending = False    # a %cod tier was collected but not yet yielded
    for line in file.readlines():
       if line[0] == '*' or line[0:4].lower() == '@end':
          # Flush on "pending" rather than "inCod": the tier must not be
          # lost when another % tier came after %cod, and it must not be
          # emitted a second time at end of file after '@End'.
          if pending:
             yield [user, utterance]
             pending = False
          inCod = False
          user = line[1:4]
       elif line[0:5].lower() == '%cod:':
          inCod = True
          pending = True
          parts = line.split('\t')
          # Reset instead of keeping the previous utterance's text.
          utterance = parts[1] if len(parts) > 1 else ''
       elif line[0:1] == '%':
          inCod = False
       elif inCod:
          utterance += line
    # Input that stops without a closing '*' or '@End' line.
    if pending:
       yield [user, utterance]


 paths = sys.argv[1:]
 for COD_PATH in paths:
    # Every analysis reads the module-level ``file`` handle to its end,
    # so the input is reopened before each of the three passes.
    file = open(COD_PATH)
    print('--------------')
    print('DATEI: ' + file.name)
    print('How many utterances have a certain sem con')
    semAndCount = anal_utterancesWithSemCon()
    for semCon, hits in semAndCount.items():
       print(semCon + ', ' + str(hits))
    file.close()

    file = open(COD_PATH)
    print('Information Locus')
    print('V, X, Anzahl')
    infoLocus = anal_informationLocus()
    for table, hits in infoLocus.items():
       plusV = '+V' if table[0] else '-V'
       plusX = '+X' if table[1] else '-X'
       print(plusV + plusX + ', ' + str(hits))
    file.close()

    file = open(COD_PATH)
    print('Number of Utterances per sem con Density')
    print('number of semCons, number of utterances')
    density = anal_semConDensity()
    for numberOf, hits in density.items():
       print(str(numberOf) + ', ' + str(hits))
    file.close()
    print('--------------')
	"""
	+M
	+P
	+C
	+A

	nur t%cod bis zum naechsten *
	sd3: 20, pro user *JAN, *MUT, *LEN

	danach + oder |

	sd3 ist eine utterance mit 3 verschiedenen semcons. wieviele sd3s, sd4s, sd2s, etc gibt es?

	-----------------------------

	pro sem. inhaltszeichen: in wievielen utterances kommt es min. 1x vor

	----------

	pro ZEILE = von $ bis }

	+V-X: im V-feld min ein +, aber in den E-felder darf kein + sein.
	+V+X: wenn in V min ein + und in min einem der E-felder min. 1 + ist.
	-V+X: im V-feld kein +, aber in min. 1 E-feld min. ein + sein

	"""

	import sys
	import csv

	def utteranceDensity(line):
	   """
	      Count how many of the sem-con markers P, C, M and A occur in
	      ``line``. Each marker counts at most once, so the result is an
	      integer between 0 and 4.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   occurances = 0
	   for semConVar in ['P', 'C', 'M', 'A']:
	      if informationFocus(line, semConVar):
	         occurances += 1
	   return occurances

	def informationFocus(line, semConVar):
	   """
	      Tell whether the sem-con marker ``semConVar`` occurs in ``line``.

	      A marker counts as present when the text '+' + semConVar is
	      directly followed by '+' or by '|'. Returns True or False.
	   """
	   marker = '+' + semConVar
	   # Three repairs: the body indentation is restored, the escaped
	   # '\|' literal is back to a plain '|', and the second test no longer
	   # compares find() against 1, which missed a '+X|' marker at index
	   # 0 or 1.
	   return (marker + '+') in line or (marker + '|') in line

	def informationLocus(line):
	   """
	      Classify one coding line by where it carries information.

	      The line is split on ':'. Returns the tuple ``(hasV, hasX)``:
	      ``hasV`` is true when the third field starts with 'V' and holds a
	      '+' at index 2 or later; ``hasX`` is true when one of the sem-con
	      markers P, C, M, A is found in fields 5 to 8 (presumably the
	      "E" fields mentioned in the module notes). Raises IndexError when
	      the line has fewer than three fields or an empty third field.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   fields = line.split(':')
	   # NOTE(review): "> 1" ignores a '+' placed directly after the 'V';
	   # confirm whether "> -1" was meant.
	   hasV = (fields[2][0] == 'V') and fields[2].find('+') > 1
	   hasX = False
	   eFields = ':'.join(fields[5:9])
	   for semConVar in ['P', 'C', 'M', 'A']:
	      if informationFocus(eFields, semConVar):
	         hasX = True
	   return (hasV, hasX)


	def anal_utterancesWithSemCon():
	   """
	      For each sem-con marker (P, C, L, M, A) count the utterances from
	      utterances() in which it occurs at least once. Returns a dict that
	      maps the marker letter to that count.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   semCons = {
	      'P': 0,
	      'C': 0,
	      'L': 0,
	      'M': 0,
	      'A': 0
	   }
	   for [user, utter] in utterances():
	      for semConVar in semCons.keys():
	         if informationFocus(utter, semConVar):
	            semCons[semConVar] += 1
	   return semCons

	def anal_semConDensity():
	   """
	      Build a histogram of sem-con density: the returned dict maps a
	      density (0 to 4 distinct markers, as reported by
	      utteranceDensity) to the number of utterances with that density.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   densities = {
	      0: 0,
	      1: 0,
	      2: 0,
	      3: 0,
	      4: 0
	   }
	   for [user, utter] in utterances():
	      count = utteranceDensity(utter)
	      densities[count] += 1
	   return densities

	def anal_informationLocus():
	   """
	      Count the coding lines delivered by lines() per information
	      locus. The returned dict is keyed by the ``(hasV, hasX)`` tuple
	      from informationLocus and holds the number of lines per
	      combination.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   locs = {
	      (True, False): 0,
	      (True, True): 0,
	      (False, True): 0,
	      (False, False): 0
	   }
	   for [user, utterLine] in lines():
	      # lines() finishes with a [None, None] sentinel; stop there.
	      if not utterLine:
	         break
	      locData = informationLocus(utterLine)
	      locs[locData] += 1
	   return locs


	def lines():
	   """
	      Generate ``[user, text]`` pairs for every line of a %cod tier read
	      from the module-level ``file`` handle.

	      A line starting with '*' or '@end' closes the current tier and
	      sets ``user`` to characters 1 to 3 of that line. A '%cod:' line
	      opens a tier; the text after its first tab is yielded when it is
	      longer than two characters. Any other '%' line closes the tier.
	      All remaining lines are yielded whole while a tier is open.
	      The final item is always the sentinel ``[None, None]``.
	   """
	   # Block structure restored (this copy had lost its indentation).
	   # ``user`` now starts as None, as in utterances(); without this a
	   # %cod tier ahead of the first '*' line raised UnboundLocalError.
	   user = None
	   inCod = False
	   for line in file.readlines():
	      # NOTE(review): echoes every input line to stdout; this looks like
	      # leftover debugging output - confirm before removing.
	      print(line)
	      if line[0] == '*' or line[0:4].lower() == '@end':
	         inCod = False
	         user = line[1:4]
	      elif line[0:5].lower() == '%cod:':
	         inCod = True
	         parts = line.split('\t')
	         if len(parts) > 1 and len(parts[1]) > 2:
	            yield [user, parts[1]]
	      elif line[0:1] == '%':
	         inCod = False
	      elif inCod:
	         yield [user, line]
	   yield [None, None]

	def utterances():
	   """
	      Generate one ``[user, utterance]`` pair per %cod tier read from
	      the module-level ``file`` handle.

	      ``user`` is characters 1 to 3 of the most recent line starting
	      with '*'. ``utterance`` is the text after the first tab of the
	      '%cod:' line plus every following line up to the next line that
	      starts with '%', '*' or '@end'. A '%cod:' line without a tab
	      gives an empty utterance.
	   """
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse.
	   utterance = ''
	   user = None
	   inCod = False      # continuation lines currently belong to the %cod tier
	   pending = False    # a %cod tier was collected but not yet yielded
	   for line in file.readlines():
	      if line[0] == '*' or line[0:4].lower() == '@end':
	         # Flush on "pending" rather than "inCod": the tier must not be
	         # lost when another % tier came after %cod, and it must not be
	         # emitted a second time at end of file after '@End'.
	         if pending:
	            yield [user, utterance]
	            pending = False
	         inCod = False
	         user = line[1:4]
	      elif line[0:5].lower() == '%cod:':
	         inCod = True
	         pending = True
	         parts = line.split('\t')
	         # Reset instead of keeping the previous utterance's text.
	         utterance = parts[1] if len(parts) > 1 else ''
	      elif line[0:1] == '%':
	         inCod = False
	      elif inCod:
	         utterance += line
	   # Input that stops without a closing '*' or '@End' line.
	   if pending:
	      yield [user, utterance]


	paths = sys.argv[1:]
	for COD_PATH in paths:
	   # Block structure restored: this copy had lost its indentation and
	   # did not parse. Every analysis reads the module-level ``file``
	   # handle to its end, so the input is reopened before each pass.
	   file = open(COD_PATH)
	   print('--------------')
	   print('DATEI: ' + file.name)
	   print('How many utterances have a certain sem con')
	   semAndCount = anal_utterancesWithSemCon()
	   for semCon in semAndCount.keys():
	      print(semCon + ', ' + str(semAndCount[semCon]))
	   file.close()
	   file = open(COD_PATH)
	   print('Information Locus')
	   print('V, X, Anzahl')
	   infoLocus = anal_informationLocus()
	   for table in infoLocus.keys():
	      plusV = '+V' if table[0] == True else '-V'
	      plusX = '+X' if table[1] == True else '-X'
	      print(plusV + '' + plusX + ', ' + str(infoLocus[table]))
	   file.close()
	   file = open(COD_PATH)
	   print('Number of Utterances per sem con Density')
	   print('number of semCons, number of utterances')
	   density = anal_semConDensity()
	   for numberOf in density.keys():
	      print(str(numberOf) + ', ' + str(density[numberOf]))
	   file.close()
	   print('--------------')