Skip to content

Instantly share code, notes, and snippets.

@oberhamsi
Created June 28, 2010 19:38
Show Gist options
  • Save oberhamsi/456252 to your computer and use it in GitHub Desktop.
Save oberhamsi/456252 to your computer and use it in GitHub Desktop.
"""
+M
+P
+C
+A
only t%cod up to the next *
sd3: 20, per user *JAN, *MUT, *LEN
followed by + or |
sd3 is an utterance with 3 different semcons. How many sd3s, sd4s, sd2s, etc. are there?
-----------------------------
per semantic content marker: in how many utterances does it occur at least once
----------
per LINE = from $ to }
+V-X: at least one + in the V field, but there must be no + in the E fields.
+V+X: at least one + in the V field and at least one + in at least one of the E fields.
-V+X: no + in the V field, but at least one + in at least one E field.
"""
import sys
import csv
def utteranceDensity(line):
    """
    Count how many of the semcon codes P, C, M and A are marked in `line`.

    Each code adds at most one to the result, however often it occurs.
    """
    return sum(1 for code in ['P', 'C', 'M', 'A'] if informationFocus(line, code))
def informationFocus(line, semConVar):
    """
    Report whether the semcon code `semConVar` is marked in `line`.

    A code counts as marked when it occurs as '+<code>+' or '+<code>|'
    anywhere in the string, including at the very beginning.

    Returns True or False.
    """
    marker = '+' + semConVar
    # Membership tests treat both terminators alike; the earlier
    # `find(...) > 1` on the '|' form ignored matches at index 0 and 1.
    return (marker + '+') in line or (marker + '|') in line
def informationLocus(line):
    """
    Classify a colon-separated coding line as a (hasV, hasX) pair.

    hasV is True when the third field starts with 'V' and contains a '+'
    at index 2 or later. hasX is True when any of the semcon codes
    P, C, M, A is marked (as judged by informationFocus) in fields six
    through nine.

    A line with fewer than three fields gives (False, False) and an empty
    third field gives hasV False, instead of raising IndexError.
    """
    fields = line.split(':')
    if len(fields) < 3:
        # No V field, and the slice 5:9 is necessarily empty as well.
        return (False, False)
    vField = fields[2]
    # NOTE(review): `> 1` skips a '+' placed directly after the 'V'
    # ("V+..."); kept as found -- confirm whether `> -1` was intended.
    hasV = vField.startswith('V') and vField.find('+') > 1
    eFields = ':'.join(fields[5:9])
    hasX = any(informationFocus(eFields, semConVar)
               for semConVar in ['P', 'C', 'M', 'A'])
    return (hasV, hasX)
def anal_utterancesWithSemCon():
    """
    Tally, for each semcon code, the utterances in which it is marked.

    Walks everything produced by utterances() and, for each of the codes
    P, C, L, M and A, adds one per utterance where informationFocus()
    finds the code. Returns the dict mapping code to count.
    """
    # NOTE(review): 'L' is tallied here but is not among the codes that
    # utteranceDensity() checks -- confirm this is intended.
    tally = {'P': 0, 'C': 0, 'L': 0, 'M': 0, 'A': 0}
    for _speaker, text in utterances():
        for code in tally:
            tally[code] += 1 if informationFocus(text, code) else 0
    return tally
def anal_semConDensity():
    """
    Build a histogram of utterances by their number of distinct semcons.

    The keys 0 through 4 are the possible results of utteranceDensity();
    each value is the number of utterances from utterances() that have
    that density.
    """
    histogram = dict.fromkeys(range(5), 0)
    for pair in utterances():
        histogram[utteranceDensity(pair[1])] += 1
    return histogram
def anal_informationLocus():
    """
    Count coding lines per (hasV, hasX) combination.

    Consumes lines() until it delivers an empty or None line (its end
    marker), classifies every other line with informationLocus() and
    returns a dict keyed by the four possible boolean pairs.
    """
    counts = {
        (True, False): 0,
        (True, True): 0,
        (False, True): 0,
        (False, False): 0,
    }
    for entry in lines():
        codLine = entry[1]
        if not codLine:
            break
        counts[informationLocus(codLine)] += 1
    return counts
def lines():
    """
    Yield [user, text] for every line belonging to a %cod block.

    Reads the module-level `file` object. A line starting with '*' or
    '@end' closes any open block and sets the current user to characters
    1-3 of that line. A '%cod:' line opens a block and yields its second
    tab-separated field when that field is longer than two characters.
    Any other '%' line closes the block; every remaining line inside an
    open block is yielded unchanged.

    The last item is always the sentinel [None, None].
    """
    # Start without a speaker so a '%cod:' line ahead of the first '*'
    # line yields None instead of raising UnboundLocalError.
    user = None
    inCod = False
    for line in file.readlines():
        if line[0] == '*' or line[0:4].lower() == '@end':
            inCod = False
            user = line[1:4]
        elif line[0:5].lower() == '%cod:':
            inCod = True
            parts = line.split('\t')
            if len(parts) > 1 and len(parts[1]) > 2:
                yield [user, parts[1]]
        elif line[0:1] == '%':
            inCod = False
        elif inCod:
            yield [user, line]
    # End marker that anal_informationLocus() stops on.
    yield [None, None]
def utterances():
    """
    Yield [user, utterance] pairs assembled from the %cod blocks.

    Reads the module-level `file` object. A '%cod:' line starts a new
    utterance from its second tab-separated field (when there is one);
    following lines are appended until a line starting with '*', '@end'
    or '%' is met. The collected text is yielded at a '*' / '@end' line
    if the block is still open, and once more after the last input line.
    """
    speaker = None
    text = ''
    collecting = False
    for raw in file.readlines():
        if raw[0] == '*' or raw[:4].lower() == '@end':
            if collecting:
                collecting = False
                yield [speaker, text]
            speaker = raw[1:4]
            continue
        if raw[:5].lower() == '%cod:':
            collecting = True
            columns = raw.split('\t')
            if len(columns) > 1:
                text = columns[1]
            continue
        if raw.startswith('%'):
            # NOTE(review): a block closed here is not yielded at the next
            # speaker line -- confirm that %cod is always the last tier.
            collecting = False
        elif collecting:
            text += raw
    # NOTE(review): unconditional, so the last utterance is emitted again
    # when an '@end' line has already flushed it -- confirm intended.
    yield [speaker, text]
# Command-line driver: every argument is the path of a coded transcript.
paths = sys.argv[1:]
for COD_PATH in paths:
    # The anal_* functions read the module-level name `file` (which hides
    # the Python 2 builtin of the same name) and each one consumes it to
    # the end, so the input is reopened for every report. `with` closes
    # the handle even when an analysis raises.
    with open(COD_PATH) as file:
        print('--------------')
        print('DATEI: ' + file.name)
        print('How many utterances have a certain sem con')
        semAndCount = anal_utterancesWithSemCon()
    for semCon in semAndCount:
        print(semCon + ', ' + str(semAndCount[semCon]))

    with open(COD_PATH) as file:
        print('Information Locus')
        print('V, X, Anzahl')
        infoLocus = anal_informationLocus()
    for table in infoLocus:
        plusV = '+V' if table[0] else '-V'
        plusX = '+X' if table[1] else '-X'
        print(plusV + plusX + ', ' + str(infoLocus[table]))

    with open(COD_PATH) as file:
        print('Number of Utterances per sem con Density')
        print('number of semCons, number of utterances')
        density = anal_semConDensity()
    for numberOf in density:
        print(str(numberOf) + ', ' + str(density[numberOf]))
    print('--------------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment