Created
June 28, 2010 19:38
-
-
Save oberhamsi/456252 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
+M | |
+P | |
+C | |
+A | |
nur t%cod bis zum naechsten * | |
sd3: 20, pro user *JAN, *MUT, *LEN | |
danach + oder | | |
sd3 ist eine utterance mit 3 verschiedenen semcons. wieviele sd3s, sd4s, sd2s, etc gibt es? | |
----------------------------- | |
pro sem. inhaltszeichen: in wievielen utterances kommt es min. 1x vor | |
---------- | |
pro ZEILE = von $ bis } | |
+V-X: im V-feld min ein +, aber in den E-felder darf kein + sein. | |
+V+X: wenn in V min ein + und in min einem der E-felder min. 1 + ist. | |
-V+X: im V-feld kein +, aber in min. 1 E-feld min. ein + sein | |
""" | |
import sys | |
import csv | |
def utteranceDensity(line):
    """
    Count how many of the semcon markers P, C, M and A are set in `line`.

    Each marker is tested once with informationFocus(), so it contributes
    at most 1 and the result lies between 0 and 4.
    """
    return sum(
        1
        for marker in ('P', 'C', 'M', 'A')
        if informationFocus(line, marker)
    )
def informationFocus(line, semConVar):
    """
    Tell whether the semcon marker `semConVar` is set in `line`.

    A marker counts as set when `line` contains '+<marker>+' or
    '+<marker>|' anywhere, including at the very start of the text.

    line      -- coding text to search
    semConVar -- marker name, e.g. 'P', 'C', 'M' or 'A'

    Returns True or False.
    """
    # Plain substring tests: comparing the second find() result with `> 1`
    # (instead of `> -1`) missed a '+X|' marker sitting at offset 0 or 1.
    return ('+' + semConVar + '+') in line or ('+' + semConVar + '|') in line
def informationLocus(line):
    """
    Classify one coding line by where its '+' marks sit.

    The line is split on ':'. Returns the tuple (hasV, hasX):
      hasV -- the third field starts with 'V' and contains a '+' at
              offset 2 or later
      hasX -- the sixth to ninth fields (joined back with ':') contain at
              least one of the markers P, C, M, A as recognised by
              informationFocus()

    Raises IndexError when the line has fewer than three fields or the
    third field is empty.
    """
    columns = line.split(':')
    vField = columns[2]
    # NOTE(review): `> 1` ignores a '+' directly behind the leading 'V'
    # (offset 1) -- confirm whether that is intended.
    hasV = vField[0] == 'V' and vField.find('+') > 1
    extraFields = ':'.join(columns[5:9])
    hasX = any(
        informationFocus(extraFields, marker)
        for marker in ('P', 'C', 'M', 'A')
    )
    return (hasV, hasX)
def anal_utterancesWithSemCon():
    """
    Count, per semcon marker, the utterances in which it occurs.

    Walks every [user, text] pair produced by utterances() and adds one to
    a marker's counter whenever informationFocus() finds it in the text,
    so an utterance counts at most once per marker.

    Returns a dict mapping 'P', 'C', 'L', 'M' and 'A' to their counts.
    """
    counts = {'P': 0, 'C': 0, 'L': 0, 'M': 0, 'A': 0}
    for _speaker, text in utterances():
        for marker in counts:
            if informationFocus(text, marker):
                counts[marker] += 1
    return counts
def anal_semConDensity():
    """
    Build a histogram of semcon density over all utterances.

    For each [user, text] pair from utterances() the density is taken from
    utteranceDensity(text) and the matching bucket is incremented.

    Returns a dict mapping each density 0..4 to the number of utterances
    with that density.
    """
    histogram = dict.fromkeys(range(5), 0)
    for _speaker, text in utterances():
        histogram[utteranceDensity(text)] += 1
    return histogram
def anal_informationLocus():
    """
    Tally the coding lines by their (hasV, hasX) classification.

    Every line delivered by lines() is classified with informationLocus();
    iteration stops at the first pair whose text is empty or None, which is
    how lines() marks the end of its output.

    Returns a dict keyed by the four possible (hasV, hasX) tuples.
    """
    tally = {
        (True, False): 0,
        (True, True): 0,
        (False, True): 0,
        (False, False): 0,
    }
    for _speaker, codLine in lines():
        if not codLine:
            break
        tally[informationLocus(codLine)] += 1
    return tally
def lines():
    """
    Generate a [user, text] pair for every line of %cod coding in the
    module-level `file`.

    Line handling, in this order:
      * a line starting with '*' or '@end' (any case) closes the current
        %cod tier and sets `user` to characters 1-3 of that line
      * a line starting with '%cod:' (any case) opens a tier; the text after
        its first tab is yielded if it is longer than two characters
      * any other line starting with '%' closes the tier
      * every other line is yielded unchanged while a tier is open

    After the input is exhausted one [None, None] pair is yielded as an
    end marker.
    """
    # Start with a defined speaker so a %cod tier that appears before the
    # first '*' line yields None instead of raising UnboundLocalError
    # (same starting value as in utterances()).
    user = None
    inCod = False
    # The former per-line debug print is gone so raw input no longer gets
    # mixed into the report output.
    for line in file.readlines():
        if line[0:1] == '*' or line[0:4].lower() == '@end':
            inCod = False
            user = line[1:4]
        elif line[0:5].lower() == '%cod:':
            inCod = True
            parts = line.split('\t')
            if len(parts) > 1 and len(parts[1]) > 2:
                yield [user, parts[1]]
        elif line[0:1] == '%':
            inCod = False
        elif inCod:
            yield [user, line]
    yield [None, None]
def utterances():
    """
    Generate one [user, text] pair per %cod tier in the module-level `file`.

    `user` is characters 1-3 of the most recent line starting with '*'
    (None if there was none yet). `text` is what follows the first tab of
    the '%cod:' line plus every following line up to the line that closes
    the tier.

    A tier is closed by a line starting with '*', '@end' (any case) or '%',
    or by the end of the input. Each tier is yielded exactly once.
    """
    utterance = ''
    user = None
    inCod = False
    for line in file.readlines():
        startsMain = line[0:1] == '*' or line[0:4].lower() == '@end'
        startsTier = line[0:1] == '%'
        if startsMain or startsTier:
            # Any new main line or tier closes the open %cod tier. Yielding
            # here for '%' lines too keeps a tier that is followed by
            # another tier from being silently dropped.
            if inCod:
                inCod = False
                yield [user, utterance]
            if startsMain:
                user = line[1:4]
            elif line[0:5].lower() == '%cod:':
                inCod = True
                parts = line.split('\t')
                # Reset when there is no tab so the text of the previous
                # utterance cannot leak into this one.
                utterance = parts[1] if len(parts) > 1 else ''
        elif inCod:
            utterance += line
    # Only a tier still open at end of input is pending; an unconditional
    # yield here repeated the last utterance after a closing '@End'.
    if inCod:
        yield [user, utterance]
# Command-line driver: for every path given on the command line, print three
# comma-separated reports (utterances per semcon, information locus, semcon
# density).
#
# `file` has to stay a module-level name because utterances() and lines()
# read from it. It is re-opened before each report since both helpers read
# the whole file via readlines(). try/finally closes the handle even when
# an analysis raises.
#
# print(...) with a single argument behaves like the print statement on
# Python 2, so the output is unchanged there.
paths = sys.argv[1:]
for COD_PATH in paths:
    file = open(COD_PATH)
    try:
        print('--------------')
        print('DATEI: ' + file.name)
        print('How many utterances have a certain sem con')
        semAndCount = anal_utterancesWithSemCon()
        for semCon in semAndCount:
            print(semCon + ', ' + str(semAndCount[semCon]))
    finally:
        file.close()

    file = open(COD_PATH)
    try:
        print('Information Locus')
        print('V, X, Anzahl')
        infoLocus = anal_informationLocus()
        for table in infoLocus:
            plusV = '+V' if table[0] else '-V'
            plusX = '+X' if table[1] else '-X'
            print(plusV + plusX + ', ' + str(infoLocus[table]))
    finally:
        file.close()

    file = open(COD_PATH)
    try:
        print('Number of Utterances per sem con Density')
        print('number of semCons, number of utterances')
        density = anal_semConDensity()
        for numberOf in density:
            print(str(numberOf) + ', ' + str(density[numberOf]))
    finally:
        file.close()
    # NOTE(review): closing separator assumed to be printed once per input
    # file -- confirm against the intended report layout.
    print('--------------')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment