Created
July 5, 2013 00:22
-
-
Save azmfaridee/5930937 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# coding=utf-8
# -*- encoding: utf-8 -*-
"""Strip MediaWiki link and apostrophe markup from text read on stdin.

Every input line is printed with its markup reduced to plain text:

* ``[[target]]`` becomes ``target``; ``[[target|label]]`` becomes ``label``.
* ``[url]`` and ``[url label]`` become a numbered marker such as ``[1]``;
  the URL itself is recorded in the module-level ``urls`` list.
* Runs of two, three or five apostrophes (italic / bold markup) are removed.
  A single apostrophe is kept, and a run of four leaves one apostrophe.

Lines whose ``[`` and ``]`` counts differ are skipped entirely, and bracket
groups nested more than two levels deep are removed with their content.
"""
import sys

# Sample input lines (Corsican Wikipedia text, kept verbatim) showing the
# markup this script handles:
# À Bunifaziu si cunserva l'usu di una varietà di a lingua ligura [[Dialettu bunifazincu|bunifazinca]], un dialettu ghjinuvesu arcaicu chì currispondi à a parlata impurtata da i culoni à a fini di u XIIIimu seculu.
# Si stima chì u corsu sia parlatu in [[Corsica]] da circa 90/100.000 lucutori frà i 275.000 abitanti di l'isola, puru ch'elli sianu parechji frà quessi à esse di lingua materna francese (dati da una ricerca di l'INSEE di u 2004 [http://www.insee.fr/fr/insee_regions/corse/rfc/docs/ecoc1053.htm]), à quelli ci hè quantunque da aghjustà e pupulazione emigrate in [[Francia]] (par un tutale di 133.000 individui in Francia) è in altre nazione.

# (number, url) pairs for every single-bracket link seen so far. The number
# is the one printed in the "[n]" marker that replaced the link.
urls = []
# How many single-bracket links have been seen so far, across all lines.
url_counter = 0


def _wiki_link_text(group):
    """Return the visible text of a double-bracket group such as ``[[a|b]``."""
    if '|' in group:
        # The second pipe-separated field is taken as the label.
        return group.split('|')[1].strip(']')
    return group.strip('[]')


def _external_url(group):
    """Return the URL of a single-bracket group such as ``[http://x label``."""
    # The URL is the first whitespace-separated token; anything after it is
    # the human-readable label.
    fields = group.strip('[]').split(None, 1)
    return fields[0] if fields else ''


def strip_markup(line):
    """Return *line* with link and apostrophe markup removed.

    The line is stripped of surrounding whitespace first. Returns ``None``
    when the line contains a different number of ``[`` and ``]`` characters,
    which callers should treat as "skip this line".

    Side effects: each single-bracket link increments the module-level
    ``url_counter`` and appends ``(number, url)`` to ``urls``.
    """
    global url_counter

    line = line.strip()
    if line.count('[') != line.count(']'):
        return None

    out = []            # plain-text pieces, in output order
    group = []          # characters of the bracket group currently open
    depth = 0           # current bracket nesting level
    max_depth = 0       # deepest level reached inside the current group
    pending_quotes = 0  # length of the apostrophe run being read

    for c in line:
        # Collect runs of up to five apostrophes; a sixth one falls through
        # and is treated as ordinary text.
        if c == "'" and pending_quotes < 5:
            pending_quotes += 1
            continue

        # The run has ended. Runs of 1 or 4 leave one literal apostrophe,
        # written where the run occurred: inside the open group if there is
        # one, otherwise in the output. Runs of 2, 3 or 5 vanish.
        if pending_quotes in (1, 4):
            (group if depth > 0 else out).append("'")
        pending_quotes = 0

        if c == '[':
            depth += 1
        elif c == ']':
            depth -= 1

        if depth > max_depth:
            max_depth = depth
        elif depth == 0 and group:
            # This "]" closes the outermost bracket of a group.
            content = ''.join(group)
            if max_depth == 2:
                out.append(_wiki_link_text(content))
            elif max_depth == 1:
                url_counter += 1
                urls.append((url_counter, _external_url(content)))
                out.append('[' + str(url_counter) + ']')
            # Deeper nesting: the whole group is dropped.
            group = []
            # Reset so that an immediately following group is classified by
            # its own depth rather than this one's.
            max_depth = 0
            continue

        if depth > 0:
            group.append(c)
        else:
            out.append(c)

    # Flush an apostrophe run that reached the end of the line.
    if pending_quotes in (1, 4):
        out.append("'")
    return ''.join(out)


def main():
    """Filter standard input to standard output, one line at a time."""
    for line in sys.stdin:
        text = strip_markup(line)
        if text is not None:
            print(text)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment