Skip to content

Instantly share code, notes, and snippets.

@azmfaridee
Created July 5, 2013 00:22
Show Gist options
  • Save azmfaridee/5930937 to your computer and use it in GitHub Desktop.
Save azmfaridee/5930937 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# coding=utf-8
# -*- encoding: utf-8 -*-
import sys;
# À Bunifaziu si cunserva l'usu di una varietà di a lingua ligura [[Dialettu bunifazincu|bunifazinca]], un dialettu ghjinuvesu arcaicu chì currispondi à a parlata impurtata da i culoni à a fini di u XIIIimu seculu.
# Si stima chì u corsu sia parlatu in [[Corsica]] da circa 90/100.000 lucutori frà i 275.000 abitanti di l'isola, puru ch'elli sianu parechji frà quessi à esse di lingua materna francese (dati da una ricerca di l'INSEE di u 2004 [http://www.insee.fr/fr/insee_regions/corse/rfc/docs/ecoc1053.htm]), à quelli ci hè quantunque da aghjustà e pupulazione emigrate in [[Francia]] (par un tutale di 133.000 individui in Francia) è in altre nazione.
# External links collected so far, as (footnote number, url) tuples.  The
# footnote number is the same one printed in the ``[n]`` marker that replaces
# the link in the output text.
urls = []


def _flush_apostrophes(run_length, target):
    """Append the literal apostrophe left over from a run of apostrophes.

    Runs of 2, 3 or 5 apostrophes are pure italic/bold markup and produce
    nothing.  A run of 1 is a real apostrophe, and a run of 4 is bold markup
    plus one real apostrophe, so both contribute a single "'" to *target*
    (a list of output fragments).
    """
    if run_length in (1, 4):
        target.append("'")


def _link_label(link_text):
    """Return the text to display for an internal ``[[...]]`` link.

    *link_text* is the raw buffer collected for the link; it still carries
    the leading ``[[`` and one trailing ``]``.  For ``[[target|label]]`` the
    segment after the first pipe is used, otherwise the target itself.
    """
    if '|' in link_text:
        return link_text.split('|')[1].strip(']')
    return link_text.strip('[]')


def _external_url(link_text):
    """Return the URL of an external ``[url optional label]`` link.

    *link_text* still carries the leading ``[``.  The URL is the first
    space-separated field; everything after it is the link label.
    """
    return link_text.split(' ')[0].strip('[]')


def convert_line(line, urls):
    """Strip wiki link and bold/italic markup from one line of text.

    * ``[[target]]`` and ``[[target|label]]`` are replaced by their visible
      text.
    * ``[url]`` and ``[url label]`` are replaced by a numbered marker
      ``[n]``; ``(n, url)`` is appended to *urls*.
    * Runs of 2, 3 or 5 apostrophes are dropped; a single apostrophe is
      kept, and a run of 4 keeps one apostrophe.
    * Bracketed groups nested deeper than two levels are dropped.

    Args:
        line: one input line; surrounding whitespace is stripped first.
        urls: list that receives ``(n, url)`` tuples; mutated in place.

    Returns:
        The converted line, or ``None`` when the line has a different number
        of ``[`` and ``]`` characters (such lines are skipped entirely).
    """
    line = line.strip()
    if line.count('[') != line.count(']'):
        return None

    output = []         # fragments of the converted line
    link = []           # characters of the bracketed link being collected
    depth = 0           # current bracket nesting depth
    max_depth = 0       # deepest nesting reached inside the current link
    apostrophes = 0     # length of the apostrophe run not yet emitted

    for ch in line:
        # Count up to five consecutive apostrophes before deciding what the
        # run means; a sixth one is treated as an ordinary character.
        if ch == "'" and apostrophes < 5:
            apostrophes += 1
            continue

        # Resolve the pending run before *ch* changes the bracket depth, so
        # a literal apostrophe lands where it was written: inside the link
        # buffer when it occurred within a link, in the output otherwise.
        _flush_apostrophes(apostrophes, link if depth > 0 else output)
        apostrophes = 0

        if ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1

        if depth > max_depth:
            max_depth = depth
        elif depth == 0 and link:
            # The outermost bracket just closed: emit the collected link.
            link_text = ''.join(link)
            if max_depth == 2:
                output.append(_link_label(link_text))
            elif max_depth == 1:
                urls.append((len(urls) + 1, _external_url(link_text)))
                output.append('[' + str(len(urls)) + ']')
            link = []
            # Reset now so that a link starting immediately afterwards is
            # classified by its own nesting depth.
            max_depth = 0
            continue

        if depth > 0:
            link.append(ch)
        else:
            output.append(ch)

    # Bracket counts are balanced, so the line ends outside any link; a run
    # still pending here belongs to the output.
    _flush_apostrophes(apostrophes, output)
    return ''.join(output)


def main():
    """Convert every line read from stdin and print the result to stdout."""
    for raw_line in sys.stdin:
        converted = convert_line(raw_line, urls)
        if converted is not None:
            print(converted)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment