holgerd77 · August 29, 2015 14:01
diff --git a/get_farmsubsidy_data.sh b/get_farmsubsidy_data.sh
 # Fixes for Farmsubsidy data
 # Order of countries is taken form the website:
 # http://farmsubsidy.openspending.org/countries/
 set -x verbose
 #wget -O AT.zip "http://data.farmsubsidy.org/web/at20130501.zip"
 #unzip -d AT AT.zip

 # Generic fixes/helpers
 # HTML encoding fix
 sed 's/\&ouml;/ö/g; s/&Ouml;/Ö/g; s/&uuml;/ü/g; s/&Uuml;/Ü/g; s/&auml;/ä/g; s/&Auml;/Ä/g; s/&szlig;/ß/g; s/&quot;/"/g; s/&lt;/</g; s/&gt;/>/g; s/&amp;/\&/g; s/&#[0-9]*;//g' [COUNTRY_CODE]/recipient_bad.txt > [COUNTRY_CODE]/recipient.txt
 # Finding DOS formatted files
 find DK -type f -print0 | xargs -0 grep -l `printf '\r\n'`

 # AT (Austria)
 # 2013 Website : Done
 # 2013 Data    : Done 
 # HTML coded german umlaute (like "&ouml") leading to wrong column numbers in payment.txt
 mv AT/recipient.txt AT/recipient_bad.txt
 # Apply generic HTML encoding fix

 # BE (Belgium)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Some double entries in recipient.txt file
 mv BE/recipient.txt BE/recipient_bad.txt
 cat BE/recipient_bad.txt | uniq > BE/recipient.txt

 # BG (Bulgaria)
 # 2013 Website : Done
 # 2013 Data    : Done

 #CY (Cyprus)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Tried, went through without errors but no data shown!
 # Recipients in DB but no Payments or Schemes
 # Add english name in scheme.txt as national language name where entry is missing
 # Add some missing globalPaymentIds in payment.txt 
 mv CY/scheme.txt CY/scheme_bad.txt
 sed 's/^\([^;]*\);;\([^;]*\);/\1;\2;\2;/g' CY/scheme_bad.txt > CY/scheme.txt
 mv CY/payment.txt CY/payment_bad.txt
 sed 's/^\([^;]*\);;/\1;"CY\1";/g' CY/payment_bad.txt > CY/payment.txt

 #CZ (Czech Republic)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Remove 2014 entries misleading whole DB
 mv CZ/payment.txt CZ/payment_bad.txt
 awk < CZ/payment_bad.txt -F\; ' ($8 != "\"2014\""){ print $0 }' > CZ/payment.txt

 # DK (Denmark)
 # 2013 Website : Done
 # 2013 Data    : Done
 # (ignored: negative payments, to be further investigated!)
 # payment.txt ---
 # payment.txt file coming in two parts, concatenating together
 dos2unix DK/payment1.txt DK/payment.txt
 tail -n +2 DK/payment1.txt >> DK/payment.txt
 # recipient.txt ---
 # Some recipients represent empty lat, long values at the end of column as empty string, replacing
 cat DK/recipient.txt | dos2unix > DK/recipient_bad.txt
 sed 's/;"";""$/;;/g' DK/recipient_bad.txt > DK/recipient.txt
 # scheme.txt ---
 cat DK/scheme.txt | dos2unix > DK/scheme_bad.txt
 # Removing integer id from beginning of lines
 sed 's/^[^;]*;\(.*\)/\1/g' DK/scheme_bad.txt > DK/scheme_bad2.txt
 # Adding missing "DK" at end of lines
 sed 's/\;$/\;"DK"/g' DK/scheme_bad2.txt > DK/scheme.txt

 # EE (Estonia)
 # 2013 Website : Done
 # 2013 Data    : Done
 # No modification needed.

 # FI (Finland)
 # 2013 Website : Data missing (only raw.txt recipient file)
 # 2013 Data    : -
 # Coming with a raw.txt recipient file, not clear yet how to procede...

 # FR (France)
 # 2013 Website : Done 
 # 2013 Data    : Done

 # DE (Germany)
 # 2013 Website : Done
 # 2013 Data    : Done

 # GR (Greece)
 # 2013 Website : Data missing
 # 2013 Data    : -

 # HU (Hungary)
 # 2013 Website : Done
 # 2013 Data    : Done

 # IT (Italy)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Manual correction of 12 recipient entries (";" char in name)

 # IE (Ireland)
 # 2013 Website : Done
 # 2013 Data    : Done

 # LV (Latvia)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Adds national language name in scheme.txt as english name where entry is missing 
 mv LV/scheme.txt LV/scheme_bad.txt
 sed 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' LV/scheme_bad.txt > LV/scheme.txt

 # LT (Lithuania)
 # 2013 Website : Done
 # 2013 Data    : Done
 # scheme.txt ---
 # Deletes schemes with no IDs from scheme.txt
 dos2unix LT/*.txt
 mv LT/scheme.txt LT/scheme_bad.txt
 grep "^\"" LT/scheme_bad.txt > LT/scheme.txt
 # payment.txt ---
 # Remove payments with no globalSchemeId
 mv LT/payment.txt LT/payment_bad.txt
 awk < LT/payment_bad.txt -F \; '($5 != ""){ print $0 }' > LT/payment.txt

 # LU (Luxembourg)
 # 2013 Website : Done
 # 2013 Data    : Done

 # MA (Malta)
 # 2013 Website : Data missing (wrong data files)
 # 2013 Data    : -
 # Files containing data from Ireland

 # NL (Netherlands)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Copy national language name to english name for empty fields in scheme.txt
 mv NL/scheme.txt NL/scheme_bad.txt
 sed 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' NL/scheme_bad.txt > NL/scheme.txt
 # Super tricky mix of newlines and carriage returns in recipient.txt
 # Additionally deleted ; from line 819434 manually
 cp NL/recipient.txt NL/recipient_bad.txt
 # DON'T COPY PASTE BUT CREATE ^M CHARACTER WITH CTRL-V AND THEN CTRL-M!!!
 sed 's/^M/NEWLINEPLACEHOLDER/g' NL/recipient_bad.txt > NL/recipient.txt
 dos2unix NL/recipient.txt
 tr -d '\n' < NL/recipient.txt > NL/recipient_tmp.txt
 sed 's/NEWLINEPLACEHOLDER/\n/g' NL/recipient_tmp.txt > NL/recipient.txt
 rm NL/recipient_tmp.txt

 # PL (Poland)
 # 2013 Website : Done
 # 2013 Data    : Done

 # PT (Portugal)
 # 2013 Website : Done
 # 2013 Data    : Done

 # RO (Romania)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Went through, but no data, no years are showing, debugging needed!
 # recipient.txt ---
 # Many HTML encoded strings, generic fix, some manual fixes afterwards
 mv RO/recipient.txt RO/recipient_bad.txt
 # Apply generic HTML encoding fix
 # => recipient_bad2.txt
 # Manually edited recipients "RO1340283", "RO1421819" deletion of " inside name
 awk < RO/recipient_bad2.txt -F \; '{ field=$5;cnt=gsub(/"/,"\"",field) } (cnt>2){ str=""; for(i=1;i<=NF;i++){ if(i>1){ printf ";"; } if(i==5){ gsub(/"/, "", $i); printf "\""$i"\""; } else { printf "%s", $i; } }; printf "\n" } (cnt<=2){ print $0 }' > RO/recipient.txt

 # SK (Slovakia)
 # 2013 Website : Done
 # 2013 Data    : Done
 # recipient.txt ---
 # Manual normalization of one recipient entry (";" char in name)
 # scheme.txt ---
 # Copy national language name to english name for empty fields in scheme.txt
 mv SK/scheme.txt SK/scheme_bad.txt
 sed 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' SK/scheme_bad.txt > SK/scheme.txt

 # SI (Slovenia)
 # 2013 Website : Done
 # 2013 Data    : Done
 mv SI/scheme.txt SI/scheme_bad.txt
 sed 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' SI/scheme_bad.txt > SI/scheme.txt
 mv SI/payment.txt SI/payment_bad.txt
 # Somehow the country SL (Sierra Leone) appears in there instead of SI
 # Fix it by replacing with SI
 sed 's/^\([^;]*\);;/\1;"SI\1";/g' SI/payment_bad.txt | sed 's/"SL/"SI/g' > SI/payment.txt
 mv SI/recipient.txt SI/recipient_bad.txt
 sed 's/"SL/"SI/g' SI/recipient_bad.txt > SI/recipient.txt

 # ES (Spain)
 # 2013 Website : Done
 # 2013 Data    : Done
 # scheme.txt ---
 mv ES/scheme.txt ES/scheme_bad.txt
 tr -d '\n' < ES/scheme_bad.txt | tr '\r' '\n' | sed 's/^\([^;]*\);\([^;]*\);;/\1;\2;\2;/g' > ES/scheme.txt
 # recipient.txt ---
 # Concatenate separated lines
 mv ES/recipient.txt ES/recipient_bad.txt
 awk < ES/recipient_bad.txt -F \; 'BEGIN { p=0;cl="";} (NF<21 && p+NF<21){ p=p+NF;cl=cl$0; } (NF<21 && p+NF>=21){ print cl$0;p=0;cl="";} (NF>=21) { print $0;p=0;cl=""; }' > ES/recipient_bad2.txt
 # Concatenate name entries with one or several unescaped ;
 awk < ES/recipient_bad2.txt -F \; '( NF==21 ){ print $0 } ( NF==22){ print $1";"$2";"$3";"$4";"$5 $6";"$7";"$8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==23){ print $1";"$2";"$3";"$4";"$5 $6 $7";"$8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==24){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==25){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8 $9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==26){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8 $9 $10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 }' > ES/recipient.txt
 # Test for wrong line counts at the end
 awk < ES/recipient.txt -F \; '( NF<21 ){ print $0 }'

 # SE (Sweden)
 # 2013 Website : Done
 # 2013 Data    : Done
 # recipient.txt ---
 cat SE/recipient.txt | dos2unix > SE/recipient_bad.txt
 # Remove empty string on several fields and entries of recipient file
 sed 's/;""/;/g' SE/recipient_bad.txt > SE/recipient.txt
 #OLD Remove "" in latitude field of several recipients
 #OLD sed 's/;;"";$/;;;/g' SE/recipient_bad.txt > SE/recipient_bad2.txt
 #OLD sed 's/;"";""$/;;/g' SE/recipient_bad2.txt > SE/recipient.txt
 # scheme.txt ---
 mv SE/scheme.txt SE/scheme_bad.txt
 # Use scheme ID as english name if no name is provided
 sed 's/^\([^;]*\);;;/\1;;\1;/g' SE/scheme_bad.txt > SE/scheme.txt

 # GB (United Kingdom)
 # 2013 Website : Done
 # 2013 Data    : Done
 # Pre-Processing to get validatable files
 cp GB/payments1.txt GB/payment.txt
 tail -n +2 GB/payments2.txt >> GB/payment.txt
 tail -n +2 GB/payment3.txt >> GB/payment.txt
 mv GB/schemes.txt GB/scheme.txt
 # payment.txt ---
 mv GB/payment.txt GB/payment_bad.txt
 dos2unix GB/payment_bad.txt
 # Fix doubled country code in payment file
 sed 's/;"GBGB/;"GB/g' GB/payment_bad.txt > GB/payment.txt
 # recipient.txt ---
 # Manually corrected two lines 475111 475112 and 493001 493002 split into two (wrong number of columns)
 # scheme.txt ---
 mv GB/scheme.txt GB/scheme_bad.txt
 # Use scheme ID as english name if no name is provided
 sed 's/^\([^;]*\);;;/\1;;\1;/g' GB/scheme_bad.txt | dos2unix > GB/scheme.txt
 printf '"GB200450302032";;"Unknown";;"GB"\n' >> GB/scheme.txt
 printf '"GB200450302052";;"Unknown";;"GB"\n' >> GB/scheme.txt
	# Fixes for Farmsubsidy data
	# Order of countries is taken form the website:
	# http://farmsubsidy.openspending.org/countries/
	set -x verbose
	#wget -O AT.zip "http://data.farmsubsidy.org/web/at20130501.zip"
	#unzip -d AT AT.zip

	# Generic fixes/helpers
	# HTML encoding fix
	sed 's/\ö/ö/g; s/Ö/Ö/g; s/ü/ü/g; s/Ü/Ü/g; s/ä/ä/g; s/Ä/Ä/g; s/ß/ß/g; s/"/"/g; s/</</g; s/>/>/g; s/&/\&/g; s/&#[0-9]*;//g' [COUNTRY_CODE]/recipient_bad.txt > [COUNTRY_CODE]/recipient.txt
	# Finding DOS formatted files
	find DK -type f -print0 \| xargs -0 grep -l `printf '\r\n'`

	# AT (Austria)
	# 2013 Website : Done
	# 2013 Data : Done
	# HTML coded german umlaute (like "&ouml") leading to wrong column numbers in payment.txt
	mv AT/recipient.txt AT/recipient_bad.txt
	# Apply generic HTML encoding fix

	# BE (Belgium)
	# 2013 Website : Done
	# 2013 Data : Done
	# Some double entries in recipient.txt file
	mv BE/recipient.txt BE/recipient_bad.txt
	cat BE/recipient_bad.txt \| uniq > BE/recipient.txt

	# BG (Bulgaria)
	# 2013 Website : Done
	# 2013 Data : Done

	#CY (Cyprus)
	# 2013 Website : Done
	# 2013 Data : Done
	# Tried, went through without errors but no data shown!
	# Recipients in DB but no Payments or Schemes
	# Add english name in scheme.txt as national language name where entry is missing
	# Add some missing globalPaymentIds in payment.txt
	mv CY/scheme.txt CY/scheme_bad.txt
	sed 's/^\([^;]\);;\([^;]\);/\1;\2;\2;/g' CY/scheme_bad.txt > CY/scheme.txt
	mv CY/payment.txt CY/payment_bad.txt
	sed 's/^\([^;]*\);;/\1;"CY\1";/g' CY/payment_bad.txt > CY/payment.txt

	#CZ (Czech Republic)
	# 2013 Website : Done
	# 2013 Data : Done
	# Remove 2014 entries misleading whole DB
	mv CZ/payment.txt CZ/payment_bad.txt
	awk < CZ/payment_bad.txt -F\; ' ($8 != "\"2014\""){ print $0 }' > CZ/payment.txt

	# DK (Denmark)
	# 2013 Website : Done
	# 2013 Data : Done
	# (ignored: negative payments, to be further investigated!)
	# payment.txt ---
	# payment.txt file coming in two parts, concatenating together
	dos2unix DK/payment1.txt DK/payment.txt
	tail -n +2 DK/payment1.txt >> DK/payment.txt
	# recipient.txt ---
	# Some recipients represent empty lat, long values at the end of column as empty string, replacing
	cat DK/recipient.txt \| dos2unix > DK/recipient_bad.txt
	sed 's/;"";""$/;;/g' DK/recipient_bad.txt > DK/recipient.txt
	# scheme.txt ---
	cat DK/scheme.txt \| dos2unix > DK/scheme_bad.txt
	# Removing integer id from beginning of lines
	sed 's/^[^;];\(.\)/\1/g' DK/scheme_bad.txt > DK/scheme_bad2.txt
	# Adding missing "DK" at end of lines
	sed 's/\;$/\;"DK"/g' DK/scheme_bad2.txt > DK/scheme.txt

	# EE (Estonia)
	# 2013 Website : Done
	# 2013 Data : Done
	# No modification needed.

	# FI (Finland)
	# 2013 Website : Data missing (only raw.txt recipient file)
	# 2013 Data : -
	# Coming with a raw.txt recipient file, not clear yet how to procede...

	# FR (France)
	# 2013 Website : Done
	# 2013 Data : Done

	# DE (Germany)
	# 2013 Website : Done
	# 2013 Data : Done

	# GR (Greece)
	# 2013 Website : Data missing
	# 2013 Data : -

	# HU (Hungary)
	# 2013 Website : Done
	# 2013 Data : Done

	# IT (Italy)
	# 2013 Website : Done
	# 2013 Data : Done
	# Manual correction of 12 recipient entries (";" char in name)

	# IE (Ireland)
	# 2013 Website : Done
	# 2013 Data : Done

	# LV (Latvia)
	# 2013 Website : Done
	# 2013 Data : Done
	# Adds national language name in scheme.txt as english name where entry is missing
	mv LV/scheme.txt LV/scheme_bad.txt
	sed 's/^\([^;]\);\([^;]\);;/\1;\2;\2;/g' LV/scheme_bad.txt > LV/scheme.txt

	# LT (Lithuania)
	# 2013 Website : Done
	# 2013 Data : Done
	# scheme.txt ---
	# Deletes schemes with no IDs from scheme.txt
	dos2unix LT/*.txt
	mv LT/scheme.txt LT/scheme_bad.txt
	grep "^\"" LT/scheme_bad.txt > LT/scheme.txt
	# payment.txt ---
	# Remove payments with no globalSchemeId
	mv LT/payment.txt LT/payment_bad.txt
	awk < LT/payment_bad.txt -F \; '($5 != ""){ print $0 }' > LT/payment.txt

	# LU (Luxembourg)
	# 2013 Website : Done
	# 2013 Data : Done

	# MA (Malta)
	# 2013 Website : Data missing (wrong data files)
	# 2013 Data : -
	# Files containing data from Ireland

	# NL (Netherlands)
	# 2013 Website : Done
	# 2013 Data : Done
	# Copy national language name to english name for empty fields in scheme.txt
	mv NL/scheme.txt NL/scheme_bad.txt
	sed 's/^\([^;]\);\([^;]\);;/\1;\2;\2;/g' NL/scheme_bad.txt > NL/scheme.txt
	# Super tricky mix of newlines and carriage returns in recipient.txt
	# Additionally deleted ; from line 819434 manually
	cp NL/recipient.txt NL/recipient_bad.txt
	# DON'T COPY PASTE BUT CREATE ^M CHARACTER WITH CTRL-V AND THEN CTRL-M!!!
	sed 's/^M/NEWLINEPLACEHOLDER/g' NL/recipient_bad.txt > NL/recipient.txt
	dos2unix NL/recipient.txt
	tr -d '\n' < NL/recipient.txt > NL/recipient_tmp.txt
	sed 's/NEWLINEPLACEHOLDER/\n/g' NL/recipient_tmp.txt > NL/recipient.txt
	rm NL/recipient_tmp.txt

	# PL (Poland)
	# 2013 Website : Done
	# 2013 Data : Done

	# PT (Portugal)
	# 2013 Website : Done
	# 2013 Data : Done

	# RO (Romania)
	# 2013 Website : Done
	# 2013 Data : Done
	# Went through, but no data, no years are showing, debugging needed!
	# recipient.txt ---
	# Many HTML encoded strings, generic fix, some manual fixes afterwards
	mv RO/recipient.txt RO/recipient_bad.txt
	# Apply generic HTML encoding fix
	# => recipient_bad2.txt
	# Manually edited recipients "RO1340283", "RO1421819" deletion of " inside name
	awk < RO/recipient_bad2.txt -F \; '{ field=$5;cnt=gsub(/"/,"\"",field) } (cnt>2){ str=""; for(i=1;i<=NF;i++){ if(i>1){ printf ";"; } if(i==5){ gsub(/"/, "", $i); printf "\""$i"\""; } else { printf "%s", $i; } }; printf "\n" } (cnt<=2){ print $0 }' > RO/recipient.txt

	# SK (Slovakia)
	# 2013 Website : Done
	# 2013 Data : Done
	# recipient.txt ---
	# Manual normalization of one recipient entry (";" char in name)
	# scheme.txt ---
	# Copy national language name to english name for empty fields in scheme.txt
	mv SK/scheme.txt SK/scheme_bad.txt
	sed 's/^\([^;]\);\([^;]\);;/\1;\2;\2;/g' SK/scheme_bad.txt > SK/scheme.txt

	# SI (Slovenia)
	# 2013 Website : Done
	# 2013 Data : Done
	mv SI/scheme.txt SI/scheme_bad.txt
	sed 's/^\([^;]\);\([^;]\);;/\1;\2;\2;/g' SI/scheme_bad.txt > SI/scheme.txt
	mv SI/payment.txt SI/payment_bad.txt
	# Somehow the country SL (Sierra Leone) appears in there instead of SI
	# Fix it by replacing with SI
	sed 's/^\([^;]*\);;/\1;"SI\1";/g' SI/payment_bad.txt \| sed 's/"SL/"SI/g' > SI/payment.txt
	mv SI/recipient.txt SI/recipient_bad.txt
	sed 's/"SL/"SI/g' SI/recipient_bad.txt > SI/recipient.txt

	# ES (Spain)
	# 2013 Website : Done
	# 2013 Data : Done
	# scheme.txt ---
	mv ES/scheme.txt ES/scheme_bad.txt
	tr -d '\n' < ES/scheme_bad.txt \| tr '\r' '\n' \| sed 's/^\([^;]\);\([^;]\);;/\1;\2;\2;/g' > ES/scheme.txt
	# recipient.txt ---
	# Concatenate separated lines
	mv ES/recipient.txt ES/recipient_bad.txt
	awk < ES/recipient_bad.txt -F \; 'BEGIN { p=0;cl="";} (NF<21 && p+NF<21){ p=p+NF;cl=cl$0; } (NF<21 && p+NF>=21){ print cl$0;p=0;cl="";} (NF>=21) { print $0;p=0;cl=""; }' > ES/recipient_bad2.txt
	# Concatenate name entries with one or several unescaped ;
	awk < ES/recipient_bad2.txt -F \; '( NF==21 ){ print $0 } ( NF==22){ print $1";"$2";"$3";"$4";"$5 $6";"$7";"$8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==23){ print $1";"$2";"$3";"$4";"$5 $6 $7";"$8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==24){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8";"$9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==25){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8 $9";"$10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 } ( NF==26){ print $1";"$2";"$3";"$4";"$5 $6 $7 $8 $9 $10";"$11";"$12";"$13";"$14";"$15";"$16";"$17";"$18";"$19";"$20";"$21";"$22 }' > ES/recipient.txt
	# Test for wrong line counts at the end
	awk < ES/recipient.txt -F \; '( NF<21 ){ print $0 }'

	# SE (Sweden)
	# 2013 Website : Done
	# 2013 Data : Done
	# recipient.txt ---
	cat SE/recipient.txt \| dos2unix > SE/recipient_bad.txt
	# Remove empty string on several fields and entries of recipient file
	sed 's/;""/;/g' SE/recipient_bad.txt > SE/recipient.txt
	#OLD Remove "" in latitude field of several recipients
	#OLD sed 's/;;"";$/;;;/g' SE/recipient_bad.txt > SE/recipient_bad2.txt
	#OLD sed 's/;"";""$/;;/g' SE/recipient_bad2.txt > SE/recipient.txt
	# scheme.txt ---
	mv SE/scheme.txt SE/scheme_bad.txt
	# Use scheme ID as english name if no name is provided
	sed 's/^\([^;]*\);;;/\1;;\1;/g' SE/scheme_bad.txt > SE/scheme.txt

	# GB (United Kingdom)
	# 2013 Website : Done
	# 2013 Data : Done
	# Pre-Processing to get validatable files
	cp GB/payments1.txt GB/payment.txt
	tail -n +2 GB/payments2.txt >> GB/payment.txt
	tail -n +2 GB/payment3.txt >> GB/payment.txt
	mv GB/schemes.txt GB/scheme.txt
	# payment.txt ---
	mv GB/payment.txt GB/payment_bad.txt
	dos2unix GB/payment_bad.txt
	# Fix doubled country code in payment file
	sed 's/;"GBGB/;"GB/g' GB/payment_bad.txt > GB/payment.txt
	# recipient.txt ---
	# Manually corrected two lines 475111 475112 and 493001 493002 split into two (wrong number of columns)
	# scheme.txt ---
	mv GB/scheme.txt GB/scheme_bad.txt
	# Use scheme ID as english name if no name is provided
	sed 's/^\([^;]*\);;;/\1;;\1;/g' GB/scheme_bad.txt \| dos2unix > GB/scheme.txt
	printf '"GB200450302032";;"Unknown";;"GB"\n' >> GB/scheme.txt
	printf '"GB200450302052";;"Unknown";;"GB"\n' >> GB/scheme.txt