import string
def printWst(s):
    """Print the header row, then the highest flag seen for each key.

    ``s`` is a multi-line string. Blank lines are ignored. The first
    non-blank line is treated as a header and echoed as-is (first two
    columns). Every following line must hold at least two
    whitespace-separated columns: a key and a flag.

    Flags are ranked by their position in ``string.ascii_uppercase``; for
    each key the flag with the greatest position wins. Results are printed
    in ``sorted()`` order of the keys, which are strings, so the ordering
    is lexicographic ("10" sorts before "2").

    Raises:
        ValueError: if a flag is not found in ``string.ascii_uppercase``.
        IndexError: if a non-blank line has fewer than two columns.
    """
    lookup = string.ascii_uppercase

    # Split each line into columns and drop the lines that were blank.
    rows = [fields for fields in (line.split() for line in s.split('\n')) if fields]
    if not rows:
        return

    header, records = rows[0], rows[1:]
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print("%s %s" % (header[0], header[1]))

    rst = {}
    for fields in records:
        key, value = fields[0], fields[1]
        current = lookup.index(value)
        # Keep the flag only if it is the first one for this key or it
        # outranks the flag recorded so far.
        if key not in rst or current > lookup.index(rst[key]):
            rst[key] = value

    for k in sorted(rst):
        print("%s %s" % (k, rst[k]))
if __name__ == "__main__":
    # Demo data: a header row followed by (account number, flag) rows,
    # with several rows per account number.
    sample_report = """
Account_number Flag
1 A
1 B
2 A
2 B
2 C
3 A
3 C
"""
    printWst(sample_report)
#!/usr/bin/env python
# coding=utf-8
from string import ascii_uppercase
def lookup(x):
    """Return the zero-based position of ``x`` within ``ascii_uppercase``.

    Raises ValueError when ``x`` does not occur in ``ascii_uppercase``.
    """
    position = ascii_uppercase.index(x)
    return position
with open('c:/temp/test.txt') as infile:
    # Single pass over the file, keeping only the best row of the current
    # run of equal keys. This relies on rows with the same key being
    # adjacent in the file -- TODO confirm the input is grouped/sorted.
    carry = None  # best [key, value, ...] row of the current run, if any
    for line in infile:
        lineLst = line.split()
        if len(lineLst) < 2:
            # Blank or incomplete line: nothing to compare, skip it.
            continue
        key, value = lineLst[0], lineLst[1]
        if carry is None or key != carry[0]:
            # A new run starts: emit the finished one (if there was one).
            if carry is not None:
                print(carry)
            carry = lineLst
        elif lookup(value) > lookup(carry[1]):
            # Same key with a higher-ranked value: remember it.
            carry[1] = value
    # Emit the final run; nothing is printed for an empty file.
    if carry is not None:
        print(carry)
Goal: for each account number, find its highest rating (the letter that comes latest in the alphabet).
sample.txt
Account_number Rating
1 A
1 B
2 A
2 B
2 C
3 A
3 C
import pyspark
sc = pyspark.SparkContext()
try:
    rdd = sc.textFile('sample.txt')
    result = (
        rdd.map(lambda line: line.split())
        # Keep only data rows: at least two columns and a numeric key.
        # This drops the header row and any blank lines.
        .filter(lambda fields: len(fields) >= 2 and fields[0].isdigit())
        # reduceByKey works on (key, value) pairs, so build them explicitly.
        .map(lambda fields: (fields[0], fields[1]))
        # max on strings picks the rating that sorts last for each key.
        .reduceByKey(max)
    )
    # NOTE(review): collect() order is presumably not sorted by key; sort
    # the collected list if a stable order is needed.
    for x in result.collect():
        print(x)
finally:
    # Always release the SparkContext, even if the job fails.
    sc.stop()