Skip to content

Instantly share code, notes, and snippets.

@ankitml
Created January 7, 2016 07:21
Show Gist options
  • Save ankitml/973057094b3a2f1fc49f to your computer and use it in GitHub Desktop.
Save ankitml/973057094b3a2f1fc49f to your computer and use it in GitHub Desktop.
CSV transformations
from true_type import get_type
import csv
def get_percentile(data_list, score, kind='weak'):
    """
    Return the percentile rank of ``score`` relative to ``data_list``.

    A percentile of, for example, 80 means that 80 percent of the scores
    in ``data_list`` are below the given score. In the case of gaps or
    ties, the exact definition depends on the ``kind`` argument.

    This function is a modification of ``scipy.stats.percentileofscore``
    with the numpy dependency removed. scipy's ``'rank'`` kind is NOT
    implemented here.

    Parameters:
        data_list: sized collection (e.g. a list) of scores to which
            ``score`` is compared. Must not be empty.
        score: value that is compared to the elements of ``data_list``.
        kind: one of ``'weak'`` (default), ``'strict'`` or ``'mean'``:

            * ``'weak'``: corresponds to a cumulative distribution
              function; counts values less than or equal to ``score``.
            * ``'strict'``: counts only values strictly less than
              ``score``.
            * ``'mean'``: the average of the ``'weak'`` and ``'strict'``
              results.

    Returns:
        The percentile rank as a float between 0.0 and 100.0.

    Raises:
        ValueError: if ``kind`` is not one of the supported values, or if
            ``data_list`` is empty (a percentile is undefined then).

    References:
        * Percentile rank: http://en.wikipedia.org/wiki/Percentile_rank
        * scipy.stats: http://www.scipy.org/SciPyPackages/Stats

    Example usage.

    Three-quarters of the given values are less than or equal to 3:

    >>> get_percentile([1, 2, 3, 4], 3)
    75.0

    Only 2/5 values are strictly less than 3:

    >>> get_percentile([1, 2, 3, 3, 4], 3, kind='strict')
    40.0

    But 4/5 values are less than or equal to 3:

    >>> get_percentile([1, 2, 3, 3, 4], 3, kind='weak')
    80.0

    The average between the weak and the strict scores is:

    >>> get_percentile([1, 2, 3, 3, 4], 3, kind='mean')
    60.0
    """
    if kind not in ('strict', 'weak', 'mean'):
        raise ValueError(
            "The kind kwarg must be 'strict', 'weak' or 'mean'. "
            "You can also opt to leave it out and rely on the default method."
        )

    n = len(data_list)
    if n == 0:
        # Without this guard the division below fails with an opaque
        # ZeroDivisionError.
        raise ValueError("data_list must contain at least one score.")

    if kind == 'strict':
        return sum(1 for value in data_list if value < score) / float(n) * 100
    if kind == 'weak':
        return sum(1 for value in data_list if value <= score) / float(n) * 100

    # kind == 'mean': average of the strict and weak percentages.
    strictly_below = sum(1 for value in data_list if value < score)
    at_or_below = sum(1 for value in data_list if value <= score)
    return (strictly_below + at_or_below) * 50 / float(n)
def assignments(in_path='assignments.csv', out_path='qq.csv',
                total_assignments=3):
    """
    Copy a CSV file, adding an ``assignments_submitted`` column.

    For every data row, the new column holds ``total_assignments`` minus
    the number of fields in that row that are exactly ``'0'`` (after
    stripping surrounding whitespace). Every field in the row is
    inspected, not only the assignment columns.

    Parameters:
        in_path: CSV file to read; its first row is treated as the header.
        out_path: CSV file to write (overwritten if it exists).
        total_assignments: number the zero count is subtracted from.

    Raises:
        ValueError: if the input file has no header row.
    """
    with open(in_path) as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: expected a header row." % in_path)
        header = [field.strip() for field in header]
        header.append('assignments_submitted')

        rows = [header]
        for raw_row in reader:
            fields = [field.strip() for field in raw_row]
            if fields in ([], ['']):
                # Blank or whitespace-only line (e.g. a trailing newline):
                # there is no record here to score.
                continue
            zeroes = fields.count('0')
            fields.append(str(total_assignments - zeroes))
            rows.append(fields)

    with open(out_path, 'w') as out_file:
        csv.writer(out_file, delimiter=',').writerows(rows)
def questions(in_path='questions.csv', out_path='qqq.csv',
              total_questions=14):
    """
    Copy a CSV file, adding a ``questions_attempted`` column.

    For every data row, the new column holds ``total_questions`` minus the
    number of fields in that row that are exactly ``'0'`` (after stripping
    surrounding whitespace). Every field in the row is inspected, not only
    the question columns.

    Parameters:
        in_path: CSV file to read; its first row is treated as the header.
        out_path: CSV file to write (overwritten if it exists).
        total_questions: number the zero count is subtracted from.

    Raises:
        ValueError: if the input file has no header row.
    """
    with open(in_path) as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: expected a header row." % in_path)
        header = [field.strip() for field in header]
        header.append('questions_attempted')

        rows = [header]
        for raw_row in reader:
            fields = [field.strip() for field in raw_row]
            if fields in ([], ['']):
                # Blank or whitespace-only line (e.g. a trailing newline):
                # there is no record here to score.
                continue
            zeroes = fields.count('0')
            fields.append(str(total_questions - zeroes))
            rows.append(fields)

    with open(out_path, 'w') as out_file:
        csv.writer(out_file, delimiter=',').writerows(rows)
def final(in_path='grades.csv', out_path='ggg.csv', score_column=11):
    """
    Copy a CSV file, adding a ``percentile`` column.

    The field at index ``score_column`` of every data row is parsed as an
    integer score. Each row then gets the result of
    ``get_percentile(all_scores, row_score)`` with its default kind
    appended as a new last field.

    Parameters:
        in_path: CSV file to read; its first row is treated as the header.
        out_path: CSV file to write (overwritten if it exists).
        score_column: zero-based index of the field holding the score.

    Raises:
        ValueError: if the input file has no header row, or a score field
            is not an integer.
        IndexError: if a data row has no field at ``score_column``.
    """
    with open(in_path) as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: expected a header row." % in_path)
        header = [field.strip() for field in header]
        header.append('percentile')

        rows = []
        for raw_row in reader:
            fields = [field.strip() for field in raw_row]
            if fields in ([], ['']):
                # Blank or whitespace-only line (e.g. a trailing newline):
                # there is no record here to rank.
                continue
            rows.append(fields)

    # All scores must be known before any single percentile can be computed.
    scores = [int(row[score_column]) for row in rows]

    # Rows sharing a score share a percentile, so compute each one once.
    percentile_by_score = {}
    for score in scores:
        if score not in percentile_by_score:
            percentile_by_score[score] = get_percentile(scores, score)

    for row, score in zip(rows, scores):
        row.append(percentile_by_score[score])

    with open(out_path, 'w') as out_file:
        csv.writer(out_file, delimiter=',').writerows([header] + rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment