Created
May 16, 2014 14:12
-
-
Save 1328/9263ae8cfb4cdd342461 to your computer and use it in GitHub Desktop.
Poor man's CSV readers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from pprint import pprint | |
def option1(fn):
    '''Read a comma-separated file into a list of rows.

    The screwy quoting in the input breaks the csv module, so each line is
    split on commas by hand. No quote handling is done: a comma inside a
    quoted field still splits that field.

    Args:
        fn: Path of the file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        raw string fields found on that line.
    '''
    with open(fn, mode='r') as fh:
        # Strip before testing for emptiness: a raw line always carries its
        # newline, so it is never falsy and blank lines would otherwise be
        # stored as [''].
        stripped = (line.strip() for line in fh)
        return [line.split(',') for line in stripped if line]
| '''generators look just like functions, except they yield their results one piece at a time''' | |
| '''this is really useful when processing files''' | |
| '''so let's do this again, using a generator to step through the file line by line''' | |
def gen_option2(fn):
    '''Yield the rows of a comma-separated file one at a time.

    Each non-blank line is split on commas, and square brackets and
    single/double quote characters are stripped from both ends of every
    field.

    Args:
        fn: Path of the file to read.

    Yields:
        A list of string fields for each non-blank line.
    '''
    with open(fn, mode='r') as fh:
        for r in fh:
            # Strip first: the raw line still holds its newline, so testing
            # it directly never filters out blank lines.
            r = r.strip()
            if r:
                yield [i.strip('[\'\"]') for i in r.split(',')]
def option2(fn):
    '''Read a comma-separated file into a list of rows via gen_option2.

    Args:
        fn: Path of the file to read.

    Returns:
        A list holding every row produced by gen_option2(fn), in file order.
    '''
    # list() drains the generator directly; no manual append loop is needed.
    return list(gen_option2(fn))
| '''ok, now let's really clean up the data''' | |
| '''this option uses a helper function cleaner(item) that cleans item''' | |
| '''the cleaner function takes in a string and: | |
| 1. changes blanks to None | |
| 2. changes strings that are floats, like "1.23", into actual floats, e.g. 1.23 | |
| 3. changes whole-number values into ints, so "1.0" comes back as the int 1 | |
| 4. collapses runs of extra whitespace inside strings | |
| ''' | |
def gen_option3(fn):
    '''Yield the rows of a comma-separated file with every field cleaned.

    Just like gen_option2, but each field is passed through cleaner()
    instead of only having its brackets and quotes stripped.

    Args:
        fn: Path of the file to read.

    Yields:
        A list of cleaner() results for each non-blank line.
    '''
    with open(fn, mode='r') as fh:
        for r in fh:
            # Strip first: the raw line still holds its newline, so testing
            # it directly never filters out blank lines.
            r = r.strip()
            if r:
                yield [cleaner(i) for i in r.split(',')]
def cleaner(i):
    '''Convert one raw field into None, an int, a float, or a tidied string.

    Square brackets and single/double quote characters are stripped from
    both ends of the field before it is interpreted.

    Args:
        i: The raw field text.

    Returns:
        None if nothing is left after stripping; an int if the text parses
        as a whole number; a float if it parses as any other number
        (including inf and nan); otherwise the text with every run of
        whitespace collapsed to a single space.
    '''
    i = i.strip('[\'\"]')
    if not i:
        return None
    try:
        number = float(i)
    except ValueError:
        # ok we have a string: get rid of excess spaces
        return re.sub(r'\s+', ' ', i)
    # is_integer() is False for inf and nan, so int() is never asked to
    # convert a non-finite value (which would raise).
    if number.is_integer():
        return int(number)
    return number
def option3(fn):
    '''Read a comma-separated file into a list of cleaned rows.

    Extends option2 by using gen_option3, which runs every field through
    the cleaner function to pretty things up some more.

    Args:
        fn: Path of the file to read.

    Returns:
        A list holding every row produced by gen_option3(fn), in file order.
    '''
    # list() drains the generator directly; no manual append loop is needed.
    return list(gen_option3(fn))
def main():
    '''Parse the two sample files and display the rows of the second one.'''
    # The result of this first parse is never used; only file1.REAL is shown.
    option3('FILE2.COMPAS')
    rows = option3('file1.REAL')
    pprint(rows)
    # First field of the first row.
    print(rows[0][0])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment