Skip to content

Instantly share code, notes, and snippets.

@mikkkee
Created July 1, 2015 03:42
Show Gist options
  • Save mikkkee/cb9c76167940590b86dd to your computer and use it in GitHub Desktop.
Save mikkkee/cb9c76167940590b86dd to your computer and use it in GitHub Desktop.
How to parse a file using Python
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 16, "cell_type": "code", "source": "# How file.readlines() works.\n# Suppose we have an input file whose content is what the string test represents, that is, 'abc\\ndef\\n123\\n'.\n\nimport StringIO\n\ntest = 'abc\\ndef\\n123\\n'\n\n# Convert a string into a StringIO object, which can be read and written like a file.\ntest = StringIO.StringIO(test) \n\n# file.readlines() reads the file content line by line into a list of strings.\nlines = test.readlines()\n\nprint lines\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "['abc\\n', 'def\\n', '123\\n']\n\n\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 17, "cell_type": "code", "source": "# Read and parse input file into chunks of libraries.\n\ndef read_lib(fileopened):\n \"\"\"Read input data file and return a list of libraries for further processing.\"\"\"\n # Init output list.\n output = []\n current_lib = []\n \n # Read fileopened by line.\n lines = fileopened.readlines()\n \n # Iterate through lines to parse template libraries.\n for line in lines:\n # Discard leading and trailing whitespace characters in each line,\n # e.g. 
' abc\\n'.strip() returns 'abc'\n line = line.strip()\n \n # Detect beginning of new library template.\n if line.startswith('Template Library ID'):\n # Append previous library to output.\n output.append(current_lib)\n # Start new library from this line.\n current_lib = [line]\n else: # Not a library header: file header text or library content.\n if line: # line may be an empty string after stripping whitespace.\n current_lib.append(line)\n \n # Append last library to output.\n output.append(current_lib)\n \n return output", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 18, "cell_type": "code", "source": "# Sample run using sample.dat\n\nsample = \"\"\"\nThis is header area\nHeader tells you something\n\nTemplate Library ID: 1\n=======================\nNumber of Friends: 3\nNumber of Enemies: 1\n\nLevel: 15\n\n\nTemplate Library ID: 2\n=======================\nNumber of Friends: 5\nNumber of Enemies: 3\n\nLevel: 20\n\"\"\"\n\nimport StringIO\n\nsample = StringIO.StringIO(sample)\n\nlibraries = read_lib(sample)\n\nprint libraries[0] # Header\nprint libraries[1] # Template Library 1\nprint libraries[2] # Template Library 2\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "['This is header area', 'Header tells you something']\n['Template Library ID: 1', '=======================', 'Number of Friends: 3', 'Number of Enemies: 1', 'Level: 15']\n['Template Library ID: 2', '=======================', 'Number of Friends: 5', 'Number of Enemies: 3', 'Level: 20']\n\n\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 31, "cell_type": "code", "source": "# Further processing\n# We can write parsed libraries into separate files or parse them into Python classes.\n\n# Write to file.\nlib1 = open('lib1.dat', 'w')\nfor line in libraries[1]:\n lib1.write(line + '\\n')\nlib1.close()\n\n\n# Parse into Classes\nclass TemplateLibrary(object):\n def __init__(self, lib):\n self.lib_id = int(lib[0].split()[-1])\n 
self.n_friends = int(lib[2].split()[-1])\n self.n_enemies = int(lib[3].split()[-1])\n self.level = int(lib[4].split()[-1])\n \n def __repr__(self):\n return 'Template Library ' + str(self.lib_id)\n \n \nlib_classes = []\nfor lib in libraries[1:]: # Discard the non-library header\n lib_classes.append(TemplateLibrary(lib))\n \nprint lib_classes\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "[Template Library 1, Template Library 2]\n\n\n-----------------------\nContent of lib1.dat:\n\nTemplate Library ID: 1\n=======================\nNumber of Friends: 3\nNumber of Enemies: 1\nLevel: 15\n"}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment