Created
July 1, 2015 03:42
-
-
Save mikkkee/cb9c76167940590b86dd to your computer and use it in GitHub Desktop.
How to parse a file using Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"nbformat_minor": 0, "cells": [{"execution_count": 16, "cell_type": "code", "source": "# How file.readlines() works.\n# Suppose we have an input file with content as the string test represents, that is, 'abc\\ndef\\n123\\n'.\n\nimport StringIO\n\ntest = 'abc\\ndef\\n123\\n'\n\n# Convert a string into a StringIO object, which can be read and written like a file.\ntest = StringIO.StringIO(test) \n\n# file.readlines() can read file content by line into a list of strings.\nlines = test.readlines()\n\nprint lines\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "['abc\\n', 'def\\n', '123\\n']\n\n\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 17, "cell_type": "code", "source": "# Read and parse input file into chunks of libraries.\n\ndef read_lib(fileopened):\n \"\"\"Read input data file and return a list of libraries for further processing.\"\"\"\n # Init output list.\n output = []\n current_lib = []\n \n # Read fileopened by line.\n lines = fileopened.readlines()\n \n # Iterate through lines to parse template libraries.\n for line in lines:\n # Discard leading and trailing whitespace characters in each line,\n # e.g. 
' abc\\n'.strip() returns 'abc'\n line = line.strip()\n \n # Detect beginning of new library template.\n if line.startswith('Template Library ID'):\n # Append previous library to output.\n output.append(current_lib)\n # Start new library from this line.\n current_lib = [line]\n else: # Not a library header, i.e. part of the library contents.\n if line: # line may be empty string after discarding whitespace.\n current_lib.append(line)\n \n # Append last library to output.\n output.append(current_lib)\n \n return output", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 18, "cell_type": "code", "source": "# Sample run using sample.dat\n\nsample = \"\"\"\nThis is header area\nHeader tells you something\n\nTemplate Library ID: 1\n=======================\nNumber of Friends: 3\nNumber of Enemies: 1\n\nLevel: 15\n\n\nTemplate Library ID: 2\n=======================\nNumber of Friends: 5\nNumber of Enemies: 3\n\nLevel: 20\n\"\"\"\n\nimport StringIO\n\nsample = StringIO.StringIO(sample)\n\nlibraries = read_lib(sample)\n\nprint libraries[0] # Header\nprint libraries[1] # Template Library 1\nprint libraries[2] # Template Library 2\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "['This is header area', 'Header tells you something']\n['Template Library ID: 1', '=======================', 'Number of Friends: 3', 'Number of Enemies: 1', 'Level: 15']\n['Template Library ID: 2', '=======================', 'Number of Friends: 5', 'Number of Enemies: 3', 'Level: 20']\n\n\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 31, "cell_type": "code", "source": "# Further processing\n# We can write parsed libraries into separate files or into Python Classes.\n\n# Write to file.\nlib1 = open('lib1.dat', 'w')\nfor line in libraries[1]:\n lib1.write(line + '\\n')\nlib1.close()\n\n\n# Parse into Classes\nclass TemplateLibrary(object):\n def __init__(self, lib):\n self.lib_id = int(lib[0].split()[-1])\n 
self.n_friends = int(lib[2].split()[-1])\n self.n_enemies = int(lib[3].split()[-1])\n self.level = int(lib[4].split()[-1])\n \n def __repr__(self):\n return 'Template Library ' + str(self.lib_id)\n \n \nlib_classes = []\nfor lib in libraries[1:]: # Discard non-library header\n lib_classes.append(TemplateLibrary(lib))\n \nprint lib_classes\nprint\nprint", "outputs": [{"output_type": "stream", "name": "stdout", "text": "[Template Library 1, Template Library 2]\n\n\n-----------------------\nContent of lib1.dat:\n\nTemplate Library ID: 1\n=======================\nNumber of Friends: 3\nNumber of Enemies: 1\nLevel: 15\n"}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment