Created
October 2, 2018 04:54
-
-
Save SimonGoring/efd8df78673077f0137bcd31d84e2fb7 to your computer and use it in GitHub Desktop.
Loading yaml, csv, json and json-ld files in Python.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Working with data formats\n", | |
"\n", | |
"Short worked examples of loading YAML, CSV, JSON and JSON-LD text into Python objects." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{ 'gene': { 'annotation': { 'interaction': { 'genetic': '...',\n", | |
" 'physical': '...'},\n", | |
" 'ontology': { 'biological_process': ['...'],\n", | |
" 'molecular_function': [ { 'annotation_extension': '...',\n", | |
" 'evidence': { 'evidence_code': 'IEA',\n", | |
" 'with': 'UniProtKB-KW:KW-0067'},\n", | |
" 'reference': { 'id': 'GO_REF:0000037'},\n", | |
" 'term': { 'id': 'GO:0005524',\n", | |
" 'name': 'ATP '\n", | |
" 'binding'}},\n", | |
" { 'evidence': 'TAS',\n", | |
" 'reference': { 'author': 'JG, '\n", | |
" 'Goodrich '\n", | |
" 'KJ, '\n", | |
" 'Bähler '\n", | |
" 'J, '\n", | |
" 'Cech '\n", | |
" 'TR.',\n", | |
" 'citation': 'J '\n", | |
" 'Biol '\n", | |
" 'Chem '\n", | |
" '280:5249-5257 '\n", | |
" '2005',\n", | |
" 'id': 'PMID:15591066',\n", | |
" 'title': 'Expression '\n", | |
" 'of '\n", | |
" 'a '\n", | |
" 'RecQ '\n", | |
" 'helicase '\n", | |
" 'homolog '\n", | |
" 'affects '\n", | |
" 'progression '\n", | |
" 'through '\n", | |
" 'crisis '\n", | |
" 'in '\n", | |
" 'fission '\n", | |
" 'yeast '\n", | |
" 'lacking '\n", | |
" 'telomerase.'},\n", | |
" 'term': { 'id': 'GO:0043140',\n", | |
" 'name': 'ATP-dependent '\n", | |
" \"3'-5' \"\n", | |
" 'DNA '\n", | |
" 'helicase '\n", | |
" 'activity'}}]}},\n", | |
" 'gene_type': 'protein_coding',\n", | |
" 'location': '...',\n", | |
" 'name': 'tlh1',\n", | |
" 'organism': {'genus': 'Schizosaccharomyces', 'species': 'pombe'},\n", | |
" 'orthologs': None,\n", | |
" 'product': { 'name': 'RecQ type DNA helicase',\n", | |
" 'sequence': '...',\n", | |
" 'size': '297aa',\n", | |
" 'weight': '34.36kDa'},\n", | |
" 'protein_features': '...',\n", | |
" 'sequence': '...',\n", | |
" 'transcripts': [{'exons': ['...', '...'], 'uniquename': '...'}],\n", | |
" 'uniquename': 'SPAC212.11'}}\n" | |
] | |
} | |
], | |
"source": [ | |
"import pprint\n", | |
"import yaml\n", | |
"\n", | |
"pp = pprint.PrettyPrinter(indent=2)\n", | |
"\n", | |
"yaml_file = \"\"\"\n", | |
" gene:\n", | |
" uniquename: \"SPAC212.11\"\n", | |
" name: \"tlh1\"\n", | |
" organism:\n", | |
" genus: \"Schizosaccharomyces\"\n", | |
" species: \"pombe\"\n", | |
" product:\n", | |
" name: \"RecQ type DNA helicase\"\n", | |
" size: \"297aa\"\n", | |
" weight: \"34.36kDa\"\n", | |
" sequence: \"...\"\n", | |
" location: ...\n", | |
" transcripts:\n", | |
" - uniquename: ...\n", | |
" exons:\n", | |
" - ...\n", | |
" - ...\n", | |
" gene_type: \"protein_coding\"\n", | |
" annotation:\n", | |
" ontology:\n", | |
" molecular_function:\n", | |
" - term:\n", | |
" name: \"ATP binding\"\n", | |
" id: \"GO:0005524\"\n", | |
" evidence:\n", | |
" evidence_code: \"IEA\"\n", | |
" with: \"UniProtKB-KW:KW-0067\"\n", | |
" reference:\n", | |
" id: \"GO_REF:0000037\"\n", | |
" annotation_extension:\n", | |
" ...\n", | |
" - term:\n", | |
" name: \"ATP-dependent 3'-5' DNA helicase activity\"\n", | |
" id: \"GO:0043140\"\n", | |
" evidence: \"TAS\"\n", | |
" reference:\n", | |
" id: \"PMID:15591066\"\n", | |
" title: \"Expression of a RecQ helicase homolog affects progression through crisis in fission yeast lacking telomerase.\"\n", | |
" citation: \"J Biol Chem 280:5249-5257 2005\"\n", | |
" author: \"JG, Goodrich KJ, Bähler J, Cech TR.\"\n", | |
" biological_process:\n", | |
" - ...\n", | |
" interaction:\n", | |
" genetic:\n", | |
" ...\n", | |
" physical:\n", | |
" ...\n", | |
" protein_features:\n", | |
" ...\n", | |
" sequence:\n", | |
" ...\n", | |
" orthologs:\n", | |
"...\n", | |
"\"\"\"\n", | |
"\n", | |
"data = yaml.load(yaml_file)\n", | |
"pp.pprint(data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Importing CSV" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['firstcolumn', 'secondcolumn', 'thirdcolumn', 'fourthcolumn']\n", | |
"[' This Field', ' 12', ' That Field', ' 12.76']\n", | |
"[' children', ' 10', ' 12', ' peanut']\n" | |
] | |
} | |
], | |
"source": [ | |
"import csv\n", | |
"\n", | |
"csv_file = \"\"\"firstcolumn,secondcolumn,thirdcolumn,fourthcolumn\n", | |
" This Field, 12, That Field, 12.76\n", | |
" children, 10, 12, peanut\"\"\"\n", | |
"\n", | |
"csv_import = csv.reader(csv_file.split('\\n'), delimiter = ',')\n", | |
"\n", | |
"for rows in csv_import:\n", | |
" print(rows)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Importing JSON\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ { 'avgCostPrice': None,\n", | |
" 'costPrice': 0,\n", | |
" 'dividend': 0,\n", | |
" 'lastUpdate': '2017-10-08',\n", | |
" 'marketValue': 0.0,\n", | |
" 'marketValueDateTime': '2017-10-06T16:30:00.000Z',\n", | |
" 'marketValuePerUnit': 78.0,\n", | |
" 'marketValueSource': 'XOSL',\n", | |
" 'profit': -100.16,\n", | |
" 'realizedProfit': -100.16,\n", | |
" 'redeemedVolume': 0,\n", | |
" 'security': { 'isin': 'NO0003097503',\n", | |
" 'securityGroup': 'AK',\n", | |
" 'securityName': 'AKVA GROUP ASA',\n", | |
" 'securityName20': 'AKVA GROUP',\n", | |
" 'securityName34': 'AKVA GROUP ASA',\n", | |
" 'securityTicker': 'AKVA',\n", | |
" 'securityType': 'stock',\n", | |
" 'uri': 'json/0/securities/NO0003097503'},\n", | |
" 'tradeAmountMissingForCostPrice': False,\n", | |
" 'transactionFee': 100.16,\n", | |
" 'unrealizedProfit': 0.0,\n", | |
" 'uri': 'json/0/positions/csdAccounts/097141071619/securities/NO0003097503',\n", | |
" 'volume': 0.0}]\n" | |
] | |
} | |
], | |
"source": [ | |
"import json\n", | |
"\n", | |
"json_file = \"\"\"\n", | |
"[\n", | |
" {\n", | |
" \"profit\": -100.16,\n", | |
" \"costPrice\": 0,\n", | |
" \"realizedProfit\": -100.1600,\n", | |
" \"dividend\": 0,\n", | |
" \"lastUpdate\": \"2017-10-08\",\n", | |
" \"security\": {\n", | |
" \"securityName20\": \"AKVA GROUP\",\n", | |
" \"securityType\": \"stock\",\n", | |
" \"securityGroup\": \"AK\",\n", | |
" \"securityTicker\": \"AKVA\",\n", | |
" \"securityName\": \"AKVA GROUP ASA\",\n", | |
" \"uri\": \"json/0/securities/NO0003097503\",\n", | |
" \"securityName34\": \"AKVA GROUP ASA\",\n", | |
" \"isin\": \"NO0003097503\"\n", | |
" },\n", | |
" \"uri\": \"json/0/positions/csdAccounts/097141071619/securities/NO0003097503\",\n", | |
" \"redeemedVolume\": 0,\n", | |
" \"marketValueDateTime\": \"2017-10-06T16:30:00.000Z\",\n", | |
" \"marketValueSource\": \"XOSL\",\n", | |
" \"volume\": 0E-10,\n", | |
" \"marketValuePerUnit\": 78.0,\n", | |
" \"transactionFee\": 100.16,\n", | |
" \"unrealizedProfit\": 0.00,\n", | |
" \"marketValue\": 0.00,\n", | |
" \"avgCostPrice\": null,\n", | |
" \"tradeAmountMissingForCostPrice\": false\n", | |
"}]\"\"\"\n", | |
" \n", | |
"new_json = json.loads(json_file)\n", | |
"pp.pprint(new_json)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ { '@type': ['http://schema.org/Person'],\n", | |
" 'http://schema.org/address': [ { '@type': [ 'http://schema.org/PostalAddress'],\n", | |
" 'http://schema.org/addressLocality': [ { '@value': 'Vancouver'}],\n", | |
" 'http://schema.org/addressRegion': [ { '@value': 'BC'}],\n", | |
" 'http://schema.org/postalCode': [ { '@value': 'V5N4E8'}]}],\n", | |
" 'http://schema.org/alumniOf': [ {'@value': 'Simon Fraser University'},\n", | |
" { '@value': 'University of Northern '\n", | |
" 'British Columbia'},\n", | |
" { '@value': 'University of Wisconsin - '\n", | |
" 'Madison'}],\n", | |
" 'http://schema.org/colleague': [ { '@id': 'http://www.geography.wisc.edu/faculty/williams/lab/People.html'},\n", | |
" {'@id': 'http://www.andriadawson.org'}],\n", | |
" 'http://schema.org/email': [{'@value': 'mailto:[email protected]'}],\n", | |
" 'http://schema.org/familyName': [{'@value': 'Goring'}],\n", | |
" 'http://schema.org/givenName': [{'@value': 'Simon'}],\n", | |
" 'http://schema.org/image': [ { '@id': 'https://i1.rgstatic.net/ii/profile.image/AS%3A321585720823810%401453683418273_l/Simon_Goring.png'}],\n", | |
" 'http://schema.org/jobTitle': [{'@value': 'Assistant Scientist'}],\n", | |
" 'http://schema.org/name': [{'@value': 'Simon Goring'}],\n", | |
" 'http://schema.org/sameAs': [ { '@id': 'http://www.orcid.org/0000-0002-2700-4605#person'},\n", | |
" {'@id': 'http://twitter.com/sjGoring'},\n", | |
" {'@id': 'http://github.com/SimonGoring'}],\n", | |
" 'http://schema.org/url': [{'@id': 'http://www.goring.org'}],\n", | |
" 'http://schema.org/worksFor': [ { '@value': 'University of Wisconsin - '\n", | |
" 'Madison'}]}]\n" | |
] | |
} | |
], | |
"source": [ | |
"import pyld\n", | |
"\n", | |
"jsonld_file = \"\"\"\n", | |
" {\n", | |
" \"@context\": \"http://schema.org\",\n", | |
" \"@type\": \"Person\",\n", | |
" \"address\": {\n", | |
" \"@type\": \"PostalAddress\",\n", | |
" \"addressLocality\": \"Vancouver\",\n", | |
" \"addressRegion\": \"BC\",\n", | |
" \"postalCode\": \"V5N4E8\"\n", | |
" },\n", | |
" \"colleague\": [\n", | |
" \"http://www.geography.wisc.edu/faculty/williams/lab/People.html\",\n", | |
" \"http://www.andriadawson.org\"\n", | |
" ],\n", | |
" \"email\": \"mailto:[email protected]\",\n", | |
" \"image\": \"https://i1.rgstatic.net/ii/profile.image/AS%3A321585720823810%401453683418273_l/Simon_Goring.png\",\n", | |
" \"jobTitle\": \"Assistant Scientist\",\n", | |
" \"name\": \"Simon Goring\",\n", | |
" \"familyName\": \"Goring\",\n", | |
" \"givenName\": \"Simon\",\n", | |
" \"worksFor\": \"University of Wisconsin - Madison\",\n", | |
" \"alumniOf\": [\"Simon Fraser University\", \"University of Northern British Columbia\", \"University of Wisconsin - Madison\"],\n", | |
" \"url\": \"http://www.goring.org\",\n", | |
" \"sameAs\" : [ \"http://www.orcid.org/0000-0002-2700-4605#person\",\n", | |
" \"http://twitter.com/sjGoring\",\n", | |
" \"http://github.com/SimonGoring\"]\n", | |
" }\"\"\"\n", | |
"\n", | |
"ld_in = json.loads(jsonld_file)\n", | |
"\n", | |
"pp.pprint(pyld.jsonld.expand(ld_in))\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment