Last active
January 25, 2021 13:27
-
-
Save drcjar/d993b80c464020445e45ad4049a76b70 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Space-delimited IPF meta-GWAS summary statistics; read_csv with an explicit\n", | |
"# separator parses the file the same way read_table does.\n", | |
"df = pd.read_csv('allen_et_al_2019_ipf_meta_gwas_summary_statistics_public_download.txt', sep=' ')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df1 = pd.read_csv('smoking_exposure_snps.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df1.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Keep only the GWAS rows whose rsid also appears in the exposure SNP list.\n", | |
"in_exposure = df['rsid'].isin(df1['SNP'])\n", | |
"df2 = df[in_exposure]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Mirror filter: keep the exposure SNPs that are present in the filtered GWAS rows.\n", | |
"in_outcome = df1['SNP'].isin(df2['rsid'])\n", | |
"df3 = df1[in_outcome]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Order both frames by SNP id and expose the exposure ids under an 'rsid' column as well.\n", | |
"df2 = df2.sort_values('rsid')\n", | |
"df3 = df3.sort_values('SNP')\n", | |
"df3 = df3.assign(rsid=df3['SNP'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Compare the two sorted id columns element by element (and by length).\n", | |
"# Reducing each side with .all() before comparing would collapse every column\n", | |
"# to a single value and never verify that the rows actually line up.\n", | |
"assert df2['rsid'].tolist() == df3['SNP'].tolist(), 'outcome and exposure SNP ids are not aligned'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Join the two frames on the shared 'rsid' key (merge defaults to an inner join).\n", | |
"df4 = df2.merge(df3, on='rsid')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df4.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df4.to_csv('2samplemr.csv',index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Column layout expected by the R TwoSampleMR package\n", | |
"\n", | |
"# outcome_dat: - SNP - beta.outcome - se.outcome - effect_allele.outcome - other_allele.outcome - eaf.outcome - outcome\n", | |
"\n", | |
"# exposure_dat: - SNP - beta.exposure - se.exposure - effect_allele.exposure - other_allele.exposure - eaf.exposure\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat = df4[['rsid', 'chromosome', 'position', 'non_effect_allele', 'effect_allele',\n", | |
" 'effect_allele_frequency', 'studies_included',\n", | |
" 'imputation_quality_by_study', 'effect_direction_by_study', 'beta',\n", | |
" 'standard_error', 'p']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"exposure_dat = df4[['SNP', 'CHR', 'BP', 'EA', 'NonEA', 'EAF', 'Beta', 'SE', 'p-value']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat = outcome_dat[['rsid', 'beta', 'standard_error', 'effect_allele', 'non_effect_allele', 'effect_allele_frequency']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat.columns = ['SNP', 'beta.outcome', 'se.outcome', 'effect_allele.outcome', 'other_allele.outcome', 'eaf.outcome']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat['outcome'] = 'IPF'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat['id.outcome'] = 'IPF'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"exposure_dat = exposure_dat[['SNP', 'Beta', 'SE', 'EA', 'NonEA', 'EAF']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"exposure_dat.columns = ['SNP', 'beta.exposure', 'se.exposure', 'effect_allele.exposure', 'other_allele.exposure', 'eaf.exposure']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"exposure_dat['id.exposure'] = 'smoking'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"exposure_dat['exposure'] = 'smoking'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat.isnull().sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"outcome_dat.to_csv('outcome_dat.csv', index=False)\n", | |
"exposure_dat.to_csv('exposure_dat.csv', index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment