Skip to content

Instantly share code, notes, and snippets.

@drcjar
Last active September 10, 2019 08:57
Show Gist options
  • Save drcjar/ec4a5c061e5cb06319c3aed1c5035030 to your computer and use it in GitHub Desktop.
Save drcjar/ec4a5c061e5cb06319c3aed1c5035030 to your computer and use it in GitHub Desktop.
vyshnavi_work
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('vyshnavi.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['PatientRK', 'Typical HRCT pattern Y/N',\n",
" 'What the pattern is (UIP/NSIP/CPFE/OP/Other (specify))',\n",
" 'Radiologist impression based on CT (HP, CHP, other (specify))',\n",
" 'inducer (N/Y (what?)', 'r/o tb/ntm', 'ssig (N/Y(what?)',\n",
" 'bx (N/Y(result)', 'dx from letters (HP, CHP, other (specify)'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"650"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['PatientRK'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True 0.695385\n",
"False 0.304615\n",
"Name: What the pattern is (UIP/NSIP/CPFE/OP/Other (specify)), dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['What the pattern is (UIP/NSIP/CPFE/OP/Other (specify))'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"UIP 64\n",
"NSIP 44\n",
"NSIP/UIP 16\n",
"FIBROTIC NSIP 8\n",
"UIP 8\n",
"OP 6\n",
"FIBROSING OP 5\n",
"fibrosing NSIP 4\n",
"DIP 3\n",
"FIBROSING NSIP 2\n",
"fibrotic NSIP 2\n",
"UIP/IPF 2\n",
"FIBROTIC NSIP/UIP 2\n",
"IPF/UIP 2\n",
"NSIP 2\n",
"NSIP/OP 2\n",
"UIP/OP 1\n",
"atypical UIP OR possibility of a NSIP/OP overlap 1\n",
"OP/NSIP 1\n",
"Fibrotic NSIP with superadded OP 1\n",
"MILD NSIP 1\n",
"NSIP/LIP 1\n",
"IDIOPATHIC UIP/IPF 1\n",
"fibrotic NSIP related connective tissue disease 1\n",
"Interstitial lung disease with a degree of fibrosis + OP 1\n",
"POSSIBLE UIP 1\n",
"OP/NSIP overlap 1\n",
"SARCODOSIS/CHP 1\n",
"fibrotic NSIP and UIP 1\n",
"SUBPLEURAL UIP PATTERN 1\n",
"NIP/UIP 1\n",
"rapidly deteriorating IPF/UIP 1\n",
"FIBROTIC NSIP/ATYPICAL UIP 1\n",
"UIP/IPF or fibrosing NSIP cannot be excluded 1\n",
"DRUG-RELATED NSIP 1\n",
"POSSIBLE NSIP 1\n",
"LIP 1\n",
"OP/FIBROSING NSIP 1\n",
"OP/fibrotic NSIP 1\n",
"probable NSIP 1\n",
" OP 1\n",
"UIP/NSIP 1\n",
"Name: What the pattern is (UIP/NSIP/CPFE/OP/Other (specify)), dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['What the pattern is (UIP/NSIP/CPFE/OP/Other (specify))'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 0.950769\n",
"True 0.049231\n",
"Name: Radiologist impression based on CT (HP, CHP, other (specify)), dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Radiologist impression based on CT (HP, CHP, other (specify))'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CHP 180\n",
"HP 129\n",
"POSSIBLE HP 25\n",
"NO EVIDENCE OF HP 18\n",
"SARCOIDOSIS/HP 18\n",
"POSSIBLE CHP 18\n",
"SUBACUTE HP 15\n",
"SARCOIDOSIS/CHP 10\n",
"FIBROSING HP 8\n",
"POSSIBLE HP 7\n",
"FIBROTIC HP 6\n",
"SARCOIDOSIS 5\n",
"CHP/SARCOIDOSIS 5\n",
"NO EVIDENCE OF HP 4\n",
"FIBROTIC CHP 4\n",
"HP (LESS LIKELY) 4\n",
"NO EVIDENCE OF CHP 4\n",
"HP 3\n",
"ILD 3\n",
"HP/PAP overlap 3\n",
"RB-ILD/HP 3\n",
"FIBROSING HP 3\n",
"CHP (LESS LIKELY) 3\n",
"HP/LIP 3\n",
"PULMONARY HYPERTENSION 2\n",
"IPF/CHP 2\n",
"CHP/DRUG RELATED FIBROSIS 2\n",
"POSSIBLE SUBACUTE HP 2\n",
"POSSIBLE CHP 2\n",
"RBILD/HP 2\n",
" ... \n",
"RB-ILD/DIP and HP 1\n",
"possibility of HP/very mild RB-ILD 1\n",
"LIP/HP 1\n",
"RBILD/SUBACUTE HP 1\n",
"SARCODOSIS/CHP 1\n",
"obliterative bronchiolitis / HP 1\n",
"CHP/drug reaction secondary to amiodarone therapy 1\n",
"HP/SARCOIDOSIS/LIP 1\n",
" RB-ILD + subacute HP 1\n",
"CHP/PULMONARY HYPERTENSION 1\n",
"POSSIBLE NON-FIBROTIC CHP 1\n",
"LIP 1\n",
"ongoing subacute HP/ progressive fibrotic HP 1\n",
"subacute HP/ respiratory bronchiolitis 1\n",
"HP LESS LIKELY 1\n",
"FIBROTIC OP/UIP 1\n",
"pleuroparenchymal fibroelastosis 1\n",
"HP-pulmonary alveolar proteinosis overlap 1\n",
"CHP/CTD 1\n",
"surfactant deficiency or hypersensitivity pneumonitis 1\n",
"SUBACUTE HP 1\n",
"SUBACUTE/CHP 1\n",
"widespread pulmonary fibrosis 1\n",
" atypical mycobacterium/ possibility of a vasculitis 1\n",
"fibrotic interstitial lung disease/HP 1\n",
"SUBACUTE CHP 1\n",
" connective tissue disease/CHP 1\n",
"SUBACUTE HP/PULMONARY HAEMORRHAGE 1\n",
"RBILD/ACUTE HP 1\n",
"FIBROTIC IIP/HP 1\n",
"Name: Radiologist impression based on CT (HP, CHP, other (specify)), Length: 154, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Radiologist impression based on CT (HP, CHP, other (specify))'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 0.98\n",
"True 0.02\n",
"Name: inducer (N/Y (what?), dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['inducer (N/Y (what?)'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"N 301\n",
"Y - BIRDS 11\n",
"Y - EX-SMOKER 11\n",
"Y - MOULD 10\n",
"Y - ASBESTOS 5\n",
"Y - DUST 5\n",
"Y - SMOKER 5\n",
"Y - PARROT 4\n",
"Y 4\n",
"Y - EX SMOKER 4\n",
"Y - POSSIBLE EXPOSURE TO BIRDS 3\n",
"Y - POSSIBLE EXPOSURE TO PARROTS 3\n",
"Y - COMPOST 3\n",
"Y - FEATHER PILLOWS 3\n",
"Y - HORSES + HAY 3\n",
"Y- MOULD AND DAMP 2\n",
"Y - PARROTS 2\n",
"Y - CHILDHOOD EXPOSURE TO BIRDS 2\n",
"Y - POSSIBLE EXPOSURE TO ASBESTOS 2\n",
"Y - HORSES 2\n",
"Y - CHICKENS 2\n",
"Y - MOULD + BIRDS 2\n",
"Y - BIRD 2\n",
"Y - POSSIBLE DAMPNESS 2\n",
"Y - POSSIBLE EXPOSURE TO DAMP 2\n",
"Y - DAMP + MOULD 2\n",
"Y - EXPOSURE TO ASBESTOS + DAMP/MOULD 1\n",
"Y - MOULD/DAMP AND POSSIBLE LEAKING DIESEL 1\n",
"Y - MOULD + DUST 1\n",
"Y -POSSIBLE EXPOSURE TO BIRDS/COMPOST 1\n",
" ... \n",
"Y - BIRDS, FEATHER DUVETS 1\n",
"Y - BIRDS; COMPOST 1\n",
"Y - FARMER'S LUNG - WEED KILLER/wet and mouldy silage 1\n",
"Y- EX-SMOKER; FEATHER PILLOWS + DUVETS 1\n",
"Y - POSSIBLE EXPOSURE TO SPICES 1\n",
"Y - COTTON + INORGANIC DUST 1\n",
"Y - EX-SMOKER; ASBESTOS; ALUMINIUM, LEAD + ZINC 1\n",
"Y - AVIAN + FIRE 1\n",
"Y - ACETATE; BUDGIE; ASBESTOS 1\n",
"Y - epoxy-resin systems, aluminum, sulfuric acid, acid anhydrides, isocyanates and asbestos, cockatiel + COMPOST ODOURS 1\n",
"Y - EXPOSURE TO SAND 1\n",
"Y - EXPOSURE TO PARROT/DIRT/COMPOST 1\n",
"Y - MOULD; wet clay, paints, dyes and glazing; COMPOST 1\n",
"Y - ASBESTOS + BIRDS 1\n",
"Y - SMOKER + fibreglass dust 1\n",
"Y - EX-SMOKER; BIRDS; METAL DUST; DAMP 1\n",
"Y - POSSIBLE ASBESTOS EXPOSURE 1\n",
"Y - EXPOSURE TO PIGEONS + CANARIES 1\n",
"Y- GOOSE FEATHER DUVET 1\n",
"Y - BUDGIES 1\n",
"Y - PIGEONS 1\n",
"Y - WILD BIRDS (E.G. PIGEONS) 1\n",
"Y - MOULD + SPICES 1\n",
"Y - EX-SMOKER; cleaning products, glues, paper dust 1\n",
"Y - metal dusts, rubber fumes, sand mists and noxious chemicals 1\n",
"Y - EXPOSURE TO EXHAUST FUMES 1\n",
"Y - EX-SMOKER; ASBESTOS 1\n",
"Y - cleaning product fumes; DAMP + MOULD 1\n",
"Y - SOME EXPOSURE TO FUMES 1\n",
"MOULD EXPOSURE 1\n",
"Name: inducer (N/Y (what?), Length: 266, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['inducer (N/Y (what?)'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True 0.972308\n",
"False 0.027692\n",
"Name: r/o tb/ntm, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['r/o tb/ntm'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"History of TB 15\n",
" 1\n",
"Y 1\n",
"identified MTB 1\n",
"Name: r/o tb/ntm, dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['r/o tb/ntm'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 0.975385\n",
"True 0.024615\n",
"Name: ssig (N/Y(what?), dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['ssig (N/Y(what?)'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"N 583\n",
"N 6\n",
"Y - AVIAN 4\n",
"Y - AVIAN + ASPERGILLUS 3\n",
"Y- positive avian precipitins 2\n",
"pigeon serum feathers+ droppings+ budgerigar droppings and feathers 1\n",
"Cardiolipin IgM antibodies  1\n",
"Y - positive PR3 antibody 1\n",
"Y- THERMOACTINOMYCES 1\n",
"Y - Aspergillus IgG 1\n",
"Y - positive avian precipitins 1\n",
"Y - AVIAN PRECIPITANTS 1\n",
"Y - ASPERGILLUS PRECIPITANTS 1\n",
"Y - anti-Ro52 1\n",
"Y - weakly positive aspergillus precipitins 1\n",
"POSITIVE AVIUM PRECIPITINS 1\n",
"Y- high anti-Aspergillus precipitins 1\n",
"Y - positive aspergillus IgG 1\n",
"Y - ASPERGILLUS 1\n",
"ENA positive weekly ANCA positive 1\n",
"Y - antinuclear antibody, and avian specific IgG levels 1\n",
"Y - positive Farmers lung mix precipitins 1\n",
"Y - mycobacterial avium intracellulare 1\n",
"Y - HIGH IGG VALUES FOR AL AVIAN PRECIPITANS 1\n",
"Y - positive test for Aspergillus IgG 1\n",
"Positive anti-CCP antibodies 1\n",
"Y - high aspergillus IGG 1\n",
"Y - positive aspergillus precipitins 1\n",
"Y - aspergillus 1\n",
"Y - ASPERGILLUS 1\n",
"Y - avian-specific IgG antibodies 1\n",
"Y - Aspergillus IgG levels were markedly raised 1\n",
"Y- AVIAN + ASPERGILLUS 1\n",
"Y - positive aspergillus IgG (145) 1\n",
"Y - pANCA antibodies 1\n",
"Positive avian precipitins 1\n",
"Y - AVIAN PRECIPITANTS POSITIVE FOR PARROT, CANARY, PARAKEET ETC. 1\n",
"Y -positive precipitins against a few moulds 1\n",
"Y - equivocal micropolyspora foeni antibodies 1\n",
"Y - DUST + CAT DANDER 1\n",
"P-ANCA positive 1\n",
"Name: ssig (N/Y(what?), dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['ssig (N/Y(what?)'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 0.986154\n",
"True 0.013846\n",
"Name: bx (N/Y(result), dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['bx (N/Y(result)'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"N 285\n",
"Y - NORMAL 100\n",
"Y 36\n",
"Y - CONFIRMED DIAGNOSIS OF HP 14\n",
"Y - N/A 7\n",
"Y -NORMAL 6\n",
"Y - UIP PATTERN 6\n",
"Y - NORMAL 6\n",
"Y 5\n",
"Y - UIP 4\n",
"Y - CONSISTENT WITH HP 4\n",
"Y- NORMAL 4\n",
"Y - HP 4\n",
"Y - INFLAMMATORY PATTERN 3\n",
"Y - INCONCLUSIVE 3\n",
"Y - OP 3\n",
"Y - EAA 3\n",
"Y - NAD 2\n",
"Y - Upper airways inflammed 2\n",
"Y -ILD 2\n",
"Y - EVIDENCE OF HP 2\n",
"Y -HP 2\n",
"Y - NSIP 2\n",
"Y - alveolar proteinosis 2\n",
"Y - CONFIRMED DIAGNOSIS OF SARCOIDOSIS 2\n",
"Y - UNREMARKABLE 2\n",
"Y-NORMAL 2\n",
"Y - INTERSTITIAL PNEUMONITIS 1\n",
"Y - POSSIBILITY OF HP 1\n",
"Y - chronic inflammatory infiltrate with giant cells 1\n",
" ... \n",
"Y - features of HP 1\n",
"Y -UIP (focal bronchocentric chronic inflammation) 1\n",
"Y - respiratory bronchiolitis interstitial lung disease 1\n",
"Y - non-caseating granulomatous inflammation 1\n",
"Y - fibrous scars 1\n",
"Y - consistent with UIP with some elements of NSIP 1\n",
"Y - distorted anatomy due to volume loss of right hemithorax 1\n",
"Y - NIL OF NOTE 1\n",
"Y - LIP 1\n",
"Y - POSSIBLE ADENOCARCINOMA + HP 1\n",
"Y-HP 1\n",
"Y - Mild endobronchial inflammation 1\n",
"Y - SUGGESTS HP 1\n",
"Y - chronic inflammation with a bronchiolitic picture and some fibrosis 1\n",
"Y - Minor endobronchial inflammation only 1\n",
"Y -interstitial pneumonia pattern of fibrosis 1\n",
"Y - fibrosing alveolitis 1\n",
"Y - eosinophilia 1\n",
"Y - SMALL AIRWAY DISEASE 1\n",
"Y - fibrosing interstitial process 1\n",
"Y - NSIP/CHP 1\n",
"Y - HP 1\n",
"Y - UIP/IPF 1\n",
"Y -UIP type pattern 1\n",
"Y - respiratory bronchiolitis associated ILD and emphysema 1\n",
"Y - Tracheobronchomalacia 1\n",
"Y - MILD INFLAMMATION 1\n",
"Y -SUGGESTS OP/NSIP 1\n",
"Y - Modest inflammatory changes in bronchial tree 1\n",
"Y - NSIP/HP 1\n",
"Name: bx (N/Y(result), Length: 155, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['bx (N/Y(result)'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 0.978462\n",
"True 0.021538\n",
"Name: dx from letters (HP, CHP, other (specify), dtype: float64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['dx from letters (HP, CHP, other (specify)'].isnull().value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"HP 153\n",
"CHP 104\n",
"IPF 53\n",
"FIBROTIC HP 26\n",
"ILD 20\n",
"SARCOIDOSIS 12\n",
"PULMONARY SARCOIDOSIS 9\n",
"FIBROTIC NSIP 8\n",
"FIBROTIC HP 7\n",
"SUBACUTE HP 7\n",
"CTD-ILD 6\n",
"FIBROTIC CHP 5\n",
"POSSIBLE HP 5\n",
"NSIP 5\n",
"FIBROTIC OP 4\n",
"UIP 3\n",
"PPFE 2\n",
"HP AND PPFE 2\n",
"smoking-related ILD 2\n",
"CHRONIC FIBROTIC HP 2\n",
"Unclassifiable ILD 2\n",
"PULMONARY FIBROSIS 2\n",
"HP 2\n",
"OP/NSIP 2\n",
"IPF/CHP 2\n",
"PREVIOUS HP 2\n",
"CHP 2\n",
"Possible Sarcoidosis 2\n",
"NSIP/HP 2\n",
"SUSPECTED ILD 2\n",
" ... \n",
"LIVER AND BONY METASTASES 1\n",
"pulmonary sarcoidosis 1\n",
"FIBROTIC CHP 1\n",
"Fibrotic NSIP 1\n",
"OtherNSIP/ COP/ Single Lung Transplant 1\n",
"Fibrotic NSIP/IPF 1\n",
"Mild subpleural fibrosis 1\n",
"SMOKING RELATED ILD/OP 1\n",
"Interstitial lung disease with auto-immune features 1\n",
"Overlap of CT-ILD and HP 1\n",
"IDIOPATHIC BRONCHIOLITIS 1\n",
"Wegner’s Granulomatosis 1\n",
"Chronic eosinophilic pneumonia 1\n",
"Fibrotic pulmonary sarcoidosis 1\n",
"AMYLODOSIS 1\n",
"LIMITED ILD 1\n",
"smoking-related interstitial lung disease 1\n",
"PPFE with associated ILD 1\n",
"IDIOPATHIC FIBROSING ALVEOLITIS 1\n",
"IPAf (with an NSIP or IPF 1\n",
"Pulmonary fibrosis following ARDS 1\n",
"Mild air trapping with lymphocytosis and small airways obstruction 1\n",
"UIP/ILD 1\n",
"Inflammatory ILD 1\n",
"Widespread sub-pleural ground glass opacity with sparing of apices 1\n",
"Smoking related ILD /FIBROTIC NSIP 1\n",
"HP WITH PPFE 1\n",
"SUSPECTED CHP 1\n",
"OP 1\n",
"SARCOIDOSIS/CHP 1\n",
"Name: dx from letters (HP, CHP, other (specify), Length: 211, dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['dx from letters (HP, CHP, other (specify)'].value_counts()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment