Last active
June 23, 2022 00:23
-
-
Save davidwhogg/aa850fbcf69f435699752ba786a980d8 to your computer and use it in GitHub Desktop.
A notebook for the Gaia Hike
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "06320673", | |
"metadata": {}, | |
"source": [ | |
"# XP coefficients and APOGEE data\n", | |
"\n", | |
"## Authors:\n", | |
"- **Adrian Price-Whelan** (Flatiron)\n", | |
"- **David W. Hogg** (NYU) (Flatiron)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "60f627c4", | |
"metadata": {}, | |
"source": [ | |
"## Read in APOGEE data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "aa26193d", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T01:40:27.141892Z", | |
"start_time": "2022-06-15T01:40:27.135575Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pathlib\n", | |
"\n", | |
"from astropy.convolution import convolve, Gaussian2DKernel\n", | |
"import astropy.coordinates as coord\n", | |
"from astropy.stats import median_absolute_deviation as MAD\n", | |
"import astropy.table as at\n", | |
"import astropy.units as u\n", | |
"import matplotlib as mpl\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"import numpy as np\n", | |
"from scipy.stats import binned_statistic, binned_statistic_2d\n", | |
"\n", | |
"from pyia import GaiaData\n", | |
"import h5py\n", | |
"\n", | |
"from tqdm import tqdm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2eb39275", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T01:22:16.673443Z", | |
"start_time": "2022-06-15T01:22:07.797965Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Read the three FITS tables used below. Paths are built by string concatenation,\n", | |
"# so datadir must end with a path separator.\n", | |
"datadir = \"./\"\n", | |
"xm = at.Table.read(datadir + 'allStar-dr17-synspec-gaiadr3.fits')\n", | |
"xm2 = at.Table.read(datadir + 'allStar-dr17-synspec-gaiadr3-gaiasourcelite.fits')\n", | |
"# Rename the id column so xm2 can later be joined on the 'GAIADR3_SOURCE_ID' key.\n", | |
"xm2.rename_column('source_id', 'GAIADR3_SOURCE_ID')\n", | |
"allstar = at.Table.read(datadir + 'allStarLite-dr17-synspec_rev1.fits')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "37f1f66a", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:47:13.865546Z", | |
"start_time": "2022-06-15T17:46:54.953972Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Stack allstar and xm column-wise, then keep one row per APOGEE_ID.\n", | |
"# NOTE(review): hstack pairs rows by position, so this presumes xm is row-aligned\n", | |
"# with allstar -- confirm.\n", | |
"tbl = at.unique(at.hstack((allstar, xm)), keys='APOGEE_ID')\n", | |
"# Drop rows whose Gaia DR3 source id is 0 (presumably meaning no cross-match; confirm).\n", | |
"tbl = tbl[tbl['GAIADR3_SOURCE_ID'] != 0]\n", | |
"# Attach the xm2 columns by matching on the Gaia DR3 source id.\n", | |
"tbl = at.join(tbl, xm2, keys='GAIADR3_SOURCE_ID')\n", | |
"len(tbl)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0513a3c2", | |
"metadata": {}, | |
"source": [ | |
"## Read in the BP/RP Coefficients" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "07ef4639", | |
"metadata": {}, | |
"source": [ | |
"See `DR3-xp-subset.ipynb` for how to obtain these data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "79306271", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:47:15.631613Z", | |
"start_time": "2022-06-15T17:47:15.629159Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"apogee_xp_cont_filename = pathlib.Path(datadir + 'apogee-dr17-xpcontinuous.hdf5')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "02677824", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:47:23.349365Z", | |
"start_time": "2022-06-15T17:47:16.202584Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Read data and lightly rearrange\n", | |
"xp_tbl = at.Table()\n", | |
"with h5py.File(apogee_xp_cont_filename, 'r') as f:\n", | |
" xp_tbl['GAIADR3_SOURCE_ID'] = f['source_id'][:]\n", | |
" xp_tbl['bp'] = f['bp_coefficients'][:]\n", | |
" xp_tbl['rp'] = f['rp_coefficients'][:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f4f341ec", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:47:29.937161Z", | |
"start_time": "2022-06-15T17:47:23.350804Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Join the APOGEE table with the XP coefficients on the Gaia source id, then make simple cuts\n", | |
"xp_apogee_tbl = at.join(tbl, xp_tbl, keys='GAIADR3_SOURCE_ID')\n", | |
"# Keep rows with TEFF > 0, LOGG > -0.5 and M_H > -3\n", | |
"# (presumably this removes missing-value sentinels -- confirm).\n", | |
"xp_apogee_tbl = xp_apogee_tbl[\n", | |
" (xp_apogee_tbl['TEFF'] > 0) &\n", | |
" (xp_apogee_tbl['LOGG'] > -0.5) &\n", | |
" (xp_apogee_tbl['M_H'] > -3.)\n", | |
"]\n", | |
"len(xp_apogee_tbl)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "2657bb59", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:47:41.085389Z", | |
"start_time": "2022-06-15T17:47:40.921033Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# Make XP spectral coefficient ratios.\n", | |
"# Coefficients 1 onward are divided by coefficient 0; the 0:1 slice keeps a\n", | |
"# length-1 second axis so the division broadcasts across the columns.\n", | |
"# NOTE(review): _bp and _rp are not referenced by any later cell in this notebook;\n", | |
"# the feature-building cell computes its own ratios.\n", | |
"_bp = xp_apogee_tbl['bp'][:, 1:] / xp_apogee_tbl['bp'][:, 0:1]\n", | |
"_rp = xp_apogee_tbl['rp'][:, 1:] / xp_apogee_tbl['rp'][:, 0:1]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "bcb8fa9d", | |
"metadata": {}, | |
"source": [ | |
"## Build rectangular feature and label arrays for plotting or machine learning" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e96bb05e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Table.filled() returns a copy in which masked entries are replaced by fill values.\n", | |
"# NOTE(review): no fill_value is passed, so each column's own default fill value\n", | |
"# is used (not NaN) -- confirm that this is intended.\n", | |
"xp_apogee_tbl = xp_apogee_tbl.filled()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6589c96e", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2022-06-15T17:48:04.069510Z", | |
"start_time": "2022-06-15T17:48:03.887065Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"feature_mask = (\n", | |
" (xp_apogee_tbl['J'] < 13) &\n", | |
" (xp_apogee_tbl['H'] < 12) &\n", | |
" (xp_apogee_tbl['K'] < 11))\n", | |
"\n", | |
"features = np.hstack((\n", | |
" (xp_apogee_tbl['bp'][:, 1:11] / xp_apogee_tbl['bp'][:, 0:1])[feature_mask],\n", | |
" (xp_apogee_tbl['rp'][:, 1:11] / xp_apogee_tbl['rp'][:, 0:1])[feature_mask],\n", | |
" (xp_apogee_tbl['phot_bp_mean_mag'] - xp_apogee_tbl['phot_rp_mean_mag'])[feature_mask, None],\n", | |
" (xp_apogee_tbl['J'] - xp_apogee_tbl['H'])[feature_mask, None],\n", | |
" (xp_apogee_tbl['H'] - xp_apogee_tbl['K'])[feature_mask, None]\n", | |
"))\n", | |
"\n", | |
"feature_names = np.concatenate((\n", | |
" [f'BP[{i}]' for i in range(1, 10+1)],\n", | |
" [f'RP[{i}]' for i in range(1, 10+1)],\n", | |
" ['BP-RP', 'J-H', 'H-K'],\n", | |
"))\n", | |
"\n", | |
"print(features.shape)\n", | |
"print(len(feature_names), feature_names)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c4482ab5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"labels = np.vstack((\n", | |
" xp_apogee_tbl['TEFF'].filled(np.nan),\n", | |
" xp_apogee_tbl['LOGG'].filled(np.nan),\n", | |
" xp_apogee_tbl['AK_WISE'].filled(np.nan),\n", | |
" xp_apogee_tbl['M_H'].filled(np.nan),\n", | |
" xp_apogee_tbl['ALPHA_M'].filled(np.nan),\n", | |
" #(xp_apogee_tbl['parallax'] * 10 ** (1/5 * xp_apogee_tbl['phot_g_mean_mag'])).filled(np.nan)\n", | |
")).T[feature_mask]\n", | |
"print(labels.shape)\n", | |
"\n", | |
"label_names = ['TEFF', 'LOGG', 'AK_WISE', 'M_H', 'ALPHA_M'] # , 'SCHM_G']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f73adfb4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# check that the data aren't wack\n", | |
"plt.scatter(features[:, 10], labels[:, 0], c=\"k\", s=1., alpha=0.05)\n", | |
"plt.xlabel(feature_names[10])\n", | |
"plt.ylabel(label_names[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "aa1d5240", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment