Skip to content

Instantly share code, notes, and snippets.

@Kelvinrr
Last active May 11, 2020 16:40
Show Gist options
  • Select an option

  • Save Kelvinrr/bd56682ccd9a64e29b2b690096d48943 to your computer and use it in GitHub Desktop.

Select an option

Save Kelvinrr/bd56682ccd9a64e29b2b690096d48943 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{ "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import glob\n",
    "import os\n",
    "import tarfile\n",
    "import json\n",
    "\n",
    "from pymongo import MongoClient\n",
    "import plio\n",
    "from plio.io import io_spectral_profiler\n",
    "\n",
    "import sqlalchemy\n",
    "from sqlalchemy.ext.declarative import declarative_base\n",
    "from sqlalchemy import Column, String, Integer\n",
    "from sqlalchemy.ext.hybrid import hybrid_property\n",
    "from sqlalchemy import create_engine, pool, orm\n",
    "from sqlalchemy.orm import create_session, scoped_session, sessionmaker\n",
    "from sqlalchemy_utils import database_exists, create_database\n",
    "\n",
    "from geoalchemy2 import Geometry\n",
    "from geoalchemy2.shape import from_shape, to_shape\n",
    "from sqlalchemy.schema import MetaData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Base = declarative_base(metadata = MetaData(schema=\"kaguyasp\"))\n",
    "\n",
    "class Points(Base):\n",
    "    __tablename__ = 'points'\n",
    "    \n",
    "    # TODO: Add array for spectra\n",
    "    id = Column(Integer, primary_key=True, autoincrement=True)\n",
    "    sourcefile = Column(String)\n",
    "    \n",
    "    geom = Column(\"geom\", Geometry('POINT', dimension=2, srid=4326, spatial_index=True))\n",
    "    \n",
    "    \n",
    "db_uri = 'postgresql://{}:{}@{}:{}/{}'.format('postgres',\n",
    "                                              '',\n",
    "                                              '137.227.237.178',\n",
    "                                              '32431',\n",
    "                                              'postgres')\n",
    "engine = sqlalchemy.create_engine(db_uri)\n",
    "Session = orm.sessionmaker(bind=engine, autocommit=False)\n",
    "\n",
    "Base.metadata.create_all(engine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def unpack_sp():\n",
    "    # TODO: Update for Postgres \n",
    "    client = MongoClient('localhost',27017)\n",
    "    db = client.spectral_profiler\n",
    "    spot_data = db.spot_data\n",
    "    image_data = db.image_data\n",
    "\n",
    "    nfiles = 0\n",
    "    process = True\n",
    "\n",
    "    months = glob.glob('/work/projects/jaxa02/SP_Level2C/02/2008/*')\n",
    "    for month in months:\n",
    "        days = glob.glob(os.path.join(month, '*'))\n",
    "        if process == False:\n",
    "            break\n",
    "\n",
    "        for day in days:\n",
    "            if process == False:\n",
    "                break\n",
    "            files = glob.glob(os.path.join(day, '*'))\n",
    "            spot_datas = []\n",
    "            labels = []\n",
    "            for j,f in enumerate(files):\n",
    "                tfile = tarfile.open(f, 'r:')\n",
    "                names = tfile.getnames()\n",
    "                for n in names:\n",
    "                    if '.spc' in n:\n",
    "                        content = tfile.extractfile(n)\n",
    "                        break\n",
    "                c = content.read()\n",
    "                sp = io_spectral_profiler.Spectral_Profiler(c)\n",
    "                for i in range(len(sp.spectra)):\n",
    "                    nfiles += 1\n",
    "                    data = {}\n",
    "                    data['file'] = f\n",
    "                    data['observation_id'] = i\n",
    "                    data['nspectra'] = sp.nspectra\n",
    "                    spectra = sp.spectra[i].to_json()\n",
    "                    for c in sp.spectra[i].columns:\n",
    "                        data[c] = sp.spectra[i][c].to_json()\n",
    "                    data['ancillarydata'] = sp.ancillary_data.iloc[i].to_json()\n",
    "                    spot_datas.append(data)\n",
    "\n",
    "                sp.label['file'] = f\n",
    "                labels.append(json.loads(json.dumps(sp.label, default = json_serial)))\n",
    "\n",
    "                if nfiles > int(1e6):\n",
    "                    process = False\n",
    "                    break\n",
    "\n",
    "            spot_data.insert_many(spot_datas)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment