Last active
May 11, 2020 16:40
-
-
Save Kelvinrr/bd56682ccd9a64e29b2b690096d48943 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from datetime import datetime\n", | |
| "import glob\n", | |
| "import os\n", | |
| "import tarfile\n", | |
| "import json\n", | |
| "\n", | |
| "from pymongo import MongoClient\n", | |
| "import plio\n", | |
| "from plio.io import io_spectral_profiler\n", | |
| "\n", | |
| "import sqlalchemy\n", | |
| "from sqlalchemy.ext.declarative import declarative_base\n", | |
| "from sqlalchemy import Column, String, Integer\n", | |
| "from sqlalchemy.ext.hybrid import hybrid_property\n", | |
| "from sqlalchemy import create_engine, pool, orm\n", | |
| "from sqlalchemy.orm import create_session, scoped_session, sessionmaker\n", | |
| "from sqlalchemy_utils import database_exists, create_database\n", | |
| "\n", | |
| "from geoalchemy2 import Geometry\n", | |
| "from geoalchemy2.shape import from_shape, to_shape\n", | |
| "from sqlalchemy.schema import MetaData" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "Base = declarative_base(metadata = MetaData(schema=\"kaguyasp\"))\n", | |
| "\n", | |
| "class Points(Base):\n", | |
| " __tablename__ = 'points'\n", | |
| " \n", | |
| " # TODO: Add array for spectra\n", | |
| " id = Column(Integer, primary_key=True, autoincrement=True)\n", | |
| " sourcefile = Column(String)\n", | |
| " \n", | |
| " geom = Column(\"geom\", Geometry('POINT', dimension=2, srid=4326, spatial_index=True))\n", | |
| " \n", | |
| " \n", | |
| "db_uri = 'postgresql://{}:{}@{}:{}/{}'.format('postgres',\n", | |
| " '',\n", | |
| " '137.227.237.178',\n", | |
| " '32431',\n", | |
| " 'postgres')\n", | |
| "engine = sqlalchemy.create_engine(db_uri)\n", | |
| "Session = orm.sessionmaker(bind=engine, autocommit=False)\n", | |
| "\n", | |
| "Base.metadata.create_all(engine)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def unpack_sp():\n", | |
| " # TODO: Update for Postgres \n", | |
| " client = MongoClient('localhost',27017)\n", | |
| " db = client.spectral_profiler\n", | |
| " spot_data = db.spot_data\n", | |
| " image_data = db.image_data\n", | |
| "\n", | |
| " nfiles = 0\n", | |
| " process = True\n", | |
| "\n", | |
| " months = glob.glob('/work/projects/jaxa02/SP_Level2C/02/2008/*')\n", | |
| " for month in months:\n", | |
| " days = glob.glob(os.path.join(month, '*'))\n", | |
| " if process == False:\n", | |
| " break\n", | |
| "\n", | |
| " for day in days:\n", | |
| " if process == False:\n", | |
| " break\n", | |
| " files = glob.glob(os.path.join(day, '*'))\n", | |
| " spot_datas = []\n", | |
| " labels = []\n", | |
| " for j,f in enumerate(files):\n", | |
| " tfile = tarfile.open(f, 'r:')\n", | |
| " names = tfile.getnames()\n", | |
| " for n in names:\n", | |
| " if '.spc' in n:\n", | |
| " content = tfile.extractfile(n)\n", | |
| " break\n", | |
| " c = content.read()\n", | |
| " sp = io_spectral_profiler.Spectral_Profiler(c)\n", | |
| " for i in range(len(sp.spectra)):\n", | |
| " nfiles += 1\n", | |
| " data = {}\n", | |
| " data['file'] = f\n", | |
| " data['observation_id'] = i\n", | |
| " data['nspectra'] = sp.nspectra\n", | |
| " spectra = sp.spectra[i].to_json()\n", | |
| " for c in sp.spectra[i].columns:\n", | |
| " data[c] = sp.spectra[i][c].to_json()\n", | |
| " data['ancillarydata'] = sp.ancillary_data.iloc[i].to_json()\n", | |
| " spot_datas.append(data)\n", | |
| "\n", | |
| " sp.label['file'] = f\n", | |
| " labels.append(json.loads(json.dumps(sp.label, default = json_serial)))\n", | |
| "\n", | |
| " if nfiles > int(1e6):\n", | |
| " process = False\n", | |
| " break\n", | |
| "\n", | |
| " spot_data.insert_many(spot_datas)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment