Created
          November 11, 2016 21:24 
        
      - 
      
- 
        Save restrepo/6047a1bc4da689fe22d653a76c1e16ed to your computer and use it in GitHub Desktop. 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Add DOI info to productos UdeA" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import publications as p\n", | |
| "from unidecode import unidecode" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import re\n", | |
| "import sys\n", | |
| "import numpy as np\n", | |
| "import time\n", | |
| "from publications import *\n", | |
| "from unidecode import unidecode\n", | |
| "import difflib\n", | |
| "def get_doi(\n", | |
| " surname=None,#'florez',\n", | |
| " title=r'Baryonic violation of R-parity from anomalous $U(1)_H$',\n", | |
| " other='',\n", | |
| " DOI=None,\n", | |
| " check_text=None,\n", | |
| " check_mesagges_key=None,\n", | |
| " JSON=False):\n", | |
| " import re\n", | |
| " import requests\n", | |
| " import time\n", | |
| " import sys\n", | |
| " '''\n", | |
| " Search for a DOI and check against the full DOI info. If JSON is set True, the full\n", | |
| " info is returned as a python dictionary\n", | |
| " \n", | |
| " Implementations:\n", | |
| " \n", | |
| " 1) Search and check for a DOI by surname and title or if only DOI is given, just check the DOI.\n", | |
| " \n", | |
| " The checking is doing by comparing check_text with the check_mesagges_key from the full info.\n", | |
| " By default the given 'title' is used for the check.\n", | |
| " \n", | |
| " For other possible check_mesagges_key's, see in a browser the several keys of the 'mesagges' \n", | |
| " dictionary at:\n", | |
| " https://api.crossref.org/v1/works/DOI, \n", | |
| " for example:\n", | |
| " https://api.crossref.org/v1/works/10.1103/physrevd.87.095010\n", | |
| " \n", | |
| " \n", | |
| " 2) if only DOI is given, just get full info aboit the DOI\n", | |
| " \n", | |
| " Examples:\n", | |
| " 2) get_doi(surname='Florez',title='Baryonic violation of R-parity from anomalous U(1)H',JSON=True)\n", | |
| " 3) get_doi(DOI)\n", | |
| " '''\n", | |
| " similarity=0.6\n", | |
| " if not check_text:\n", | |
| " check_text=title.lower()\n", | |
| " if not check_mesagges_key:\n", | |
| " check_mesagges_key='title' #'container-title'\n", | |
| " if not surname and DOI:\n", | |
| " if not check_text and not check_mesagges_key:\n", | |
| " similarity=0.1\n", | |
| " check_text='http://dx.doi.org/'\n", | |
| " check_mesagges_key='DOI'\n", | |
| " JSON=True\n", | |
| " \n", | |
| " doi=''\n", | |
| " if JSON:\n", | |
| " doi={}\n", | |
| " if not DOI:\n", | |
| " search=''\n", | |
| " if surname:\n", | |
| " search=surname\n", | |
| " if title:\n", | |
| " if len(search)>0:\n", | |
| " search=search+', '+title\n", | |
| " if other:\n", | |
| " if len(search)>0:\n", | |
| " search=search+', '+other\n", | |
| "\n", | |
| " r=requests.get('http://search.crossref.org/?q=%s' %search)\n", | |
| " urldoi='http://dx.doi.org/'\n", | |
| "\n", | |
| " DOI=''\n", | |
| " if len(r.text.split(urldoi))>1:\n", | |
| " DOI=r.text.split(urldoi)[1].split(\"\\',\")[0].split('>\\n')[0]\n", | |
| " else:\n", | |
| " DOI=''\n", | |
| " \n", | |
| " \n", | |
| " if DOI:\n", | |
| " json='https://api.crossref.org/v1/works/'\n", | |
| " rr=requests.get( json+DOI )\n", | |
| " if rr.status_code==200:\n", | |
| " if 'message' in rr.json():\n", | |
| " if check_mesagges_key in rr.json()['message']:\n", | |
| " chk=get_close_matches_Levenshtein(check_text,\\\n", | |
| " rr.json()[\"message\"][check_mesagges_key][0].lower(),\\\n", | |
| " n=1,cutoff=similarity)\n", | |
| " if chk:\n", | |
| " if \"DOI\" in rr.json()[\"message\"]:\n", | |
| " doi=rr.json()[\"message\"][\"DOI\"]\n", | |
| " if JSON:\n", | |
| " doi=rr.json()[\"message\"]\n", | |
| " \n", | |
| " \n", | |
| " time.sleep(1)\n", | |
| " return doi\n", | |
| "def get_close_matches_Levenshtein(word, possibilities,n=3,cutoff=0.6,full=False):\n", | |
| " '''Replaces difflib.get_close_matches with faster algortihm based on\n", | |
| " Levenshtein.ratio.\n", | |
| " HINT: Similarity increase significatively after lower() and unidecode()\n", | |
| " '''\n", | |
| " import pandas as pd\n", | |
| " import Levenshtein\n", | |
| " if type(possibilities)==str:\n", | |
| " possibilities=[possibilities]\n", | |
| " rl=['']\n", | |
| " rs=pd.DataFrame()\n", | |
| " MATCH=False\n", | |
| " for p in possibilities:\n", | |
| " similarity=Levenshtein.ratio(word,p)\n", | |
| " if similarity>=cutoff:\n", | |
| " MATCH=True\n", | |
| " rs=rs.append({'similarity':similarity,'match':p},ignore_index=True)\n", | |
| "\n", | |
| " if MATCH:\n", | |
| " rs=rs.sort_values('similarity',ascending=False).reset_index(drop=True)\n", | |
| " if full:\n", | |
| " return list(rs['match'][:n].values),list(rs['similarity'][:n].values)\n", | |
| " else:\n", | |
| " return list(rs['match'][:n].values)\n", | |
| " else:\n", | |
| " if full:\n", | |
| " return ([],0)\n", | |
| " else:\n", | |
| " return []\n", | |
| " \n", | |
| "def check_hash(df,hashseries,in_hash,min_match=10):\n", | |
| " ''' hashseries obtained from dataframe df, e.g\n", | |
| " hashseris=df.some_column.str.replace('\\W+','').str.lower().map(unicode)\n", | |
| " within which in_hash will be searched for match at least min_match characters \n", | |
| " '''\n", | |
| " comparision=True\n", | |
| " for si in reversed(range(0,len(in_hash)+1)):\n", | |
| " chk=df[hashseries.str.match(in_hash[:si])]\n", | |
| " if chk.shape[0]>0:\n", | |
| " return comparision,chk\n", | |
| " break \n", | |
| " if si<min_match:\n", | |
| " comparision=False\n", | |
| " return comparision,pd.DataFrame()\n", | |
| " \n", | |
| " \n", | |
| "def columns_add_prefix(df,prefix):\n", | |
| " return df.rename_axis( dict( (key,prefix+'_'+key) for key in df.columns.values) , axis=1)\n", | |
| "\n", | |
| "def fill_NaN(df):\n", | |
| " '''Fill NaN entries with proper empty values\n", | |
| " Type : dtype: Fill with\n", | |
| " string: \"0\" : ''\n", | |
| " float : \"float64\" \n", | |
| " '''\n", | |
| " for key in df.columns:\n", | |
| " if df[key].dtype=='O':\n", | |
| " df[key]=df[key].str.strip()\n", | |
| " df[key]=df[key].fillna('')\n", | |
| " elif df[key].dtype=='float64':\n", | |
| " df[key]=df[key].fillna(0.0)\n", | |
| " return df\n", | |
| "def read_excel_fill_NaN(*args, **kwargs):\n", | |
| " '''Fill NaN entries with proper empty values\n", | |
| " Type : dtype: Fill with\n", | |
| " string: \"0\" : ''\n", | |
| " float : \"float64\" \n", | |
| " '''\n", | |
| " df=pd.read_excel(*args, **kwargs)\n", | |
| " df=fill_NaN(df)\n", | |
| " return df\n", | |
| " \n", | |
| "#To add to main publications object:\n", | |
| "def add_sjr_info_from_issn(self,SJR,column_issn='SN',SJR_column_journal='SJR_Title',SJR_column_issn='SJR_Issn'):\n", | |
| " '''self is an publication object and SJR is the info for a journal in column SJR_Issn'''\n", | |
| " if not SJR_column_journal in self.articles.columns:\n", | |
| " sys.exit(\"Run first the the more exact and fast add_sjr_info\")\n", | |
| " \n", | |
| " self.articles=fill_NaN(self.articles)\n", | |
| " kk=self.articles[self.articles[SJR_column_journal]=='']\n", | |
| " for issn in kk[column_issn].str.replace('-','').unique():\n", | |
| " mtch=SJR[SJR[SJR_column_issn].str.contains(issn)].reset_index(drop=True)\n", | |
| " if mtch.shape[0]==1:\n", | |
| " moa=kk[ kk[column_issn].str.replace('-','')==issn ]\n", | |
| " if moa.shape[0]>=1:\n", | |
| " #DEBUG: more filters if \n", | |
| " for key in SJR.columns.values:\n", | |
| " self.articles.loc[moa.index.values,key]=mtch.ix[0][key]\n", | |
| " \n", | |
| " return self\n", | |
| "\n", | |
| "def add_sjr_info_from_journal(self,SJR,column_journal='SO',SJR_column_journal='SJR_Title'):\n", | |
| " '''self is an publication object and SJR is the info for a journal in column SJR_Issn'''\n", | |
| " if not SJR_column_journal in self.articles.columns:\n", | |
| " sys.exit(\"Run first the more exact and fast add_sjr_info\")\n", | |
| " \n", | |
| " self.articles=fill_NaN(self.articles)\n", | |
| " kk=self.articles[self.articles[SJR_column_journal]=='']\n", | |
| " #kk_hash_SO=kk[column_journal].str.replace('\\W+','').str.lower().str.strip().map(unidecode)\n", | |
| " SJR_hash_Title=SJR[SJR_column_journal].str.replace('\\W+','').str.lower().str.strip().map(unidecode)\n", | |
| " for title in kk[column_journal].str.lower().str.strip().unique():\n", | |
| " hash_match,mtch=check_hash(SJR,SJR_hash_Title,re.sub('\\W+','',title).lower().strip() )\n", | |
| " if hash_match:\n", | |
| " mtch=mtch.reset_index(drop=True)\n", | |
| " if mtch.shape[0]>1:\n", | |
| " newtitle=re.sub('\\W+',' ',title)\n", | |
| " mtch=SJR[SJR[SJR_column_journal].str.lower().str.strip().str.match('%s ' %newtitle)]\n", | |
| " if mtch.shape[0]:\n", | |
| " mtch=mtch.reset_index(drop=True)\n", | |
| " \n", | |
| " if mtch.shape[0]==1:\n", | |
| " moa=kk[ kk[column_journal].str.lower().str.strip()==title ]\n", | |
| " if moa.shape[0]>=1:\n", | |
| " for key in SJR.columns.values:\n", | |
| " self.articles.loc[moa.index.values,key]=mtch.ix[0][key]\n", | |
| " return self\n", | |
| "\n", | |
| "def add_sjr_info(self,SJR,column_journal='SO',SJR_column_journal='SJR_Title'):\n", | |
| " '''self is an publication object and SJR is the info for a journal in column SJR_Title'''\n", | |
| " self.articles=self.articles.reset_index(drop=True)\n", | |
| " for joa in np.intersect1d( self.articles[column_journal].str.lower().str.strip().unique(),\\\n", | |
| " SJR[SJR_column_journal].str.lower().str.strip().unique() ):\n", | |
| " moa=self.articles[ self.articles[column_journal].str.lower() == joa ]\n", | |
| " if moa.shape[0]:\n", | |
| " mtch=SJR[SJR[SJR_column_journal].str.lower().str.strip()==joa].reset_index(drop=True)\n", | |
| " if mtch.shape[0]==1:\n", | |
| " #DEBUG: filter by ISSN if >1:\n", | |
| " for key in SJR.columns.values:\n", | |
| " self.articles.loc[moa.index.values,key]=mtch.ix[0][key]\n", | |
| " \n", | |
| " return self\n", | |
| " \n", | |
| "def merge_with_close_matches(left,right,left_on,right_on,left_extra_on,right_extra_on,how='inner',\\\n", | |
| " n=1,cutoff=0.6,full=True):\n", | |
| " '''For each entry of the column: left_on of DataFrame left (cannot have empty fields), \n", | |
| " try to find the close match inside each row of right DataFrame, by comparing with \n", | |
| " the right_on entry of the row. When a row match is found, the full right row is appended \n", | |
| " to the matched row in the left DataFrame. \n", | |
| " If the similarity between the entries at left_on and right_on is less than 0.8, \n", | |
| " an additional check is performed between the entries left_extra_on and right_extra_on\n", | |
| " of the matched row.\n", | |
| " \n", | |
| " how implemented: inner and left (Default: left)\n", | |
| " '''\n", | |
| " from unidecode import unidecode\n", | |
| " import pandas as pd\n", | |
| " words=left[left_on].str.lower().map(unidecode)\n", | |
| " possibilities=right[right_on].str.lower().map(unidecode)\n", | |
| " \n", | |
| " joined=pd.DataFrame()\n", | |
| " for i in left.index:\n", | |
| " if i%100==0:\n", | |
| " print('.', end=\"\")\n", | |
| " joined_series=left.loc[i]\n", | |
| " #joined_series=joined_series.append(pd.Series( {similarity_column:0} ))\n", | |
| " title,similarity=get_close_matches_Levenshtein(words[i],possibilities,n=n,cutoff=cutoff,full=full)\n", | |
| " if title:\n", | |
| " mtch=right[possibilities==title[0]]\n", | |
| " chk_cutoff=similarity[0]\n", | |
| " crosscheck=0.8\n", | |
| " if chk_cutoff<crosscheck:\n", | |
| " if get_close_matches_Levenshtein( unidecode(left[left_extra_on][i].lower()),\\\n", | |
| " [unidecode(mtch[right_extra_on][mtch.index[0]].lower())] ):\n", | |
| " chk_cutoff=crosscheck+0.1\n", | |
| " if chk_cutoff>=crosscheck:\n", | |
| " joined_series=joined_series.append(mtch.loc[mtch.index[0]])\n", | |
| " #joined_series[similarity_column]=similarity[0]\n", | |
| " if how=='inner':\n", | |
| " joined=joined.append(joined_series,ignore_index=True)\n", | |
| " if how=='left':\n", | |
| " joined=joined.append(joined_series,ignore_index=True)\n", | |
| " return joined" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import io\n", | |
| "import requests\n", | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "#key='125rkGaqMdlUotxZSVTNHmZ3DCh41jgTwaQ127z8_gGs'\n", | |
| "key='1jTuRWgzwCbFBMzK_8qiTDWx_O66Qw3BaYT_5e5-V3IY'\n", | |
| "#key='1OaTFwLu6KmJGPYdoKBpLJuXIO2eROOMl-hUSGHIdRAA'\n", | |
| "url='https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv' %key\n", | |
| "r=requests.get(url)\n", | |
| "if r.status_code==200:\n", | |
| " csv_file=io.StringIO(r.text) # or directly with: urllib.request.urlopen(url)\n", | |
| " udea=pd.read_csv( csv_file,keep_default_na=False)\n", | |
| " udea=udea.rename_axis({'material ':'material'},axis='columns')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 331, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(51346, 11)" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=udea.fillna('')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(11598, 11)" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea_rp=udea[udea['nombre revista o premio']!='']\n", | |
| "udea_rp=udea_rp.drop_duplicates(subset=['título']).reset_index(drop=True)\n", | |
| "udea_rp.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(11598, 11)" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u=udea_rp.reset_index(drop=True).fillna('')\n", | |
| "u.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u['simple_title']=u.título.str.lower().map(unidecode).str.split('(').str[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>nombre</th>\n", | |
| " <th>material</th>\n", | |
| " <th>nro autores</th>\n", | |
| " <th>nombre revista o premio</th>\n", | |
| " <th>año realiz</th>\n", | |
| " <th>título</th>\n", | |
| " <th>país</th>\n", | |
| " <th>idioma</th>\n", | |
| " <th>item adic</th>\n", | |
| " <th>valor item</th>\n", | |
| " <th>simple_title</th>\n", | |
| " <th>apellido</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>QUINTERO RIAZA VICTOR MANUEL</td>\n", | |
| " <td>Articulo en revista Tipo A2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>REVISTA COLOMBIANA DE CIRUGIA</td>\n", | |
| " <td>2015</td>\n", | |
| " <td>MANEJO NO QUIRURGICO DE PACIENTES SINTOMATICOS CON CAPSULA ENDOSCOPICA RETENIDA EN ENFERMEDAD DE CROHN.</td>\n", | |
| " <td>46</td>\n", | |
| " <td>ESPA?</td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>manejo no quirurgico de pacientes sintomaticos con capsula endoscopica retenida en enfermedad de crohn.</td>\n", | |
| " <td>quintero</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " nombre material nro autores \\\n", | |
| "0 QUINTERO RIAZA VICTOR MANUEL Articulo en revista Tipo A2 3 \n", | |
| "\n", | |
| " nombre revista o premio año realiz \\\n", | |
| "0 REVISTA COLOMBIANA DE CIRUGIA 2015 \n", | |
| "\n", | |
| " título \\\n", | |
| "0 MANEJO NO QUIRURGICO DE PACIENTES SINTOMATICOS CON CAPSULA ENDOSCOPICA RETENIDA EN ENFERMEDAD DE CROHN. \n", | |
| "\n", | |
| " país idioma item adic valor item \\\n", | |
| "0 46 ESPA? \n", | |
| "\n", | |
| " simple_title \\\n", | |
| "0 manejo no quirurgico de pacientes sintomaticos con capsula endoscopica retenida en enfermedad de crohn. \n", | |
| "\n", | |
| " apellido \n", | |
| "0 quintero " | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u['apellido']=u.nombre.str.lower().map(unidecode).str.split(' ').str[0]\n", | |
| "u[:1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Obtain DOI comparing titles" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(51425, 11)" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=udea.fillna('')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(11598, 11)" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea_rp=udea[udea['nombre revista o premio']!='']\n", | |
| "udea_rp=udea_rp.drop_duplicates(subset=['título']).reset_index(drop=True)\n", | |
| "udea_rp.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea_old_doi=udea[udea.doi!=''].reset_index(drop=True)\n", | |
| "udea_not_doi=udea[udea.doi==''].reset_index(drop=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "((3053, 11), (48372, 11))" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea_old_doi.shape,udea_not_doi.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "WOS_SCP=pd.read_excel('WOS_SCP.xlsx')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "WOS_SCP=fill_NaN(WOS_SCP)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(1589, 11)" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uu=udea_rp[udea_rp.doi==''].reset_index(drop=True)\n", | |
| "udea_rp[udea_rp.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(10009, 11)" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uu.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "uu['simple_title']=uu.título.str.lower().map(unidecode).str.split('(').str[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(4131, 96)" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "WOS_SCP[WOS_SCP.DI!=''].shape\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>nombre</th>\n", | |
| " <th>material</th>\n", | |
| " <th>nro autores</th>\n", | |
| " <th>nombre revista o premio</th>\n", | |
| " <th>año realiz</th>\n", | |
| " <th>título</th>\n", | |
| " <th>país</th>\n", | |
| " <th>idioma</th>\n", | |
| " <th>item adic</th>\n", | |
| " <th>valor item</th>\n", | |
| " <th>doi</th>\n", | |
| " <th>simple_title</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>QUINTERO RIAZA VICTOR MANUEL</td>\n", | |
| " <td>Articulo en revista Tipo A2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>REVISTA COLOMBIANA DE CIRUGIA</td>\n", | |
| " <td>2015</td>\n", | |
| " <td>MANEJO NO QUIRURGICO DE PACIENTES SINTOMATICOS CON CAPSULA ENDOSCOPICA RETENIDA EN ENFERMEDAD DE CROHN.</td>\n", | |
| " <td>46</td>\n", | |
| " <td>ESPA?</td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>manejo no quirurgico de pacientes sintomaticos con capsula endoscopica retenida en enfermedad de crohn.</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " nombre material nro autores \\\n", | |
| "0 QUINTERO RIAZA VICTOR MANUEL Articulo en revista Tipo A2 3 \n", | |
| "\n", | |
| " nombre revista o premio año realiz \\\n", | |
| "0 REVISTA COLOMBIANA DE CIRUGIA 2015 \n", | |
| "\n", | |
| " título \\\n", | |
| "0 MANEJO NO QUIRURGICO DE PACIENTES SINTOMATICOS CON CAPSULA ENDOSCOPICA RETENIDA EN ENFERMEDAD DE CROHN. \n", | |
| "\n", | |
| " país idioma item adic valor item doi \\\n", | |
| "0 46 ESPA? \n", | |
| "\n", | |
| " simple_title \n", | |
| "0 manejo no quirurgico de pacientes sintomaticos con capsula endoscopica retenida en enfermedad de crohn. " | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uu[:1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "....................................................................................................." | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "uuu=merge_with_close_matches(uu,WOS_SCP[WOS_SCP.DI!=''][['TI','DI','SO']],\\\n", | |
| " left_on='título',right_on='TI',left_extra_on='nombre revista o premio',\\\n", | |
| " right_extra_on='SO',how='left')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(1944, 15)" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uuu=uuu.fillna('')\n", | |
| "uuu[uuu.DI!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'10.3305/nh.2015.32.6.9859'" | |
| ] | |
| }, | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uuu[uuu.DI!=''].loc[uuu[uuu.DI!=''].index.values[0],'DI']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for i in uuu[uuu.DI!=''].index:\n", | |
| " for j in udea[udea.título==uuu.loc[i,'título']].index:\n", | |
| " udea.loc[j,'doi']=uuu.loc[i,'DI']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(6253, 11)" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea[udea.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 329, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea.to_excel('producción_reconocida_2002_2016_doi.xlsx',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 330, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>nombre</th>\n", | |
| " <th>material</th>\n", | |
| " <th>nro autores</th>\n", | |
| " <th>nombre revista o premio</th>\n", | |
| " <th>año realiz</th>\n", | |
| " <th>título</th>\n", | |
| " <th>país</th>\n", | |
| " <th>idioma</th>\n", | |
| " <th>item adic</th>\n", | |
| " <th>valor item</th>\n", | |
| " <th>doi</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>URREA DUQUE JUAN PABLO</td>\n", | |
| " <td>Ponencia en extenso en evento internacional</td>\n", | |
| " <td>2</td>\n", | |
| " <td></td>\n", | |
| " <td>2015</td>\n", | |
| " <td>STATISTICAL PERFORMANCE EVALUATION OF P2P VIDEO STREAMING ON MULTI-HOP WIRELESS NETWORKS.</td>\n", | |
| " <td>46</td>\n", | |
| " <td>INGLE</td>\n", | |
| " <td>EDITORIAL</td>\n", | |
| " <td>IEEE</td>\n", | |
| " <td></td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " nombre material \\\n", | |
| "0 URREA DUQUE JUAN PABLO Ponencia en extenso en evento internacional \n", | |
| "\n", | |
| " nro autores nombre revista o premio año realiz \\\n", | |
| "0 2 2015 \n", | |
| "\n", | |
| " título \\\n", | |
| "0 STATISTICAL PERFORMANCE EVALUATION OF P2P VIDEO STREAMING ON MULTI-HOP WIRELESS NETWORKS. \n", | |
| "\n", | |
| " país idioma item adic valor item doi \n", | |
| "0 46 INGLE EDITORIAL IEEE " | |
| ] | |
| }, | |
| "execution_count": 330, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea[:1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Obtain DOI comparing simple titles" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')\n", | |
| "if 'doi' not in udea:\n", | |
| " udea['doi']=''\n", | |
| "udea=udea.fillna('')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def obtain_doi_from_title(udea,WOS_SCP,\\\n", | |
| " title_left_on='simple_title',\\\n", | |
| " title_right_on='TI'):\n", | |
| " udea_rp=udea[udea['nombre revista o premio']!='']\n", | |
| " udea_rp=udea_rp.drop_duplicates(subset=['título']).reset_index(drop=True)\n", | |
| " \n", | |
| " #udea_old_doi=udea[udea.doi!=''].reset_index(drop=True)\n", | |
| " #udea_not_doi=udea[udea.doi==''].reset_index(drop=True)\n", | |
| "\n", | |
| " WOS_SCP=pd.read_excel('WOS_SCP.xlsx')\n", | |
| " WOS_SCP=fill_NaN(WOS_SCP)\n", | |
| "\n", | |
| " uu=udea_rp[udea_rp.doi==''].reset_index(drop=True)\n", | |
| " if title_left_on=='simple_title':\n", | |
| " uu['simple_title']=uu.título.str.lower().map(unidecode).str.split('(').str[0]\n", | |
| "\n", | |
| " uuu=merge_with_close_matches(uu,WOS_SCP[WOS_SCP.DI!=''][[title_right_on,'DI','SO']],\\\n", | |
| " left_on=title_left_on,right_on=title_right_on,left_extra_on='nombre revista o premio',\\\n", | |
| " right_extra_on='SO',how='left')\n", | |
| "\n", | |
| " uuu=uuu.fillna('')\n", | |
| " if 'DI' in uuu:\n", | |
| " uuu=uuu[uuu.DI!='']\n", | |
| " for i in uuu.index:\n", | |
| " for j in udea[udea.título==uuu.loc[i,'título']].index:\n", | |
| " udea.loc[j,'doi']=uuu.loc[i,'DI']\n", | |
| "\n", | |
| "\n", | |
| " return udea" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(0, 12)" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea[udea.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')\n", | |
| "udea=udea.fillna('')\n", | |
| "udea=obtain_doi_from_title(udea,WOS_SCP,\\\n", | |
| " title_left_on='título',\\\n", | |
| " title_right_on='TI')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea.to_excel('producción_reconocida_2002_2016_doi.xlsx',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')\n", | |
| "udea=udea.fillna('')\n", | |
| "udea=obtain_doi_from_title(udea,WOS_SCP,\\\n", | |
| " title_left_on='simple_title',\\\n", | |
| " title_right_on='TI')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea.to_excel('producción_reconocida_2002_2016_doi.xlsx',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "..............." | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')\n", | |
| "udea=udea.fillna('')\n", | |
| "udea=obtain_doi_from_title(udea,WOS_SCP,\\\n", | |
| " title_left_on='título',\\\n", | |
| " title_right_on='SCP_Title')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea.to_excel('producción_reconocida_2002_2016_doi.xlsx',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "................................................................................." | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "udea=pd.read_excel('producción_reconocida_2002_2016_doi.xlsx')\n", | |
| "udea=udea.fillna('')\n", | |
| "udea=obtain_doi_from_title(udea,WOS_SCP,\\\n", | |
| " title_left_on='simple_title',\\\n", | |
| " title_right_on='SCP_Title')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(6299, 11)" | |
| ] | |
| }, | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "udea[udea.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea.to_excel('producción_reconocida_2002_2016_doi.xlsx',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "udea[udea.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Find DOIS online from crossref API \n", | |
| "Move to the end" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 311, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u=u[u.doi=='']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 312, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(10010, 13)" | |
| ] | |
| }, | |
| "execution_count": 312, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u.shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u['simple_title']=u.título.str.lower().map(unidecode).str.split('(').str[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "u['apellido']=u.nombre.str.lower().map(unidecode).str.split(' ').str[0]\n", | |
| "u[:1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "." | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import subprocess\n", | |
| "#u['doi']=''\n", | |
| "TITLE='simple_title'\n", | |
| "#TITLE='título'\n", | |
| "u['title']=u[TITLE].str.lower().map(unidecode)\n", | |
| "\n", | |
| "for i in u.index:\n", | |
| " if i%100==0:\n", | |
| " print('.',end='')\n", | |
| " kk=subprocess.getstatusoutput('echo %d > doi.log' %i)\n", | |
| " #f=open('doi.log','w')\n", | |
| " #f.write('%d' %i)\n", | |
| " #f.close()\n", | |
| " u.loc[i,'doi']=get_doi(u.apellido[i],u['title'][i])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 313, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(0, 13)" | |
| ] | |
| }, | |
| "execution_count": 313, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "u[u.doi!=''].shape" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 236, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "uu=u[u.doi!=''][['título','doi']].reset_index(drop=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 237, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>título</th>\n", | |
| " <th>doi</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>OPTIMIZACION DEL TRATAMIENTO DE AGUAS RESIDUALES DE CULTIVOS DE FLORES USANDO HUMEDALES CONTRUIDOS DE FLUJO SUBSUPERFICIAL HORIZONTAL. (OPTIMIZATION OF WASTEWATER TREATMENT FROM A FLOWER CROPS USI...</td>\n", | |
| " <td>10.17533/udea.rfnsp.v34n1a03</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " título \\\n", | |
| "0 OPTIMIZACION DEL TRATAMIENTO DE AGUAS RESIDUALES DE CULTIVOS DE FLORES USANDO HUMEDALES CONTRUIDOS DE FLUJO SUBSUPERFICIAL HORIZONTAL. (OPTIMIZATION OF WASTEWATER TREATMENT FROM A FLOWER CROPS USI... \n", | |
| "\n", | |
| " doi \n", | |
| "0 10.17533/udea.rfnsp.v34n1a03 " | |
| ] | |
| }, | |
| "execution_count": 237, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "uu[:1]" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment