dfulu · November 28, 2019 16:02 · dfulu · Nov 28, 2019
diff --git a/load_noaa.py b/load_noaa.py
 import pandas as pd

 def load_noaa(filepath, skiprows=1, skipfooter=4, na_values=-999):
     """Load a NOAA-style text table into a tidy, date-indexed DataFrame.

     The file is read as a whitespace-separated table without a header
     row. The first column is taken as the year and every remaining
     column is taken as one month of that year, with the column position
     (1, 2, ...) used as the month number. The table is reshaped so that
     each (year, month) cell becomes one row.

     Parameters
     ----------
     filepath : str
         Filepath of dataset
     skiprows : int, optional
         Number of rows of non-data at the top of the file. (the default
         is 1, which works for MEIv2)
     skipfooter : int, optional
         Number of rows of non-data at the bottom of the file. (the
         default is 4, which works for MEIv2)
     na_values : float, optional
         Missing value number (the default is -999, which works for MEIv2)

     Returns
     -------
     pandas.DataFrame
         Frame indexed by ``date`` (the first day of each month) with a
         single ``value`` column, sorted in chronological order.

     """
     # ``skipfooter`` is only supported by the python parser; requesting it
     # explicitly avoids the ParserWarning pandas emits when it falls back.
     wide = pd.read_csv(filepath, sep=r'\s+', engine='python',
                        skiprows=skiprows, skipfooter=skipfooter,
                        header=None, na_values=na_values)

     # Column 0 (the year) is kept as the identifier; melt stores the
     # original column position of every other cell in 'variable'.
     tidy = pd.melt(wide, id_vars=[0])

     # Assemble the timestamps column-wise instead of formatting one string
     # per row.
     tidy['date'] = pd.to_datetime(pd.DataFrame({
         'year': tidy[0],
         'month': tidy['variable'],
         'day': 1,
     }))

     # The original computed this frame but discarded it; return it.
     return tidy[['date', 'value']].set_index('date').sort_index()

 if __name__ == '__main__':
     # Quick manual check: load the MEIv2 file from the working directory
     # and show its first rows. A bare ``df.head()`` inside this block
     # displays nothing, so the preview is printed.
     filepath = "meiv2.data"
     df = load_noaa(filepath, skiprows=1, skipfooter=4)
     print(df.head())
	import pandas as pd

	def load_noaa(filepath, skiprows=1, skipfooter=4, na_values=-999):
	    """Load a NOAA-style text table into a tidy, date-indexed DataFrame.

	    The file is read as a whitespace-separated table without a header
	    row. The first column is taken as the year and every remaining
	    column is taken as one month of that year, with the column position
	    (1, 2, ...) used as the month number. The table is reshaped so that
	    each (year, month) cell becomes one row.

	    Parameters
	    ----------
	    filepath : str
	        Filepath of dataset
	    skiprows : int, optional
	        Number of rows of non-data at the top of the file. (the default
	        is 1, which works for MEIv2)
	    skipfooter : int, optional
	        Number of rows of non-data at the bottom of the file. (the
	        default is 4, which works for MEIv2)
	    na_values : float, optional
	        Missing value number (the default is -999, which works for MEIv2)

	    Returns
	    -------
	    pandas.DataFrame
	        Frame indexed by ``date`` (the first day of each month) with a
	        single ``value`` column, sorted in chronological order.

	    """
	    # ``skipfooter`` is only supported by the python parser; requesting it
	    # explicitly avoids the ParserWarning pandas emits when it falls back.
	    wide = pd.read_csv(filepath, sep=r'\s+', engine='python',
	                       skiprows=skiprows, skipfooter=skipfooter,
	                       header=None, na_values=na_values)

	    # Column 0 (the year) is kept as the identifier; melt stores the
	    # original column position of every other cell in 'variable'.
	    tidy = pd.melt(wide, id_vars=[0])

	    # Assemble the timestamps column-wise instead of formatting one string
	    # per row.
	    tidy['date'] = pd.to_datetime(pd.DataFrame({
	        'year': tidy[0],
	        'month': tidy['variable'],
	        'day': 1,
	    }))

	    # The original computed this frame but discarded it; return it.
	    return tidy[['date', 'value']].set_index('date').sort_index()

	if __name__ == '__main__':
	    # Quick manual check: load the MEIv2 file from the working directory
	    # and show its first rows. A bare ``df.head()`` inside this block
	    # displays nothing, so the preview is printed.
	    filepath = "meiv2.data"
	    df = load_noaa(filepath, skiprows=1, skipfooter=4)
	    print(df.head())