Source code for irisreader.coalignment.goes_data

#!/usr/bin/env python

import pandas as pd
import datetime as dt
import requests
import os
from scipy.interpolate import interp1d
import numpy as np
import matplotlib.pyplot as plt

import irisreader as ir
from irisreader.utils.notebooks import in_notebook 
# TODO: create a downloader utility function?


class goes_data:
    """
    This class represents an interface to GOES X-ray flux data. An instantiated
    object will download all GOES15 XRS data between the given dates and store
    it in the specified data directory. Data that are already present will not
    be downloaded again. The stored data are made accessible as a pandas data
    frame which can then be plotted with `plot` or interpolated to other points
    in time with `interpolate`. Optionally, the data are only loaded upon first
    read access.

    Parameters
    ----------
    start_date : datetime.datetime
        Start date/time of the time window for which GOES data should be downloaded.
    end_date : datetime.datetime
        End date/time of the time window for which GOES data should be downloaded.
    data_dir : string
        Data directory in which the downloaded data will be stored.
    lazy_eval : boolean
        Whether or not data should only be loaded upon first read access.

    Attributes
    ----------
    start_date : datetime.datetime
        Start date/time of the GOES X-ray flux data time window.
    end_date : datetime.datetime
        End date/time of the GOES X-ray flux data time window.
    data : pandas.DataFrame
        Data frame with the GOES data.
    """

    def __init__( self, start_date, end_date, data_dir, lazy_eval=False ):
        self._data_dir = data_dir
        self.start_date = start_date
        self.end_date = end_date
        self._files = []
        self.data = None

        if not lazy_eval:
            self._load()

    def __repr__( self ):
        repr_str =  "---------------- GOES XRS interface ------------------------------------\n"
        repr_str += "data: XRS data\n"
        repr_str += "plot(): plot x-ray flux history\n"
        repr_str += "interpolate( timestamps ): interpolate to IRIS timestamps\n"
        repr_str += "get_peak_flux(): get the peak flux in the observation\n"
        repr_str += "------------------------------------------------------------------------\n"
        return repr_str

    # catch attribute requests to perform lazy loading if necessary
    def __getattribute__( self, name ):
        if name == "data" and object.__getattribute__( self, "data" ) is None:
            self._load()
            return object.__getattribute__( self, "data" )
        else:
            return object.__getattribute__( self, name )

    # function to load data and download files if necessary
    def _load( self ):

        # create data directory if not present
        if not os.path.exists( self._data_dir ):
            try:
                os.mkdir( self._data_dir )
            except OSError:
                # write to the local folder if there is no permission to create the directory
                self._data_dir = "goes_data"
                os.mkdir( self._data_dir )

        # download files
        self._get_files( self.start_date, self.end_date )

        # parse files and concatenate the resulting data frames
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        frames = [ self._parse_file( self._data_dir + "/" + file ) for file in self._files ]
        self.data = pd.concat( frames ) if len( frames ) > 0 else pd.DataFrame( [] )

    # function to download all required GOES data for a certain time span
    def _get_files( self, start_date, end_date ):
        for day in range( (end_date - start_date).days + 3 ):
            current_date = start_date + dt.timedelta( days=day-1 )
            date_str = current_date.strftime( "%Y%m%d" )
            target_file_name = "g15_xrs_2s_" + date_str + "_" + date_str + ".csv"
            target_url = ir.config.goes_base_url + "/" + str( current_date.year ) + "/" + str( current_date.month ).zfill(2) + "/goes15/csv/" + target_file_name
            self._download_file( target_url, target_file_name )
            self._files.append( target_file_name )

    # function to download a file if it is not yet present
    def _download_file( self, url, target_file_name ):
        if not os.path.exists( self._data_dir + "/" + target_file_name ):
            print( "Downloading " + url )
            r = requests.get( url )
            if r.ok:
                with open( self._data_dir + "/" + target_file_name, "wb" ) as f:
                    f.write( r.content )
            else:
                raise Exception( "GOES: {} could not be downloaded (possibly change irisreader.config.goes_base_url)".format( url ) )

    # function to parse GOES csv data into a pandas data frame
    def _parse_file( self, file_path ):
        with open( file_path, "r" ) as f:

            # skip lines until the "data:" label is read
            for line in f:
                if "<html" in line:
                    os.remove( file_path )
                    raise Exception( "GOES: Could not parse: {} is an html file (removed it)".format( file_path ) )
                if line.startswith( "data:" ):
                    break

            return pd.read_csv( f, sep=",", parse_dates=["time_tag"], index_col="time_tag" )
    def plot( self, restrict_to_obstime=False, **kwargs ):
        """
        This function plots the GOES X-ray flux around the given time period.

        Parameters
        ----------
        restrict_to_obstime : boolean
            If True, only the flux within the observation time period will be plotted.
        """

        # prepare a data frame with zero values set to NaN (so that matplotlib does not plot them)
        plot_data = self.data.copy()
        plot_data.loc[ plot_data['A_FLUX'] == 0, 'A_FLUX' ] = np.nan
        plot_data.loc[ plot_data['B_FLUX'] == 0, 'B_FLUX' ] = np.nan

        # plot the fluxes
        ax = plot_data.plot( y=['A_FLUX', 'B_FLUX'], logy=True, title="GOES X-ray Flux", **kwargs )

        # restrict the plot to the observation time period if desired, otherwise draw
        # a shaded region and dashed restriction lines where the observation takes place
        if restrict_to_obstime:
            ax.set_xlim( [self.start_date, self.end_date] )
        else:
            ax.axvspan( self.start_date, self.end_date, alpha=0.05, color='red' )
            ax.axvline( x=self.start_date, color='red', linestyle='--', linewidth=1.0 )
            ax.axvline( x=self.end_date, color='red', linestyle='--', linewidth=1.0 )

        # draw flux magnitudes with dashed lines
        ax.axhline( y=1e-4, color='black', linestyle='--', linewidth=1.0 )
        ax.axhline( y=1e-5, color='black', linestyle='--', linewidth=1.0 )
        ax.axhline( y=1e-6, color='black', linestyle='--', linewidth=1.0 )
        ax.axhline( y=1e-7, color='black', linestyle='--', linewidth=1.0 )
        ax.axhline( y=1e-8, color='black', linestyle='--', linewidth=1.0 )

        # set boundaries, labels and legend
        ax.set_ylim( [1e-9, 1e-2] )
        ax.set_ylabel( r'Watts / m$^2$' )
        ax.set_xlabel( "Universal Time" )
        ax.legend( [r'GOES 15 0.5-1 $\AA$', r'GOES 15 1-8 $\AA$'] )

        # set a flare class scale on the right
        ax2 = ax.twinx()
        ax2.set_yscale( 'log' )
        ax2.set_ylim( ax.get_ylim() )
        ax2.set_yticks( [3e-8, 3e-7, 3e-6, 3e-5, 3e-4] )
        ax2.set_yticklabels( ['A', 'B', 'C', 'M', 'X'] )
        ax2.minorticks_off()
        ax2.tick_params( right=False )

        if not in_notebook():
            plt.show()
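
    # Usage sketch (added comment): keyword arguments are passed through to
    # pandas.DataFrame.plot, so the figure can be customized, e.g.
    #
    #     g.plot( figsize=(10, 4) )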
    def interpolate( self, iris_timestamps, field=['B_FLUX'] ):
        """
        This function takes timestamps from `iris_data_cube.get_timestamps()`
        (UNIX time - seconds since Thursday, 1 January 1970 00:00:00 UTC) and
        computes interpolated GOES flux values at these points in time.

        Parameters
        ----------
        iris_timestamps : numpy.ndarray
            Array with timestamps from `iris_data_cube.get_timestamps()` or `combined_raster.get_timestamps()`.
        field : string
            'B_FLUX' (default) for the 1-8 Angstrom X-ray flux or 'A_FLUX' for the 0.5-1 Angstrom X-ray flux.

        Returns
        -------
        numpy.ndarray
            Array with interpolated GOES fluxes in W/m^2.
        """

        # convert the GOES time index to UNIX timestamps and interpolate cubically
        goes_timestamps = np.array( (self.data.index - dt.datetime.utcfromtimestamp(0)).total_seconds() )
        f = interp1d( x=goes_timestamps, y=self.data[field].values.flatten(), kind="cubic" )
        return f( iris_timestamps )
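
    # Usage sketch (added comment; object names are illustrative, not part of
    # this module): with an IRIS raster or data cube that provides
    # get_timestamps(), the 1-8 Angstrom flux at each exposure could be
    # obtained as
    #
    #     goes_fluxes = g.interpolate( raster.get_timestamps() )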
    def get_peak_flux( self, flux='B_FLUX' ):
        """
        Returns the peak flux measured in the observation time period.

        Parameters
        ----------
        flux : str
            'B_FLUX' (default) for the 1-8 Angstrom X-ray flux or 'A_FLUX' for the 0.5-1 Angstrom X-ray flux.

        Returns
        -------
        float
            Peak flux in W/m^2, or None if no data fall within the observation time period.
        """

        # restrict the data to the observation time period and take the maximum
        fluxes = self.data[ np.logical_and( self.data.index >= self.start_date, self.data.index <= self.end_date ) ][flux]

        if len( fluxes ) > 0:
            return np.nanmax( fluxes )
        else:
            return None
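
    # Note (added comment): the returned peak flux maps onto the GOES flare
    # classification, e.g. a 1-8 Angstrom peak flux of 5e-6 W/m^2 corresponds
    # to a C5 class flare.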
if __name__ == "__main__": start = dt.datetime(2014, 1, 26, 0, 13, 24, 610000) end = dt.datetime(2014, 1, 26, 1, 6, 2, 656000) g = goes_data( start, end, "/tmp/goes", lazy_eval=True ) g.plot() print( g.get_peak_flux() )