Source code for seasenselib.readers.csv_reader

"""
Module for reading CTD data from CSV files into xarray Datasets.
"""

from __future__ import annotations
from collections import defaultdict
from datetime import datetime
import csv
import xarray as xr
import seasenselib.parameters as params
from .base import AbstractReader



[docs]
class CsvReader(AbstractReader):
    """ Reads CTD data from a CSV file into an xarray Dataset.

    This class reads CTD data from a CSV file, processes the data into a dictionary of columns,
    and organizes it into an xarray Dataset. It handles the conversion of timestamps to
    datetime objects and assigns metadata according to CF conventions.

    Attributes
    ----------
    data : xr.Dataset
        The xarray Dataset containing the sensor data.
    input_file : str
        The path to the input CSV file containing the CTD data.
    mapping : dict, optional
        A dictionary mapping names used in the input file to standard names.

    Methods
    -------
    __init__(input_file: str, mapping: dict | None = None, **kwargs)
        Initializes the CsvReader with the input file and optional mapping.
    _load_data()
        Reads the CSV file and processes the data into an xarray Dataset.
    format_key()
        Returns the format key of this reader, which is 'csv'.
    format_name()
        Returns the format name of this reader, which is 'CSV'.
    file_extension()
        Returns the file extension for this reader, which is '.csv'.

    Properties
    ----------
    data : xr.Dataset (read-only)
        Returns the xarray Dataset containing the sensor data.
        For backward compatibility, get_data() method is also available but deprecated.
    """


[docs]
    def __init__(self, input_file: str, mapping: dict | None = None, **kwargs):
        """Create a reader for one CSV file.

        All arguments are handed to the base-class initializer unchanged;
        afterwards ``_validate_file()`` is invoked.

        Parameters
        ----------
        input_file : str
            Path to the CSV file.
        mapping : dict, optional
            Variable name mapping dictionary.
        **kwargs
            Options forwarded to the base class:

            - input_header_file : str | None
                Path to separate header file (if applicable).
            - perform_default_postprocessing : bool, default=True
                Whether to perform default post-processing.
            - rename_variables : bool, default=True
                Whether to rename variables to standard names.
            - assign_metadata : bool, default=True
                Whether to assign CF-compliant metadata.
            - sort_variables : bool, default=True
                Whether to sort variables alphabetically.
        """
        # Order matters: the base class is set up before the file is validated.
        super().__init__(input_file, mapping, **kwargs)
        self._validate_file()


    @classmethod
    def _get_valid_extensions(cls) -> tuple[str, ...]:
        """Return valid file extensions for CSV files."""
        return ('.csv', '.txt', '.dat')

    @classmethod
    def _is_extension_validation_strict(cls) -> bool:
        """CSV/text formats can have various extensions, so warn only."""
        return False


[docs]
    def _load_data(self) -> xr.Dataset:
        """Load the CSV file and return an xarray Dataset.

        The file is read column-wise (header names become keys). After the
        necessary parameters have been validated, the time column is parsed
        into ``datetime`` objects and every other column whose name is
        contained in ``params.default_mappings`` is converted to ``float``.
        Columns not listed there are passed on as the strings read from
        the file.

        The dataset template is built from the time and depth columns plus
        the latitude and longitude of the first row.

        Returns
        -------
        xr.Dataset
            The loaded dataset.

        Raises
        ------
        ValueError
            If a timestamp matches neither ``'%Y-%m-%d %H:%M:%S.%f'`` nor
            ``'%Y-%m-%d %H:%M:%S'``, or if a value of a numeric column
            cannot be converted to ``float``.
        """
        # Read the CSV into a dictionary of columns. newline='' is what the
        # csv module expects, so that line breaks inside quoted fields are
        # parsed correctly.
        columns = defaultdict(list)
        with open(self.input_file, mode='r', encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                for key, value in row.items():
                    columns[key].append(value)

        # The file is closed here; everything below works on in-memory data.
        data = dict(columns)

        # Validation
        super()._validate_necessary_parameters(data, None, None, 'CSV file')

        # Convert 'time' values to datetime objects
        data[params.TIME] = [
            self._parse_csv_timestamp(timestamp) for timestamp in data[params.TIME]
        ]

        # Convert all other known parameter columns to floats
        for key in data:
            if key != params.TIME and key in params.default_mappings:
                data[key] = [float(value) for value in data[key]]

        # Create xarray Dataset
        ds = self._get_xarray_dataset_template(
            data[params.TIME], data[params.DEPTH],
            data[params.LATITUDE][0], data[params.LONGITUDE][0]
        )

        # Assign parameter values and meta information for each parameter
        for key, values in data.items():
            super()._assign_data_for_key_to_xarray_dataset(ds, key, values)
            super()._assign_metadata_for_key_to_xarray_dataset(ds, key)

        return ds

    @staticmethod
    def _parse_csv_timestamp(timestamp: str) -> datetime:
        """Parse ``'YYYY-MM-DD HH:MM:SS[.ffffff]'`` into a naive datetime."""
        try:
            return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            # Whole-second timestamps have no fractional part; a string that
            # matches neither format still raises ValueError.
            return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')



[docs]
    @classmethod
    def format_key(cls) -> str:
        return 'csv'

    

[docs]
    @classmethod
    def format_name(cls) -> str:
        return 'CSV'



[docs]
    @classmethod
    def file_extension(cls) -> str | None:
        return '.csv'