"""
Module for reading CTD data from CSV files into xarray Datasets.
"""
from __future__ import annotations
from collections import defaultdict
from datetime import datetime
import csv
import xarray as xr
import seasenselib.parameters as params
from .base import AbstractReader
class CsvReader(AbstractReader):
    """Read CTD data from a CSV file into an xarray Dataset.

    The CSV file is parsed into a dictionary of columns, the time column is
    converted to ``datetime`` objects, known parameter columns are converted
    to floats, and the result is organized into an xarray Dataset with
    metadata assigned per parameter.

    Attributes
    ----------
    data : xr.Dataset
        The xarray Dataset containing the sensor data.
    input_file : str
        The path to the input CSV file containing the CTD data.
    mapping : dict, optional
        A dictionary mapping names used in the input file to standard names.

    Methods
    -------
    __init__(input_file: str, mapping: dict | None = None, **kwargs)
        Initializes the CsvReader with the input file and optional mapping.
    _load_data()
        Reads the CSV file and processes the data into an xarray Dataset.
    file_extension()
        Returns the file extension for this reader, which is '.csv'.
    """

    # Timestamp layouts accepted in the time column, tried in order. The
    # variant without fractional seconds covers files whose timestamps were
    # written with whole-second precision.
    _CSV_TIME_FORMATS = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')

    def __init__(self, input_file: str,
                 mapping: dict | None = None,
                 **kwargs):
        """Initialize CsvReader.

        Parameters
        ----------
        input_file : str
            Path to the CSV file.
        mapping : dict, optional
            Variable name mapping dictionary.
        **kwargs
            Additional base class parameters:

            - input_header_file : str | None
                Path to separate header file (if applicable).
            - perform_default_postprocessing : bool, default=True
                Whether to perform default post-processing.
            - rename_variables : bool, default=True
                Whether to rename variables to standard names.
            - assign_metadata : bool, default=True
                Whether to assign CF-compliant metadata.
            - sort_variables : bool, default=True
                Whether to sort variables alphabetically.
        """
        super().__init__(input_file, mapping, **kwargs)
        self._validate_file()

    @classmethod
    def _get_valid_extensions(cls) -> tuple[str, ...]:
        """Return valid file extensions for CSV files."""
        return ('.csv', '.txt', '.dat')

    @classmethod
    def _is_extension_validation_strict(cls) -> bool:
        """CSV/text formats can have various extensions, so warn only."""
        return False

    @classmethod
    def _parse_csv_timestamp(cls, timestamp: str) -> datetime:
        """Parse one time-column cell into a ``datetime``.

        Parameters
        ----------
        timestamp : str
            Cell content, formatted as ``YYYY-MM-DD HH:MM:SS`` with an
            optional fractional-seconds part.

        Returns
        -------
        datetime
            The parsed (naive) timestamp.

        Raises
        ------
        ValueError
            If the cell matches none of the accepted layouts.
        """
        for fmt in cls._CSV_TIME_FORMATS:
            try:
                return datetime.strptime(timestamp, fmt)
            except ValueError:
                continue
        raise ValueError(
            f"time data {timestamp!r} does not match any of the formats "
            f"{cls._CSV_TIME_FORMATS!r}"
        )

    @staticmethod
    def _parse_csv_float(value: str | None) -> float:
        """Convert one CSV cell to a float, mapping missing cells to NaN.

        ``csv.DictReader`` yields ``None`` for fields missing from a short
        row and ``''`` for empty cells; both are treated as missing values.

        Raises
        ------
        ValueError
            If the cell is non-empty but not a valid number.
        """
        if value is None or not value.strip():
            return float('nan')
        return float(value)

    def _load_data(self) -> xr.Dataset:
        """Load the CSV file and return an xarray Dataset.

        Returns
        -------
        xr.Dataset
            The loaded dataset.

        Raises
        ------
        ValueError
            If a timestamp or a numeric cell cannot be parsed.
        """
        # Read the CSV into a dictionary of columns. newline='' lets the csv
        # module do its own line-ending handling (as its documentation
        # requires), and 'utf-8-sig' drops a leading byte-order mark that
        # would otherwise become part of the first column name.
        columns = defaultdict(list)
        with open(self.input_file, mode='r', encoding='utf-8-sig',
                  newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                for key, value in row.items():
                    columns[key].append(value)

        # Work on a plain dict so that a missing column raises KeyError
        # instead of silently creating an empty list.
        data = dict(columns)

        # Validation
        super()._validate_necessary_parameters(data, None, None, 'CSV file')

        # Convert 'time' values to datetime objects
        data[params.TIME] = [
            self._parse_csv_timestamp(timestamp)
            for timestamp in data[params.TIME]
        ]

        # Convert all other known parameter columns to floats. Only values
        # are replaced, so iterating the dict while assigning is safe.
        for key in data:
            if key != params.TIME and key in params.default_mappings:
                data[key] = [self._parse_csv_float(value) for value in data[key]]

        # Create xarray Dataset; latitude/longitude are taken from the
        # first row only.
        ds = self._get_xarray_dataset_template(
            data[params.TIME], data[params.DEPTH],
            data[params.LATITUDE][0], data[params.LONGITUDE][0]
        )

        # Assign parameter values and meta information for each parameter
        # to the xarray Dataset
        for key, values in data.items():
            super()._assign_data_for_key_to_xarray_dataset(ds, key, values)
            super()._assign_metadata_for_key_to_xarray_dataset(ds, key)

        return ds

    @classmethod
    def file_extension(cls) -> str | None:
        """Return the file extension for this reader, which is '.csv'."""
        return '.csv'