diff --git a/CHANGELOG.md b/CHANGELOG.md index da04cc371bd4fb0acebd901e2f016e62dbf5e1ba..205a05df2bd2dbbf523a6131281e56219354c121 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Types of changes ## TODOs +- set log level via config - add sources for BB tagging and ACN background - investigate if data caching is necessary (load-from-thredds)? - scatter-plot: plot as line if sufficient data is available? @@ -34,6 +35,21 @@ Types of changes ## [Unreleased] +## v0.0.17, 2025-03-31 + +### Added + +- trajectory data loading from disk and thredds +- trajectory plotting +- backend: possibility to set log level in config file +- frontend: loading indicators for the plots + +### Changed + +- thredds functionality now part of ./src/data.py + +## v0.0.16, 2025-03-28 + ### Added - prepare checkbox to toggle trajectories plotting diff --git a/main.py b/main.py index f96dac988fafd7c9a437ea26ff715a562ec339c8..58bb51ed1cbfedb3146b997e55a5042189678db5 100644 --- a/main.py +++ b/main.py @@ -9,20 +9,24 @@ from pathlib import Path import dash_bootstrap_components as dbc import pandas as pd +import xarray as xr from dash import Dash +import src.data as loader from src.callbacks import ( register_info_updaters, register_map_plot, register_timeseries_plot, ) from src.config import DotDict, get_latest_semantic_version_tag -from src.data import load_from_disk, load_from_thredds -from src.layout import create_layout -from src.thredds import get_datasets +from src.layout import create + +# ----------------------------------------------------------------------------- appconfig: DotDict = DotDict.from_toml_file("./config/env.toml") appconfig.VERSION = get_latest_semantic_version_tag() or appconfig.VERSION +log.basicConfig() +log.getLogger().setLevel(appconfig.LOGLEVEL) varconfig: pd.DataFrame = pd.read_csv("./config/parms_units.csv", sep=";") varconfig = varconfig.set_index("VNAME") @@ -32,15 +36,29 @@ airportsconfig = airportsconfig.set_index("IATA") # using separate variable declaration here to have a type annotation: data: pd.DataFrame = pd.DataFrame() +trajdata: dict[int, xr.Dataset] = {} ap_info: dict[int, str] = {} + +# fmt: off data, ap_info = ( - load_from_thredds(get_datasets(appconfig), appconfig, airportsconfig, True) # type: ignore + loader.ms_from_thredds(loader.get_ms_datasets(appconfig), appconfig, airportsconfig, True) if appconfig.USE_THREDDS - else load_from_disk(Path("./testdata").glob("MS_*.nc"), appconfig, airportsconfig, True) # type: ignore + else loader.ms_from_disk( + Path("./testdata").glob(appconfig.FNAME_PREFIX + "*" + appconfig.FNAME_SUFFIX), + appconfig, + airportsconfig, + True) +) +trajdata = ( + loader.tr_from_thredds(appconfig) + if appconfig.USE_THREDDS + else loader.tr_from_disk( + Path("./testdata").glob(appconfig.FNAME_PREFIX_TR + "*" + appconfig.FNAME_SUFFIX) + ) ) -log.debug(f"Data loaded. Used THREDDS: {appconfig.USE_THREDDS}") -# fmt: off +# ----------------------------------------------------------------------------- + app = Dash( appconfig.TITLE, requests_pathname_prefix=appconfig.REQUESTS_PATH_PREFIX, @@ -52,12 +70,12 @@ app.title = appconfig.TITLE app.css.config.serve_locally = True app.scripts.config.serve_locally = True -app.layout = create_layout(data, appconfig) +app.layout = create(data, appconfig) server = app.server # for deployment via gunicorn / WSGI server register_info_updaters(app, varconfig, ap_info) -register_map_plot(app, data, varconfig) +register_map_plot(app, data, trajdata, varconfig) register_timeseries_plot(app, data, varconfig) diff --git a/pyproject.toml b/pyproject.toml index dd6e6cf8e15fde2c8cb485ee731d0a1f379032e0..32bc95576eaecaf5feac500cf7d19e42937884d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,23 @@ [project] name = "caribic-dash" -version = "0.0.16" +version = "0.0.17" description = "IRISCC dashboard with CARIBIC data" readme = "README.md" requires-python = ">=3.12" classifiers = [ - "Programming Language :: Python :: 3", - "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", ] dependencies = [ - "dash>=2.18.2", - "dash-bootstrap-components>=2.0.0", - "gunicorn>=23.0.0", - "netcdf4>=1.7.2", # indirect; backend for xarray - "pandas>=2.2.3", - "plotly>=6.0.1", - "python-dotenv>=1.0.1", - "siphon>=0.10.0", - "xarray>=2025.3.0", + "dash>=2.18.2", + "dash-bootstrap-components>=2.0.0", + "gunicorn>=23.0.0", + "netcdf4>=1.7.2", # indirect; backend for xarray + "pandas>=2.2.3", + "plotly>=6.0.1", + "python-dotenv>=1.0.1", + "siphon>=0.10.0", + "xarray>=2025.3.0", ] [dependency-groups] diff --git a/src/callbacks.py b/src/callbacks.py index fb534c5dea2c33295db048234f9cc06297991a46..50af39cc6bd40f5926a65d9b252f621ed5d76bb9 100644 --- a/src/callbacks.py +++ b/src/callbacks.py @@ -1,5 +1,6 @@ import pandas as pd import plotly.graph_objects as go +import xarray as xr from dash import Dash from dash.dependencies import Input, Output from plotly.subplots import make_subplots @@ -32,7 +33,12 @@ def register_info_updaters(app: Dash, var_info: pd.DataFrame, ap_info: dict[int, ) -def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.DataFrame): +def register_map_plot( + app: Dash, + timeseries_data: pd.DataFrame, + trajectory_data: dict[int, xr.Dataset], + var_info: pd.DataFrame, +): """ Register callbacks for updating map plot. @@ -41,7 +47,7 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat """ @app.callback( - [Output("fig-map", "figure")], + [Output("fig-map", "figure"), Output("confirm-trajdata-warning", "displayed")], [ Input("flight-dropdown", "value"), Input("variable-dropdown", "value"), @@ -50,16 +56,35 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat ) def update_map_plot(selected_flight: int, primary_variable: str, show_trajectories: bool): filtered_df = timeseries_data[timeseries_data["flight_number"] == selected_flight] - valid_indices = filtered_df[primary_variable].notna() + valid_indices = filtered_df[primary_variable].notna() # type: ignore unit_primary = var_info.loc[primary_variable, :].Unit + fig_map = go.Figure() + + show_trajdata_warn = False if show_trajectories: - print("ON") # try to find the trajectory file for selected flight - # show pop up if no trajectory file found ? - # make plots if trajectory file found - - fig_map = go.Figure( + if trajs := trajectory_data.get(selected_flight): + # make plots if trajectory file found + for i in range(trajs.trajectory.size): + fig_map.add_trace( + go.Scattermap( + lat=trajs["latitude"].sel(trajectory=i), + lon=trajs["longitude"].sel(trajectory=i), + mode="markers", + marker={ + "color": "#c8c8c8", # [ + # "grey" if x else "lightgrey" + # for x in trajs["pressure"].sel(trajectory=i) > 700 + # ] + }, + ) + ) + else: + # show pop up if no trajectory file found + show_trajdata_warn = True + + fig_map.add_trace( # world map / flight track go.Scattermap( lon=filtered_df.loc[valid_indices, "lon"], lat=filtered_df.loc[valid_indices, "lat"], @@ -113,7 +138,7 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat showlegend=False, ) - return [fig_map] + return [fig_map, show_trajdata_warn] def register_timeseries_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.DataFrame): @@ -134,7 +159,7 @@ def register_timeseries_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: ) def update_ts_plot(selected_flight: int, primary_variable: str, secondary_variable: str): filtered_df = timeseries_data[timeseries_data["flight_number"] == selected_flight] - valid_indices = filtered_df[primary_variable].notna() + valid_indices = filtered_df[primary_variable].notna() # type: ignore unit_primary = var_info.loc[primary_variable, :].Unit unit_secondary = ( var_info.loc[secondary_variable, :].Unit diff --git a/src/data.py b/src/data.py index 06485da6dfea4f42ccf48e0c2440a0f4b687401a..f22242b74b8bc1a262afc71dad020f45286fe4e1 100644 --- a/src/data.py +++ b/src/data.py @@ -9,12 +9,39 @@ from typing import Iterable import pandas as pd import xarray as xr +from siphon.catalog import Dataset, TDSCatalog from siphon.catalog import Dataset as SiphonDataset from .config import DotDict -def load_from_thredds( +def get_ms_datasets(cfg: DotDict) -> list[Dataset]: + """Obtain references to measurement data from THREDDS""" + + log.info("begin query THREDDS...") + + catalog = TDSCatalog(cfg.THREDDS_URL) + flights = ( + list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1)) + if cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1 + else cfg.FLIGHTS + ) + datasets = [] + + for i, ds in enumerate(catalog.datasets): + if not (ds.startswith(cfg.FNAME_PREFIX) and ds.endswith(cfg.FNAME_SUFFIX)): + continue + if int(ds.split("_")[2]) not in flights: + continue + + datasets.append(catalog.datasets[i]) + + log.info("THREDDS query completed") + + return datasets + + +def ms_from_thredds( datasets: list[SiphonDataset], config: DotDict, apconfig: pd.DataFrame, drop: bool = False ) -> (pd.DataFrame, dict[int, str]): # type: ignore """ @@ -25,14 +52,15 @@ def load_from_thredds( ValueError: If no datasets are found. """ - log.debug("begin load data from THREDDS...") + log.info("begin load data from THREDDS...") dataframes = [] airports = {} for i, ds in enumerate(datasets): - fullset = datasets[i].remote_access(use_xarray=True).to_dataframe() - if "ACN" in fullset.columns and fullset["ACN"].isna().all(): + log.info(f"load {ds.name}") + fullset = datasets[i].remote_access(use_xarray=True).to_dataframe() # type: ignore + if "ACN" in fullset.columns and fullset["ACN"].isna().all(): # type: ignore log.warning(f"'ACN' column is all NaN values! Skip {ds.name}") continue @@ -63,12 +91,12 @@ def load_from_thredds( if len(dataframes) == 0: raise ValueError("No datasets found!") - log.debug("Data loaded from THREDDS successfully") + log.info("data loaded from THREDDS successfully") return (pd.concat(dataframes), airports) -def load_from_disk( +def ms_from_disk( paths: Iterable[Path], config: DotDict, apconfig: pd.DataFrame, drop: bool = False ) -> (pd.DataFrame, dict[int, str]): # type: ignore """ @@ -78,12 +106,15 @@ def load_from_disk( ValueError: If 'ACN' not found in dataset. ValueError: If no datasets are found. """ + + log.info("begin load data from disk...") + dataframes = [] airports = {} for path in sorted(paths): fullset = xr.open_dataset(path).to_dataframe() - if "ACN" in fullset.columns and fullset["ACN"].isna().all(): + if "ACN" in fullset.columns and fullset["ACN"].isna().all(): # type: ignore log.warning(f"'ACN' column is all NaN values! Skip {path.name}") continue @@ -103,7 +134,7 @@ def load_from_disk( df["date"] = datetime.strptime(path.stem.split("_")[1], "%Y%m%d").strftime("%Y-%m-%d") # make a linear interpolation of the acetonitrile column for BB flaggin if "ACN" in df.columns: - if df["ACN"].isna().all(): + if df["ACN"].isna().all(): # type: ignore log.warning("'ACN' column is all NaN values!") df["BB_flag"] = df["ACN"].interpolate() > ( config.BB_THRESH + config.ACN_PRC_NSIGMA * df["ACN_prc"].interpolate() @@ -114,8 +145,62 @@ def load_from_disk( dataframes.append(df) if len(dataframes) == 0: - raise ValueError("No datasets found!") + raise ValueError("no datasets found!") - log.info("Data loaded from disk successfully") + log.info("data loaded from disk successfully") return (pd.concat(dataframes), airports) + + +def tr_from_disk(paths: Iterable[Path]) -> dict[int, xr.Dataset]: + + log.info("begin load trajectory data from disk...") + + d = {} + for p in paths: + fno = int(p.name.split("_")[-2]) + assert d.get(fno) is None # assert there are no dupes + d[fno] = xr.load_dataset(p) + + log.info("trajectory data loaded from disk successfully") + + return d + + +def tr_from_thredds(cfg: DotDict) -> dict[int, xr.Dataset]: + """ + Load multiple netCDF files from a THREDS TDS into a dict, + mapping flight number -> xarray.Dataset. + + Raises: + ValueError: If no datasets are found. + """ + + log.info("begin load trajectory data from THREDDS...") + + d = {} + + catalog = TDSCatalog(cfg.THREDDS_URL_TRDATA) + flights = ( + list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1)) + if cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1 + else cfg.FLIGHTS + ) + + for i, ds in enumerate(catalog.datasets): # 'ds' is treated as string here + if not (ds.startswith(cfg.FNAME_PREFIX_TR) and ds.endswith(cfg.FNAME_SUFFIX)): + log.warning(f"invalid dataset '{ds}'") + continue + fno = int(ds.split("_")[4]) + if fno not in flights: + log.warning(f"dataset '{ds}' not in specified flight range") + continue + + log.info(f"load {ds}") + + assert d.get(fno) is None # assert there are no dupes + d[fno] = catalog.datasets[i].remote_access(use_xarray=True) + + log.info("trajectory data loaded from THREDDS successfully") + + return d diff --git a/src/layout.py b/src/layout.py index 1638aa543da806a6f9e2d49dc9c60c28f810607c..b76dca336c37d4bdb89ac25be19a8f9d77796fd2 100644 --- a/src/layout.py +++ b/src/layout.py @@ -9,18 +9,18 @@ from dash import dcc, html from .config import DotDict -def create_layout(df: pd.DataFrame, config: DotDict) -> list: +def create(df: pd.DataFrame, config: DotDict) -> list: """ Create the layout for the dashboard. Makes a list of html / Dash components. """ - flights = df[["flight_number", "date"]].drop_duplicates("flight_number") + flights = df[["flight_number", "date"]].drop_duplicates("flight_number") # type: ignore flight_opts = [ { - "label": f"{row.flight_number:d} | {row.date}", - "value": row.flight_number, + "label": f"{row.flight_number:d} | {row.date}", # type: ignore + "value": row.flight_number, # type: ignore } for row in flights.itertuples() ] @@ -214,7 +214,7 @@ def create_layout(df: pd.DataFrame, config: DotDict) -> list: ), ], style={ - "width": "30%", + "width": "25%", "display": "inline-block", "text-align": "right", "vertical-align": "middle", @@ -223,29 +223,45 @@ def create_layout(df: pd.DataFrame, config: DotDict) -> list: ], style={ "display": "flex", - # "flex-direction": "row", - # "justify-content": "left", "align-items": "center", "width": "100%", }, ), - dcc.Graph( - id="fig-map", - style={ - "margin-top": "10px", - "margin-right": "10px", - "margin-bottom": "0px", - "margin-left": "10px", - }, + dcc.ConfirmDialog( + id="confirm-trajdata-warning", + message="No trajectory data available for this flight.", ), - dcc.Graph( - id="fig-ts", - style={ - "margin-top": "10px", - "margin-right": "10px", - "margin-bottom": "0px", - "margin-left": "10px", - }, + dcc.Loading( + id="loading-map", + type="circle", # Options: "graph", "cube", "circle", "dot", or "default" + overlay_style={"visibility": "visible", "filter": "blur(3px)"}, + children=html.Div( + dcc.Graph( + id="fig-map", + style={ + "margin-top": "10px", + "margin-right": "10px", + "margin-bottom": "0px", + "margin-left": "10px", + }, + ) + ), + ), + dcc.Loading( + id="loading-ts", + type="circle", # Options: "graph", "cube", "circle", "dot", or "default" + overlay_style={"visibility": "visible", "filter": "blur(3px)"}, + children=html.Div( + dcc.Graph( + id="fig-ts", + style={ + "margin-top": "10px", + "margin-right": "10px", + "margin-bottom": "0px", + "margin-left": "10px", + }, + ), + ), ), html.Label( "BB flagging: BB influence if ACN > (145 ppt + 3*ACN_prc)", diff --git a/src/thredds.py b/src/thredds.py deleted file mode 100644 index be9439dcc137bbe82dd582ae3c3ac050a0a97c09..0000000000000000000000000000000000000000 --- a/src/thredds.py +++ /dev/null @@ -1,26 +0,0 @@ -import logging as log - -from siphon.catalog import Dataset, TDSCatalog - -from .config import DotDict - - -def get_datasets(cfg: DotDict) -> list[Dataset]: - log.debug("begin query THREDDS...") - catalog = TDSCatalog(cfg.THREDDS_URL) - flights = ( - list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1)) - if cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1 - else cfg.FLIGHTS - ) - datasets = [] - - for i, ds in enumerate(catalog.datasets): - if not (ds.startswith(cfg.FNAME_PREFIX) and ds.endswith(cfg.FNAME_SUFFIX)): - continue - if int(ds.split("_")[2]) not in flights: - continue - - datasets.append(catalog.datasets[i]) - log.debug("THREDDS query completed") - return datasets diff --git a/uv.lock b/uv.lock index 2df36139182515bd4ad80ceea6ba3fc4468bc539..32c5ae6ddd56dbf50ec8726c606abdf9e564b7b8 100644 --- a/uv.lock +++ b/uv.lock @@ -80,7 +80,7 @@ wheels = [ [[package]] name = "caribic-dash" -version = "0.0.16" +version = "0.0.17" source = { virtual = "." } dependencies = [ { name = "dash" },