Skip to content
Snippets Groups Projects
Commit d6cd0c0a authored by Florian Obersteiner's avatar Florian Obersteiner :octopus:
Browse files

add trajectory plotting

parent 20440d31
No related branches found
No related tags found
1 merge request!8trajectory plotting
......@@ -15,6 +15,7 @@ Types of changes
## TODOs
- set log level via config
- add sources for BB tagging and ACN background
- investigate if data caching is necessary (load-from-thredds)?
- scatter-plot: plot as line if sufficient data is available?
......@@ -34,6 +35,21 @@ Types of changes
## [Unreleased]
## v0.0.17, 2025-03-31
### Added
- trajectory data loading from disk and thredds
- trajectory plotting
- backend: possibility to set log level in config file
- frontend: loading indicators for the plots
### Changed
- thredds functionality now part of ./src/data.py
## v0.0.16, 2025-03-28
### Added
- prepare checkbox to toggle trajectories plotting
......
......@@ -9,20 +9,24 @@ from pathlib import Path
import dash_bootstrap_components as dbc
import pandas as pd
import xarray as xr
from dash import Dash
import src.data as loader
from src.callbacks import (
register_info_updaters,
register_map_plot,
register_timeseries_plot,
)
from src.config import DotDict, get_latest_semantic_version_tag
from src.data import load_from_disk, load_from_thredds
from src.layout import create_layout
from src.thredds import get_datasets
from src.layout import create
# -----------------------------------------------------------------------------
appconfig: DotDict = DotDict.from_toml_file("./config/env.toml")
appconfig.VERSION = get_latest_semantic_version_tag() or appconfig.VERSION
log.basicConfig()
log.getLogger().setLevel(appconfig.LOGLEVEL)
varconfig: pd.DataFrame = pd.read_csv("./config/parms_units.csv", sep=";")
varconfig = varconfig.set_index("VNAME")
......@@ -32,15 +36,29 @@ airportsconfig = airportsconfig.set_index("IATA")
# using separate variable declaration here to have a type annotation:
data: pd.DataFrame = pd.DataFrame()
trajdata: dict[int, xr.Dataset] = {}
ap_info: dict[int, str] = {}
# fmt: off
data, ap_info = (
load_from_thredds(get_datasets(appconfig), appconfig, airportsconfig, True) # type: ignore
loader.ms_from_thredds(loader.get_ms_datasets(appconfig), appconfig, airportsconfig, True)
if appconfig.USE_THREDDS
else load_from_disk(Path("./testdata").glob("MS_*.nc"), appconfig, airportsconfig, True) # type: ignore
else loader.ms_from_disk(
Path("./testdata").glob(appconfig.FNAME_PREFIX + "*" + appconfig.FNAME_SUFFIX),
appconfig,
airportsconfig,
True)
)
trajdata = (
loader.tr_from_thredds(appconfig)
if appconfig.USE_THREDDS
else loader.tr_from_disk(
Path("./testdata").glob(appconfig.FNAME_PREFIX_TR + "*" + appconfig.FNAME_SUFFIX)
)
)
log.debug(f"Data loaded. Used THREDDS: {appconfig.USE_THREDDS}")
# fmt: on
# -----------------------------------------------------------------------------
app = Dash(
appconfig.TITLE,
requests_pathname_prefix=appconfig.REQUESTS_PATH_PREFIX,
......@@ -52,12 +70,12 @@ app.title = appconfig.TITLE
app.css.config.serve_locally = True
app.scripts.config.serve_locally = True
app.layout = create_layout(data, appconfig)
app.layout = create(data, appconfig)
server = app.server # for deployment via gunicorn / WSGI server
register_info_updaters(app, varconfig, ap_info)
register_map_plot(app, data, varconfig)
register_map_plot(app, data, trajdata, varconfig)
register_timeseries_plot(app, data, varconfig)
......
import pandas as pd
import plotly.graph_objects as go
import xarray as xr
from dash import Dash
from dash.dependencies import Input, Output
from plotly.subplots import make_subplots
......@@ -32,7 +33,12 @@ def register_info_updaters(app: Dash, var_info: pd.DataFrame, ap_info: dict[int,
)
def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.DataFrame):
def register_map_plot(
app: Dash,
timeseries_data: pd.DataFrame,
trajectory_data: dict[int, xr.Dataset],
var_info: pd.DataFrame,
):
"""
Register callbacks for updating map plot.
......@@ -41,7 +47,7 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat
"""
@app.callback(
[Output("fig-map", "figure")],
[Output("fig-map", "figure"), Output("confirm-trajdata-warning", "displayed")],
[
Input("flight-dropdown", "value"),
Input("variable-dropdown", "value"),
......@@ -50,16 +56,35 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat
)
def update_map_plot(selected_flight: int, primary_variable: str, show_trajectories: bool):
filtered_df = timeseries_data[timeseries_data["flight_number"] == selected_flight]
valid_indices = filtered_df[primary_variable].notna()
valid_indices = filtered_df[primary_variable].notna() # type: ignore
unit_primary = var_info.loc[primary_variable, :].Unit
fig_map = go.Figure()
show_trajdata_warn = False
if show_trajectories:
print("ON")
# try to find the trajectory file for selected flight
# show pop up if no trajectory file found ?
# make plots if trajectory file found
fig_map = go.Figure(
if trajs := trajectory_data.get(selected_flight):
# make plots if trajectory file found
for i in range(trajs.trajectory.size):
fig_map.add_trace(
go.Scattermap(
lat=trajs["latitude"].sel(trajectory=i),
lon=trajs["longitude"].sel(trajectory=i),
mode="markers",
marker={
"color": "#c8c8c8", # [
# "grey" if x else "lightgrey"
# for x in trajs["pressure"].sel(trajectory=i) > 700
# ]
},
)
)
else:
# show pop up if no trajectory file found
show_trajdata_warn = True
fig_map.add_trace( # world map / flight track
go.Scattermap(
lon=filtered_df.loc[valid_indices, "lon"],
lat=filtered_df.loc[valid_indices, "lat"],
......@@ -113,7 +138,7 @@ def register_map_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.Dat
showlegend=False,
)
return [fig_map]
return [fig_map, show_trajdata_warn]
def register_timeseries_plot(app: Dash, timeseries_data: pd.DataFrame, var_info: pd.DataFrame):
......@@ -134,7 +159,7 @@ def register_timeseries_plot(app: Dash, timeseries_data: pd.DataFrame, var_info:
)
def update_ts_plot(selected_flight: int, primary_variable: str, secondary_variable: str):
filtered_df = timeseries_data[timeseries_data["flight_number"] == selected_flight]
valid_indices = filtered_df[primary_variable].notna()
valid_indices = filtered_df[primary_variable].notna() # type: ignore
unit_primary = var_info.loc[primary_variable, :].Unit
unit_secondary = (
var_info.loc[secondary_variable, :].Unit
......
......@@ -9,12 +9,39 @@ from typing import Iterable
import pandas as pd
import xarray as xr
from siphon.catalog import Dataset, TDSCatalog
from siphon.catalog import Dataset as SiphonDataset
from .config import DotDict
def load_from_thredds(
def get_ms_datasets(cfg: DotDict) -> list[Dataset]:
    """Collect THREDDS dataset handles for the configured measurement flights."""
    log.info("begin query THREDDS...")
    catalog = TDSCatalog(cfg.THREDDS_URL)
    # an explicit flight-number range (both bounds != -1) takes precedence
    # over the configured list of individual flights
    use_range = cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1
    flights = list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1)) if use_range else cfg.FLIGHTS

    selected = []
    for pos, name in enumerate(catalog.datasets):  # iterating yields dataset names
        matches_pattern = name.startswith(cfg.FNAME_PREFIX) and name.endswith(cfg.FNAME_SUFFIX)
        # flight number is the third underscore-separated field of the file name;
        # only parsed when the name pattern matched (short-circuit)
        if matches_pattern and int(name.split("_")[2]) in flights:
            selected.append(catalog.datasets[pos])

    log.info("THREDDS query completed")
    return selected
def ms_from_thredds(
datasets: list[SiphonDataset], config: DotDict, apconfig: pd.DataFrame, drop: bool = False
) -> (pd.DataFrame, dict[int, str]): # type: ignore
"""
......@@ -25,14 +52,15 @@ def load_from_thredds(
ValueError: If no datasets are found.
"""
log.debug("begin load data from THREDDS...")
log.info("begin load data from THREDDS...")
dataframes = []
airports = {}
for i, ds in enumerate(datasets):
fullset = datasets[i].remote_access(use_xarray=True).to_dataframe()
if "ACN" in fullset.columns and fullset["ACN"].isna().all():
log.info(f"load {ds.name}")
fullset = datasets[i].remote_access(use_xarray=True).to_dataframe() # type: ignore
if "ACN" in fullset.columns and fullset["ACN"].isna().all(): # type: ignore
log.warning(f"'ACN' column is all NaN values! Skip {ds.name}")
continue
......@@ -63,12 +91,12 @@ def load_from_thredds(
if len(dataframes) == 0:
raise ValueError("No datasets found!")
log.debug("Data loaded from THREDDS successfully")
log.info("data loaded from THREDDS successfully")
return (pd.concat(dataframes), airports)
def load_from_disk(
def ms_from_disk(
paths: Iterable[Path], config: DotDict, apconfig: pd.DataFrame, drop: bool = False
) -> (pd.DataFrame, dict[int, str]): # type: ignore
"""
......@@ -78,12 +106,15 @@ def load_from_disk(
ValueError: If 'ACN' not found in dataset.
ValueError: If no datasets are found.
"""
log.info("begin load data from disk...")
dataframes = []
airports = {}
for path in sorted(paths):
fullset = xr.open_dataset(path).to_dataframe()
if "ACN" in fullset.columns and fullset["ACN"].isna().all():
if "ACN" in fullset.columns and fullset["ACN"].isna().all(): # type: ignore
log.warning(f"'ACN' column is all NaN values! Skip {path.name}")
continue
......@@ -103,7 +134,7 @@ def load_from_disk(
df["date"] = datetime.strptime(path.stem.split("_")[1], "%Y%m%d").strftime("%Y-%m-%d")
# make a linear interpolation of the acetonitrile column for BB flagging
if "ACN" in df.columns:
if df["ACN"].isna().all():
if df["ACN"].isna().all(): # type: ignore
log.warning("'ACN' column is all NaN values!")
df["BB_flag"] = df["ACN"].interpolate() > (
config.BB_THRESH + config.ACN_PRC_NSIGMA * df["ACN_prc"].interpolate()
......@@ -114,8 +145,62 @@ def load_from_disk(
dataframes.append(df)
if len(dataframes) == 0:
raise ValueError("No datasets found!")
raise ValueError("no datasets found!")
log.info("Data loaded from disk successfully")
log.info("data loaded from disk successfully")
return (pd.concat(dataframes), airports)
def tr_from_disk(paths: Iterable[Path]) -> dict[int, xr.Dataset]:
    """
    Load trajectory netCDF files from disk into a dict,
    mapping flight number -> xarray.Dataset.

    Raises:
        ValueError: If the same flight number occurs in more than one file.
    """
    log.info("begin load trajectory data from disk...")
    d: dict[int, xr.Dataset] = {}
    for p in paths:
        # flight number is encoded in the file name (second-to-last '_' field)
        fno = int(p.name.split("_")[-2])
        # explicit raise instead of assert: asserts are stripped under 'python -O',
        # which would let a duplicate silently overwrite an earlier dataset
        if fno in d:
            raise ValueError(f"duplicate trajectory file for flight {fno}: {p.name}")
        d[fno] = xr.load_dataset(p)
    log.info("trajectory data loaded from disk successfully")
    return d
def tr_from_thredds(cfg: DotDict) -> dict[int, xr.Dataset]:
    """
    Load multiple netCDF files from a THREDDS TDS into a dict,
    mapping flight number -> xarray.Dataset.

    Raises:
        ValueError: If no datasets are found, or if the same flight number
            occurs in more than one dataset.
    """
    log.info("begin load trajectory data from THREDDS...")
    d: dict[int, xr.Dataset] = {}
    catalog = TDSCatalog(cfg.THREDDS_URL_TRDATA)
    # an explicit flight-number range (both bounds != -1) takes precedence
    # over the configured list of individual flights
    flights = (
        list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1))
        if cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1
        else cfg.FLIGHTS
    )
    for i, ds in enumerate(catalog.datasets):  # 'ds' is treated as string here
        if not (ds.startswith(cfg.FNAME_PREFIX_TR) and ds.endswith(cfg.FNAME_SUFFIX)):
            log.warning(f"invalid dataset '{ds}'")
            continue
        # flight number is the fifth underscore-separated field of the file name
        fno = int(ds.split("_")[4])
        if fno not in flights:
            log.warning(f"dataset '{ds}' not in specified flight range")
            continue
        log.info(f"load {ds}")
        # explicit raise instead of assert so the dupe check survives 'python -O'
        if fno in d:
            raise ValueError(f"duplicate trajectory dataset for flight {fno}")
        d[fno] = catalog.datasets[i].remote_access(use_xarray=True)
    if not d:
        # the docstring promised this ValueError; previously the function
        # silently returned an empty dict when nothing matched
        raise ValueError("no trajectory datasets found!")
    log.info("trajectory data loaded from THREDDS successfully")
    return d
......@@ -9,18 +9,18 @@ from dash import dcc, html
from .config import DotDict
def create_layout(df: pd.DataFrame, config: DotDict) -> list:
def create(df: pd.DataFrame, config: DotDict) -> list:
"""
Create the layout for the dashboard.
Makes a list of html / Dash components.
"""
flights = df[["flight_number", "date"]].drop_duplicates("flight_number")
flights = df[["flight_number", "date"]].drop_duplicates("flight_number") # type: ignore
flight_opts = [
{
"label": f"{row.flight_number:d} | {row.date}",
"value": row.flight_number,
"label": f"{row.flight_number:d} | {row.date}", # type: ignore
"value": row.flight_number, # type: ignore
}
for row in flights.itertuples()
]
......@@ -214,7 +214,7 @@ def create_layout(df: pd.DataFrame, config: DotDict) -> list:
),
],
style={
"width": "30%",
"width": "25%",
"display": "inline-block",
"text-align": "right",
"vertical-align": "middle",
......@@ -223,29 +223,45 @@ def create_layout(df: pd.DataFrame, config: DotDict) -> list:
],
style={
"display": "flex",
# "flex-direction": "row",
# "justify-content": "left",
"align-items": "center",
"width": "100%",
},
),
dcc.Graph(
id="fig-map",
style={
"margin-top": "10px",
"margin-right": "10px",
"margin-bottom": "0px",
"margin-left": "10px",
},
dcc.ConfirmDialog(
id="confirm-trajdata-warning",
message="No trajectory data available for this flight.",
),
dcc.Graph(
id="fig-ts",
style={
"margin-top": "10px",
"margin-right": "10px",
"margin-bottom": "0px",
"margin-left": "10px",
},
dcc.Loading(
id="loading-map",
type="circle", # Options: "graph", "cube", "circle", "dot", or "default"
overlay_style={"visibility": "visible", "filter": "blur(3px)"},
children=html.Div(
dcc.Graph(
id="fig-map",
style={
"margin-top": "10px",
"margin-right": "10px",
"margin-bottom": "0px",
"margin-left": "10px",
},
)
),
),
dcc.Loading(
id="loading-ts",
type="circle", # Options: "graph", "cube", "circle", "dot", or "default"
overlay_style={"visibility": "visible", "filter": "blur(3px)"},
children=html.Div(
dcc.Graph(
id="fig-ts",
style={
"margin-top": "10px",
"margin-right": "10px",
"margin-bottom": "0px",
"margin-left": "10px",
},
),
),
),
html.Label(
"BB flagging: BB influence if ACN > (145 ppt + 3*ACN_prc)",
......
import logging as log
from siphon.catalog import Dataset, TDSCatalog
from .config import DotDict
def get_datasets(cfg: DotDict) -> list[Dataset]:
    """Return THREDDS dataset handles matching the configured name pattern and flights."""
    log.debug("begin query THREDDS...")
    catalog = TDSCatalog(cfg.THREDDS_URL)
    # a flight-number range (both bounds set, i.e. != -1) overrides the flight list
    if cfg.FLIGHTNO_MIN != -1 and cfg.FLIGHTNO_MAX != -1:
        wanted = list(range(cfg.FLIGHTNO_MIN, cfg.FLIGHTNO_MAX + 1))
    else:
        wanted = cfg.FLIGHTS

    result = []
    for idx, name in enumerate(catalog.datasets):  # iteration yields dataset names
        is_match = name.startswith(cfg.FNAME_PREFIX) and name.endswith(cfg.FNAME_SUFFIX)
        # flight number sits in the third '_'-separated field; the short-circuit
        # ensures int() is only attempted on names that matched the pattern
        if is_match and int(name.split("_")[2]) in wanted:
            result.append(catalog.datasets[idx])

    log.debug("THREDDS query completed")
    return result
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment