import altair as alt
Results
Setup
import altair
load data to test functions
reset_seed()
= pd.read_parquet(hai_big_path)
hai = pd.read_parquet(hai_era_big_path) hai_era
= imp_dataloader(hai, hai_era, var_sel = 'TA', gap_len=5, block_len=100, control_lags = [1], n_rep=1, bs=1).cpu() dls
= MeteoImpItem(1,2, 'TA', gap_len=10)
item = [item] items
= orig_target(dls, items)[0] targ
input, _ = one_batch_with_items(dls, items)
Imputation Methods
Kalman Filter
take a MeteoImp Df
and gap fill it using the given model
= torch.load(here("analysis/results/trained_models/1_gap_varying_6-336_v1.pickle")) model
= predict_items(model, dls=dls, items = items)
preds, targs 0].mean[targs[0].mask] = targs[0].data[targs[0].mask] preds[
KalmanImputation
input[1].shape
torch.Size([1, 100, 9])
= model(input) preds_raw
len(preds_raw.mean[0])
10
1, 7) _extract_var(preds_raw.mean,
[[tensor([-0.7555], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.7765], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8153], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8653], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8660], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8712], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8551], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8420], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8442], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([-0.8536], dtype=torch.float64, grad_fn=<IndexBackward0>)]]
1, 7) _extract_var(preds_raw.cov,
[[tensor([[0.0227]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0307]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0359]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0392]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0409]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0409]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0393]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0361]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0310]], dtype=torch.float64, grad_fn=<IndexBackward0>),
tensor([[0.0230]], dtype=torch.float64, grad_fn=<IndexBackward0>)]]
= [MeteoImpItem(1,2, list(hai.columns), gap_len=3)] * 3 items_all
= one_batch_with_items(dls, items_all) input_all, _
= model(input_all) preds_raw_all
3, 9) _extract_var(preds_raw_all.mean,
[[tensor([-0.5650], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.5887], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.6259], dtype=torch.float64, grad_fn=<SliceBackward0>)],
[tensor([-0.5650], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.5887], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.6259], dtype=torch.float64, grad_fn=<SliceBackward0>)],
[tensor([-0.5650], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.5887], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([-0.6259], dtype=torch.float64, grad_fn=<SliceBackward0>)]]
1]).shape buffer_pred(preds_raw_all.mean, input_all[
torch.Size([3, 100, 9])
3, 9) _extract_var(preds_raw_all.cov,
[[tensor([[0.1189]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1717]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1251]], dtype=torch.float64, grad_fn=<SliceBackward0>)],
[tensor([[0.1189]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1717]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1251]], dtype=torch.float64, grad_fn=<SliceBackward0>)],
[tensor([[0.1189]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1717]], dtype=torch.float64, grad_fn=<SliceBackward0>),
tensor([[0.1251]], dtype=torch.float64, grad_fn=<SliceBackward0>)]]
PredictLossVar
PredictLossVar (only_gap:bool, var:int)
Loss (negative log likelihood) for only one variable, computed for each batch
True, 3)(preds_raw_all, input_all) PredictLossVar(
[tensor(-0.2252, dtype=torch.float64, grad_fn=<MeanBackward0>),
tensor(-0.2252, dtype=torch.float64, grad_fn=<MeanBackward0>),
tensor(-0.2252, dtype=torch.float64, grad_fn=<MeanBackward0>)]
1].shape input_all[
torch.Size([3, 100, 9])
PredictLikelihoodVar
PredictLikelihoodVar (only_gap:bool, var:int)
Mean over timesteps of the likelihood for only one variable, computed for each batch
True, 3)(preds_raw_all, input_all) PredictLikelihoodVar(
[tensor(1.1569, dtype=torch.float64, grad_fn=<MeanBackward0>),
tensor(1.1569, dtype=torch.float64, grad_fn=<MeanBackward0>),
tensor(1.1569, dtype=torch.float64, grad_fn=<MeanBackward0>)]
MultiMetrics
MultiMetrics (**metrics)
Initialize self. See help(type(self)) for accurate signature.
KalmanImputation
KalmanImputation (model)
Initialize self. See help(type(self)) for accurate signature.
= KalmanImputation(model) k_imp
= k_imp(item, dls=dls) pred
'pred': pred[45:55],'data': targ.data[45:55], 'mask': targ.mask[45:55]}, hide_idx=False) display_as_row({
pred
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | 2.3467 | 0.0000 | 304.7310 | 0.9520 | 5.3900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:00:00 | 2.1803 | 0.0000 | 304.7310 | 0.9710 | 5.5900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:30:00 | 1.8728 | 0.0000 | 304.7310 | 0.9580 | 5.6700 | 96.2900 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:00:00 | 1.4769 | 0.0000 | 304.7310 | 0.9380 | 6.6100 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:30:00 | 1.4710 | 0.0000 | 315.0050 | 0.7850 | 6.6500 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 05:00:00 | 1.4303 | 0.0000 | 315.0050 | 0.5290 | 6.1200 | 96.2700 | 0.0000 | 43.0800 | 1.3000 |
2000-01-04 05:30:00 | 1.5573 | 0.0000 | 315.0050 | 0.4500 | 5.5800 | 96.2600 | 0.2900 | 43.1300 | 1.3000 |
2000-01-04 06:00:00 | 1.6616 | 0.0000 | 315.0050 | 0.4990 | 4.7000 | 96.2500 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 06:30:00 | 1.6438 | 0.0000 | 315.0050 | 0.3820 | 5.0700 | 96.2300 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 07:00:00 | 1.5697 | 0.0000 | 315.0050 | 0.3130 | 5.2300 | 96.2400 | 0.0000 | 43.1300 | 1.2900 |
data
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | 2.1900 | 0.0000 | 304.7310 | 0.9520 | 5.3900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:00:00 | 2.2700 | 0.0000 | 304.7310 | 0.9710 | 5.5900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:30:00 | 2.3200 | 0.0000 | 304.7310 | 0.9580 | 5.6700 | 96.2900 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:00:00 | 2.3400 | 0.0000 | 304.7310 | 0.9380 | 6.6100 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:30:00 | 2.2400 | 0.0000 | 315.0050 | 0.7850 | 6.6500 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 05:00:00 | 2.0000 | 0.0000 | 315.0050 | 0.5290 | 6.1200 | 96.2700 | 0.0000 | 43.0800 | 1.3000 |
2000-01-04 05:30:00 | 1.9400 | 0.0000 | 315.0050 | 0.4500 | 5.5800 | 96.2600 | 0.2900 | 43.1300 | 1.3000 |
2000-01-04 06:00:00 | 2.0700 | 0.0000 | 315.0050 | 0.4990 | 4.7000 | 96.2500 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 06:30:00 | 2.0400 | 0.0000 | 315.0050 | 0.3820 | 5.0700 | 96.2300 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 07:00:00 | 2.0300 | 0.0000 | 315.0050 | 0.3130 | 5.2300 | 96.2400 | 0.0000 | 43.1300 | 1.2900 |
mask
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 03:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 03:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 04:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 04:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 05:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 05:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 06:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 06:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 07:00:00 | False | True | True | True | True | True | True | True | True |
Kalman Imputation - Specialized Variables
KalmanImputationVar
KalmanImputationVar (models)
Initialize self. See help(type(self)) for accurate signature.
Details | |
---|---|
models | dataframe with 2 columns model and var |
= KalmanImputationVar(pd.DataFrame({'model': [model], 'var': 'TA'})) k_impVar
= k_impVar(var= 'TA', item=item, dls=dls) pred
'pred': pred[45:55],'data': targ.data[45:55], 'mask': targ.mask[45:55]}, hide_idx=False) display_as_row({
pred
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | 2.3467 | 0.0000 | 304.7310 | 0.9520 | 5.3900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:00:00 | 2.1803 | 0.0000 | 304.7310 | 0.9710 | 5.5900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:30:00 | 1.8728 | 0.0000 | 304.7310 | 0.9580 | 5.6700 | 96.2900 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:00:00 | 1.4769 | 0.0000 | 304.7310 | 0.9380 | 6.6100 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:30:00 | 1.4710 | 0.0000 | 315.0050 | 0.7850 | 6.6500 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 05:00:00 | 1.4303 | 0.0000 | 315.0050 | 0.5290 | 6.1200 | 96.2700 | 0.0000 | 43.0800 | 1.3000 |
2000-01-04 05:30:00 | 1.5573 | 0.0000 | 315.0050 | 0.4500 | 5.5800 | 96.2600 | 0.2900 | 43.1300 | 1.3000 |
2000-01-04 06:00:00 | 1.6616 | 0.0000 | 315.0050 | 0.4990 | 4.7000 | 96.2500 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 06:30:00 | 1.6438 | 0.0000 | 315.0050 | 0.3820 | 5.0700 | 96.2300 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 07:00:00 | 1.5697 | 0.0000 | 315.0050 | 0.3130 | 5.2300 | 96.2400 | 0.0000 | 43.1300 | 1.2900 |
data
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | 2.1900 | 0.0000 | 304.7310 | 0.9520 | 5.3900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:00:00 | 2.2700 | 0.0000 | 304.7310 | 0.9710 | 5.5900 | 96.3000 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 03:30:00 | 2.3200 | 0.0000 | 304.7310 | 0.9580 | 5.6700 | 96.2900 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:00:00 | 2.3400 | 0.0000 | 304.7310 | 0.9380 | 6.6100 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 04:30:00 | 2.2400 | 0.0000 | 315.0050 | 0.7850 | 6.6500 | 96.2600 | 0.0000 | 43.0800 | 1.3100 |
2000-01-04 05:00:00 | 2.0000 | 0.0000 | 315.0050 | 0.5290 | 6.1200 | 96.2700 | 0.0000 | 43.0800 | 1.3000 |
2000-01-04 05:30:00 | 1.9400 | 0.0000 | 315.0050 | 0.4500 | 5.5800 | 96.2600 | 0.2900 | 43.1300 | 1.3000 |
2000-01-04 06:00:00 | 2.0700 | 0.0000 | 315.0050 | 0.4990 | 4.7000 | 96.2500 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 06:30:00 | 2.0400 | 0.0000 | 315.0050 | 0.3820 | 5.0700 | 96.2300 | 0.0000 | 43.1300 | 1.2900 |
2000-01-04 07:00:00 | 2.0300 | 0.0000 | 315.0050 | 0.3130 | 5.2300 | 96.2400 | 0.0000 | 43.1300 | 1.2900 |
mask
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-04 02:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 03:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 03:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 04:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 04:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 05:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 05:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 06:00:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 06:30:00 | False | True | True | True | True | True | True | True | True |
2000-01-04 07:00:00 | False | True | True | True | True | True | True | True | True |
MDS
Need to call R from python
print('here::here("R/REddyProc_tools.R")') r.
[1] "here::here(\"R/REddyProc_tools.R\")"
'here::here("R/REddyProc_tools.R")' |
importr_install
importr_install (pkg)
setupR
setupR ()
R2pd
R2pd (x)
pd2R
pd2R (x)
Experiment
The time series needs to be at least 90 days long, so we add 45 days before and after the gap
There is a problem with conversions of timestamps between R and Python, so convert to string in Python and then back to datetime in R
add_buffer
add_buffer (index, inner_index, n)
Adds a buffer before and after `inner_index` so that the length is at least `n`
100], 50)
add_buffer(hai.index, hai.index[:-50:], 50); add_buffer(hai.index, hai.index[
item2REddy
item2REddy (item, var, df)
Add context around item for supporting REddyProc
= item2REddy(targ, 'TA', hai)
REddy_df
= r.toR_timestamp(pd2R(REddy_df))
REddy_df_r
= R2pd(r.fill_gaps_EProc(REddy_df_r, "TA")) filled
R[write to console]: New sEddyProc class for site 'ID'
R[write to console]: Initialized variable 'TA' with 10 real gaps for gap filling.
R[write to console]: Limited MDS algorithm for gap filling of 'TA.gap_0' with LUT(SW_IN only) and MDC.
R[write to console]: Look up table with window size of 7 days with SW_IN
R[write to console]: 10
R[write to console]: Finished gap filling of 'TA' in 0 seconds. Artificial gaps filled: 4419, real gaps filled: 10, unfilled (long) gaps: 0.
filled
TA_orig | TA_f | TA_fqc | TA_fall | TA_fall_qc | TA_fnum | TA_fsd | TA_fmeth | TA_fwin | |
---|---|---|---|---|---|---|---|---|---|
1 | -0.60 | -0.60 | 0.0 | -0.60 | NaN | NaN | NaN | NaN | NaN |
2 | -0.65 | -0.65 | 0.0 | -0.65 | NaN | NaN | NaN | NaN | NaN |
3 | -0.58 | -0.58 | 0.0 | -0.58 | NaN | NaN | NaN | NaN | NaN |
4 | -0.51 | -0.51 | 0.0 | -0.51 | NaN | NaN | NaN | NaN | NaN |
5 | -0.49 | -0.49 | 0.0 | -0.49 | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4415 | 3.30 | 3.30 | 0.0 | 3.30 | NaN | NaN | NaN | NaN | NaN |
4416 | 3.10 | 3.10 | 0.0 | 3.10 | NaN | NaN | NaN | NaN | NaN |
4417 | 3.05 | 3.05 | 0.0 | 3.05 | NaN | NaN | NaN | NaN | NaN |
4418 | 3.05 | 3.05 | 0.0 | 3.05 | NaN | NaN | NaN | NaN | NaN |
4419 | 3.05 | 3.05 | 0.0 | 3.05 | NaN | NaN | NaN | NaN | NaN |
4419 rows × 9 columns
"TIMESTAMP_END") REddy_df.set_index(
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | gap | |
---|---|---|---|---|---|---|---|---|---|---|
TIMESTAMP_END | ||||||||||
2000-01-01 00:30:00 | -0.60 | 0.0 | 302.475 | 0.222 | 2.05 | 96.63 | 0.0 | 43.00 | 1.44 | 0.0 |
2000-01-01 01:00:00 | -0.65 | 0.0 | 302.475 | 0.122 | 2.53 | 96.58 | 0.0 | 43.00 | 1.43 | 0.0 |
2000-01-01 01:30:00 | -0.58 | 0.0 | 301.677 | 0.090 | 3.15 | 96.56 | 0.0 | 43.00 | 1.43 | 0.0 |
2000-01-01 02:00:00 | -0.51 | 0.0 | 301.677 | 0.110 | 3.12 | 96.56 | 0.0 | 43.00 | 1.45 | 0.0 |
2000-01-01 02:30:00 | -0.49 | 0.0 | 301.677 | 0.102 | 3.04 | 96.57 | 0.0 | 43.00 | 1.44 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2000-04-01 23:30:00 | 3.30 | 0.0 | 283.856 | 1.092 | 2.50 | 95.06 | 0.0 | 44.96 | 4.32 | 0.0 |
2000-04-02 00:00:00 | 3.10 | 0.0 | 283.856 | 1.013 | 2.09 | 95.04 | 0.0 | 44.96 | 4.22 | 0.0 |
2000-04-02 00:30:00 | 3.05 | 0.0 | 283.856 | 0.964 | 2.42 | 95.04 | 0.0 | 44.96 | 4.13 | 0.0 |
2000-04-02 01:00:00 | 3.05 | 0.0 | 283.856 | 0.933 | 3.04 | 95.04 | 0.0 | 44.96 | 4.04 | 0.0 |
2000-04-02 01:30:00 | 3.05 | 0.0 | 251.387 | 0.930 | 2.96 | 95.03 | 0.0 | 44.95 | 3.94 | 0.0 |
4419 rows × 10 columns
= filled.set_index(pd.to_datetime(REddy_df.TIMESTAMP_END))
filled
= filled.loc[targ.data.index]
filled_item
= targ.data.copy()
pred ~targ.mask["TA"], "TA"] = filled_item["TA_f"][~targ.mask["TA"]] pred.loc[
45:55], targ.data[45:55] pred[
( TA SW_IN LW_IN VPD WS PA P \
time
2000-01-04 02:30:00 1.853612 0.0 304.731 0.952 5.39 96.30 0.00
2000-01-04 03:00:00 1.837083 0.0 304.731 0.971 5.59 96.30 0.00
2000-01-04 03:30:00 1.823985 0.0 304.731 0.958 5.67 96.29 0.00
2000-01-04 04:00:00 1.809805 0.0 304.731 0.938 6.61 96.26 0.00
2000-01-04 04:30:00 1.794282 0.0 315.005 0.785 6.65 96.26 0.00
2000-01-04 05:00:00 1.783447 0.0 315.005 0.529 6.12 96.27 0.00
2000-01-04 05:30:00 1.771525 0.0 315.005 0.450 5.58 96.26 0.29
2000-01-04 06:00:00 1.760362 0.0 315.005 0.499 4.70 96.25 0.00
2000-01-04 06:30:00 1.748410 0.0 315.005 0.382 5.07 96.23 0.00
2000-01-04 07:00:00 1.737933 0.0 315.005 0.313 5.23 96.24 0.00
SWC TS
time
2000-01-04 02:30:00 43.08 1.31
2000-01-04 03:00:00 43.08 1.31
2000-01-04 03:30:00 43.08 1.31
2000-01-04 04:00:00 43.08 1.31
2000-01-04 04:30:00 43.08 1.31
2000-01-04 05:00:00 43.08 1.30
2000-01-04 05:30:00 43.13 1.30
2000-01-04 06:00:00 43.13 1.29
2000-01-04 06:30:00 43.13 1.29
2000-01-04 07:00:00 43.13 1.29 ,
TA SW_IN LW_IN VPD WS PA P SWC \
time
2000-01-04 02:30:00 2.19 0.0 304.731 0.952 5.39 96.30 0.00 43.08
2000-01-04 03:00:00 2.27 0.0 304.731 0.971 5.59 96.30 0.00 43.08
2000-01-04 03:30:00 2.32 0.0 304.731 0.958 5.67 96.29 0.00 43.08
2000-01-04 04:00:00 2.34 0.0 304.731 0.938 6.61 96.26 0.00 43.08
2000-01-04 04:30:00 2.24 0.0 315.005 0.785 6.65 96.26 0.00 43.08
2000-01-04 05:00:00 2.00 0.0 315.005 0.529 6.12 96.27 0.00 43.08
2000-01-04 05:30:00 1.94 0.0 315.005 0.450 5.58 96.26 0.29 43.13
2000-01-04 06:00:00 2.07 0.0 315.005 0.499 4.70 96.25 0.00 43.13
2000-01-04 06:30:00 2.04 0.0 315.005 0.382 5.07 96.23 0.00 43.13
2000-01-04 07:00:00 2.03 0.0 315.005 0.313 5.23 96.24 0.00 43.13
TS
time
2000-01-04 02:30:00 1.31
2000-01-04 03:00:00 1.31
2000-01-04 03:30:00 1.31
2000-01-04 04:00:00 1.31
2000-01-04 04:30:00 1.31
2000-01-04 05:00:00 1.30
2000-01-04 05:30:00 1.30
2000-01-04 06:00:00 1.29
2000-01-04 06:30:00 1.29
2000-01-04 07:00:00 1.29 )
gap_fill_item
gap_fill_item (item, REddy_df, var, filled)
"TA", filled) gap_fill_item(targ, REddy_df,
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-03 04:00:00 | 0.81 | 0.0 | 304.148 | 0.000 | 3.53 | 97.00 | 0.0 | 43.09 | 1.37 |
2000-01-03 04:30:00 | 0.95 | 0.0 | 304.382 | 0.000 | 3.99 | 96.98 | 0.0 | 43.09 | 1.37 |
2000-01-03 05:00:00 | 1.09 | 0.0 | 304.382 | 0.000 | 3.61 | 96.96 | 0.0 | 43.09 | 1.37 |
2000-01-03 05:30:00 | 1.18 | 0.0 | 304.382 | 0.009 | 3.90 | 96.93 | 0.0 | 43.09 | 1.37 |
2000-01-03 06:00:00 | 1.35 | 0.0 | 304.382 | 0.061 | 4.17 | 96.91 | 0.0 | 43.09 | 1.38 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2000-01-05 03:30:00 | 4.62 | 0.0 | 330.202 | 1.162 | 6.53 | 95.91 | 0.0 | 44.12 | 1.82 |
2000-01-05 04:00:00 | 4.51 | 0.0 | 330.202 | 1.636 | 5.76 | 95.94 | 0.0 | 44.12 | 1.85 |
2000-01-05 04:30:00 | 4.11 | 0.0 | 299.320 | 1.746 | 5.79 | 95.98 | 0.0 | 44.12 | 1.85 |
2000-01-05 05:00:00 | 3.77 | 0.0 | 299.320 | 2.065 | 6.00 | 96.03 | 0.0 | 44.12 | 1.83 |
2000-01-05 05:30:00 | 3.36 | 0.0 | 299.320 | 1.911 | 4.76 | 96.10 | 0.0 | 44.11 | 1.81 |
100 rows × 9 columns
MDSImputation
MDSImputation
MDSImputation (var, df)
Initialize self. See help(type(self)) for accurate signature.
= MDSImputation('TA', hai) mds_imp
mds_imp(targ)
TA | SW_IN | LW_IN | VPD | WS | PA | P | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-03 04:00:00 | 0.81 | 0.0 | 304.148 | 0.000 | 3.53 | 97.00 | 0.0 | 43.09 | 1.37 |
2000-01-03 04:30:00 | 0.95 | 0.0 | 304.382 | 0.000 | 3.99 | 96.98 | 0.0 | 43.09 | 1.37 |
2000-01-03 05:00:00 | 1.09 | 0.0 | 304.382 | 0.000 | 3.61 | 96.96 | 0.0 | 43.09 | 1.37 |
2000-01-03 05:30:00 | 1.18 | 0.0 | 304.382 | 0.009 | 3.90 | 96.93 | 0.0 | 43.09 | 1.37 |
2000-01-03 06:00:00 | 1.35 | 0.0 | 304.382 | 0.061 | 4.17 | 96.91 | 0.0 | 43.09 | 1.38 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2000-01-05 03:30:00 | 4.62 | 0.0 | 330.202 | 1.162 | 6.53 | 95.91 | 0.0 | 44.12 | 1.82 |
2000-01-05 04:00:00 | 4.51 | 0.0 | 330.202 | 1.636 | 5.76 | 95.94 | 0.0 | 44.12 | 1.85 |
2000-01-05 04:30:00 | 4.11 | 0.0 | 299.320 | 1.746 | 5.79 | 95.98 | 0.0 | 44.12 | 1.85 |
2000-01-05 05:00:00 | 3.77 | 0.0 | 299.320 | 2.065 | 6.00 | 96.03 | 0.0 | 44.12 | 1.83 |
2000-01-05 05:30:00 | 3.36 | 0.0 | 299.320 | 1.911 | 4.76 | 96.10 | 0.0 | 44.11 | 1.81 |
100 rows × 9 columns
ERA Imputation
ERAImputation
ERAImputation ()
Initialize self. See help(type(self)) for accurate signature.
targ.data.columns
Index(['TA', 'SW_IN', 'LW_IN', 'VPD', 'WS', 'PA', 'P', 'SWC', 'TS'], dtype='object')
= ERAImputation() era_imp
era_imp(targ)
TA | SW_IN | VPD | PA | P | WS | LW_IN | SWC | TS | |
---|---|---|---|---|---|---|---|---|---|
time | |||||||||
2000-01-03 04:00:00 | 1.702 | 0.0 | 0.693 | 97.016 | 0.000 | 2.948 | 304.148 | NaN | NaN |
2000-01-03 04:30:00 | 1.762 | 0.0 | 0.691 | 97.002 | 0.026 | 2.989 | 304.382 | NaN | NaN |
2000-01-03 05:00:00 | 1.736 | 0.0 | 0.697 | 96.988 | 0.000 | 3.015 | 304.382 | NaN | NaN |
2000-01-03 05:30:00 | 1.711 | 0.0 | 0.703 | 96.974 | 0.000 | 3.041 | 304.382 | NaN | NaN |
2000-01-03 06:00:00 | 1.686 | 0.0 | 0.709 | 96.960 | 0.000 | 3.066 | 304.382 | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2000-01-05 03:30:00 | 4.945 | 0.0 | 0.774 | 95.910 | 0.252 | 6.107 | 330.202 | NaN | NaN |
2000-01-05 04:00:00 | 4.789 | 0.0 | 0.758 | 95.926 | 0.252 | 6.189 | 330.202 | NaN | NaN |
2000-01-05 04:30:00 | 4.632 | 0.0 | 0.741 | 95.942 | 0.044 | 6.270 | 299.320 | NaN | NaN |
2000-01-05 05:00:00 | 4.251 | 0.0 | 0.766 | 95.988 | 0.000 | 5.923 | 299.320 | NaN | NaN |
2000-01-05 05:30:00 | 3.869 | 0.0 | 0.790 | 96.035 | 0.000 | 5.576 | 299.320 | NaN | NaN |
100 rows × 9 columns
Metrics
MaskedMetric
MaskedMetric (metric)
Initialize self. See help(type(self)) for accurate signature.
rmse
rmse (y_true, y_pred)
NormalizedMetric
NormalizedMetric (metric:__main__.MaskedMetric, mean, std)
Initialize self. See help(type(self)) for accurate signature.
normalize
normalize (x, mean, std)
rmse_mask(targ, pred)
array([0.3698457])
Validation Imputations
Check that the values of the imputations make sense
this is the average error for ERA
'TA'] - hai_era.loc[hai.index]['TA_ERA']).pow(2).mean()) np.sqrt((hai[
1.8708585984196093
rmse_mask(targ, ERAImputation()(targ))
array([0.29919709])
= []
err_era for _ in range(100):
= random.choices(dls.items, k = 1)
items = orig_target(dls, items)[0]
targ err_era.append(rmse_mask(targ, ERAImputation()(targ)))
np.array(err_era).mean()
1.3839633283333717
Comparison
Takes a variable to be filled, makes an artificial gap of a given length, tries to fill it with 3 methods and returns the metrics for each of them; this is repeated n_rep times
prep visualization
format_gap_len
format_gap_len (gap_len:int)
Nice formatting for gap lengths
Type | Details | |
---|---|---|
gap_len | int | gap length in num observations (30 mins) |
10), "5 h")
test_eq(format_gap_len(48), "1 day (24 h)")
test_eq(format_gap_len(90), "2 days (45 h)")
test_eq(format_gap_len(336), "1 week (168 h)") test_eq(format_gap_len(
prep_df
prep_df (df)
Aggregation
=0)['TA'] hai.std(axis
7.924610764367695
get_stats(hai)
(tensor([8.3339e+00, 1.2096e+02, 3.1150e+02, 3.3807e+00, 3.1800e+00, 9.5962e+01,
4.3427e-02, 3.4843e+01, 7.9349e+00], dtype=torch.float64),
tensor([ 7.9246, 204.0026, 41.9557, 4.3684, 1.6254, 0.8552, 0.2803,
8.9131, 5.6586], dtype=torch.float64))
\[ \text{RMSE}_\text{stand} = \frac{1}{\sigma}\text{RMSE}\]
RMSEAgg
RMSEAgg (df)
Aggregate to rmse and normalized rmse
timeseriesAgg
timeseriesAgg (targ, pred, *args)
Imp Methods
ImpComparison
ImpComparison (models:pandas.core.frame.DataFrame, df, control, block_len, rmse=True, time_series=False)
Initialize self. See help(type(self)) for accurate signature.
l_model
l_model (x, base_path=Path('/home/runner/work/meteo_imp/meteo_imp/analysi s/results/trained_models'))
= pd.DataFrame.from_records([
models_var 'var': 'TA', 'model': l_model("TA_specialized_gap_6-336_v3_0.pickle",base_path)},
{'var': 'SW_IN', 'model': l_model("SW_IN_specialized_gap_6-336_v2_0.pickle",base_path)},
{'var': 'LW_IN', 'model': l_model("LW_IN_specialized_gap_6-336_v1.pickle",base_path)},
{'var': 'VPD', 'model': l_model("VPD_specialized_gap_6-336_v2_0.pickle",base_path)},
{'var': 'WS', 'model': l_model("WS_specialized_gap_6-336_v1.pickle",base_path)},
{'var': 'PA', 'model': l_model("PA_specialized_gap_6-336_v3_0.pickle",base_path)},
{'var': 'P', 'model': l_model("1_gap_varying_6-336_v3.pickle",base_path)},
{'var': 'TS', 'model': l_model("TS_specialized_gap_6-336_v2_0.pickle",base_path)},
{'var': 'SWC', 'model': l_model("SWC_specialized_gap_6-336_v2_1.pickle",base_path)},
{ ])
= ImpComparison(models = models_var, df = hai, control = hai_era, block_len = 446) comp
= comp.compare(gap_len = [12, 24, 48, 336], var=["TA", "SW_IN"], n_rep=3) data_results
data_results.head()
method | var | gap_len | idx_rep | rmse | rmse_stand | gap_len_f | |
---|---|---|---|---|---|---|---|
0 | Kalman Filter | TA | 6 | 0 | 0.310825 | 0.039223 | 6 h |
3 | Kalman Filter | TA | 6 | 1 | 0.529280 | 0.066789 | 6 h |
6 | Kalman Filter | TA | 6 | 2 | 0.330725 | 0.041734 | 6 h |
1 | ERA-I | TA | 6 | 0 | 1.329099 | 0.167718 | 6 h |
4 | ERA-I | TA | 6 | 1 | 1.468124 | 0.185261 | 6 h |
data_results.method.unique()
['Kalman Filter', 'ERA-I', 'MDS']
Categories (3, object): ['Kalman Filter' < 'ERA-I' < 'MDS']
data_results.columns
Index(['method', 'var', 'gap_len', 'idx_rep', 'rmse', 'rmse_stand',
'gap_len_f'],
dtype='object')
Kalman Filters
KalmanImpComparison
KalmanImpComparison (models:pandas.core.frame.DataFrame, df:pandas.core.frame.DataFrame, control:pandas.core.frame.DataFrame, block_len:int, rmse:bool=True, time_series:bool=False)
Compare different Kalman filters
Type | Default | Details | |
---|---|---|---|
models | DataFrame | model, gap_single_var, | |
df | DataFrame | ||
control | DataFrame | ||
block_len | int | ||
rmse | bool | True | |
time_series | bool | False |
= pd.DataFrame({'model': [ l_model("1_gap_varying_336_no_control_v1.pickle"), l_model("1_gap_varying_6-336_v3.pickle")],
models_nc 'type': [ 'No Control', 'Use Control' ]})
models_nc
model | type | |
---|---|---|
0 | Kalman Filter\n N dim obs: 9,\n N dim state: 18,\n N dim contr: 14 | No Control |
1 | Kalman Filter\n N dim obs: 9,\n N dim state: 18,\n N dim contr: 14 | Use Control |
= KalmanImpComparison(models_nc, hai, hai_era, 100) kcomp
= kcomp.compare(n_rep =3, gap_len = [6, 12, 24], var = list(hai.columns)) k_results
k_results
var | loss | likelihood | gap_len | idx_rep | type | rmse | rmse_stand | gap_len_f | |
---|---|---|---|---|---|---|---|---|---|
0 | TA | -3.063759 | 1.762383 | 3 | 0 | No Control | 0.966638 | 0.121979 | 3 h |
1 | TA | -3.956292 | 2.204462 | 3 | 1 | No Control | 0.354328 | 0.044712 | 3 h |
2 | TA | -3.164076 | 1.947664 | 3 | 2 | No Control | 0.891858 | 0.112543 | 3 h |
3 | TA | -5.133151 | 2.662633 | 3 | 0 | Use Control | 0.444229 | 0.056057 | 3 h |
4 | TA | -5.387288 | 2.626224 | 3 | 1 | Use Control | 0.209779 | 0.026472 | 3 h |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1 | TS | -1.180353 | 1.672593 | 12 | 1 | No Control | 0.677688 | 0.119762 | 12 h |
2 | TS | 0.954142 | 1.668745 | 12 | 2 | No Control | 1.172264 | 0.207163 | 12 h |
3 | TS | 1.036145 | 1.528941 | 12 | 0 | Use Control | 0.994793 | 0.175801 | 12 h |
4 | TS | 1.770240 | 1.534406 | 12 | 1 | Use Control | 1.173229 | 0.207334 | 12 h |
5 | TS | 3.380553 | 1.458859 | 12 | 2 | Use Control | 1.446071 | 0.255551 | 12 h |
162 rows × 9 columns
Plotting
Utils
Faceting
facet_wrap
facet_wrap (data:pandas.core.frame.DataFrame, plot_fn, col:str, y_labels:list[str]|None=None, n_cols=3, y_resolve='independent')
Type | Default | Details | |
---|---|---|---|
data | DataFrame | full dataset | |
plot_fn | function that makes the plot, takes 2 arguments: data and y_label | ||
col | str | column to facet | |
y_labels | list[str] | None | None | custom labels y axis |
n_cols | int | 3 | |
y_resolve | str | independent |
facet_grid
facet_grid (data:pandas.core.frame.DataFrame, plot_fn, col:str, row:str, y_labels:list[str]|None=None)
Type | Default | Details | |
---|---|---|---|
data | DataFrame | full dataset | |
plot_fn | function that makes the plot, takes 2 arguments: data and y_label | ||
col | str | column to facet, | |
row | str | ||
y_labels | list[str] | None | None | custom labels y axis |
from itertools import product
= pd.DataFrame(list(product(['0','1'], ['a', 'b'])), columns = ['row', 'col'])
test_data 'text'] = test_data.row + test_data.col test_data[
def test_plot(data, *args):
    """Build a minimal text-mark chart used to try out the faceting helpers.

    Any extra positional arguments (the faceting helpers pass a y label as
    second argument) are accepted and ignored.
    """
    chart = alt.Chart(data)
    text_marks = chart.mark_text()
    return text_marks.encode(text='text')
= 'row') facet_wrap(test_data, test_plot, col
= 'col', row='row') facet_grid(test_data, test_plot, col
Format
PlotFormatter
PlotFormatter (font_size:int=18, legend_font_size:int=18, title_font_size:int=20)
Format altair plot by setting font sizes/legend position
pipe’]
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
The Plot
def remove_outliers(data, var, groupby):
    """Simple, but maybe computationally inefficient, way to remove outliers from a boxplot."""
    # NOTE(review): only the docstring is visible in this rendering of the
    # notebook; confirm against the source notebook whether a body exists.
def custom_box_plot(data, x, y, color, xoffset):
    """Boxplot for altair without outliers.

    Draws, for every group, one bar that spans from the first to the third
    quartile of `y`.

    Args:
        data: dataframe handed to `alt.Chart`.
        x: name of the field shown on the x axis.
        y: name of the field whose quartiles are drawn.
        color: name of the field mapped to the bar colour.
        xoffset: name of the field used to offset the bars inside one x
            position.

    Returns:
        The altair bar chart.
    """
    # `x` has to be part of the grouping, otherwise the field does not survive
    # the aggregation. `dict.fromkeys` drops duplicates while keeping the order
    # (colour and offset are frequently the same field).
    group_fields = list(dict.fromkeys([x, color, xoffset]))
    bar = alt.Chart(data).mark_bar().encode(
        x=x,
        y='q1:Q',
        y2='q3:Q',  # upper end of the bar is the third quartile
        color=color,
        xOffset=xoffset,  # the altair channel name is camelCase
    ).transform_aggregate(
        q1=f'q1({y})',
        q3=f'q3({y})',
        groupby=group_fields,
    )
    return bar
# custom_box_plot(data_results, 'gap_len', 'rmse', 'method', 'method')
the_plot
the_plot (data)
the_plot(data_results)
the_plot_stand
the_plot_stand (data)
the_plot_stand(data_results)
the_plot_stand2
the_plot_stand2 (data)
dict(zip(scale_meteo.domain, scale_meteo.range))
{'TA': '#1B9E77',
'SW_IN': '#D95F02',
'LW_IN': '#7570B3',
'VPD': '#E7298A',
'WS': '#66A61E',
'PA': '#E6AB02',
'SWC': '#A6761D',
'TS': '#666666'}
the_plot_stand2(data_results)
'color') alt.Color(
Color({
shorthand: 'color'
})
= alt.X('gap_len'); color = alt.Color('method'); xOffset = alt.XOffset('method'); y = alt.Y('rmse_stand') x
'median', lambda x: x.quantile(.25), lambda x: x.quantile(.75)]}).columns data_results.groupby([x.shorthand, color.shorthand, xOffset.shorthand]).agg({y.shorthand: [
MultiIndex([('rmse_stand', 'median'),
('rmse_stand', '<lambda_0>'),
('rmse_stand', '<lambda_1>')],
)
data_results
method | var | gap_len | idx_rep | rmse | rmse_stand | gap_len_f | |
---|---|---|---|---|---|---|---|
0 | Kalman Filter | TA | 6 | 0 | 0.310825 | 0.039223 | 6 h |
3 | Kalman Filter | TA | 6 | 1 | 0.529280 | 0.066789 | 6 h |
6 | Kalman Filter | TA | 6 | 2 | 0.330725 | 0.041734 | 6 h |
1 | ERA-I | TA | 6 | 0 | 1.329099 | 0.167718 | 6 h |
4 | ERA-I | TA | 6 | 1 | 1.468124 | 0.185261 | 6 h |
... | ... | ... | ... | ... | ... | ... | ... |
4 | ERA-I | SW_IN | 168 | 1 | 24.792212 | 0.121529 | 1 week (168 h) |
7 | ERA-I | SW_IN | 168 | 2 | 108.398313 | 0.531358 | 1 week (168 h) |
2 | MDS | SW_IN | 168 | 0 | 134.669370 | 0.660136 | 1 week (168 h) |
5 | MDS | SW_IN | 168 | 1 | 31.500833 | 0.154414 | 1 week (168 h) |
8 | MDS | SW_IN | 168 | 2 | 149.125227 | 0.730997 | 1 week (168 h) |
72 rows × 7 columns
'gap_len', 'method', 'var']).agg(
data_results.groupby([= pd.NamedAgg(y.shorthand, 'median'),
median = pd.NamedAgg(y.shorthand,lambda x: x.quantile(.25)),
q1 = pd.NamedAgg(y.shorthand,lambda x: x.quantile(.75))).reset_index() q3
gap_len | method | var | median | q1 | q3 | |
---|---|---|---|---|---|---|
0 | 6 | Kalman Filter | TA | 0.041734 | 0.040478 | 0.054262 |
1 | 6 | Kalman Filter | SW_IN | 0.123723 | 0.105794 | 0.215405 |
2 | 6 | Kalman Filter | LW_IN | NaN | NaN | NaN |
3 | 6 | Kalman Filter | VPD | NaN | NaN | NaN |
4 | 6 | Kalman Filter | WS | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... |
103 | 168 | MDS | WS | NaN | NaN | NaN |
104 | 168 | MDS | PA | NaN | NaN | NaN |
105 | 168 | MDS | P | NaN | NaN | NaN |
106 | 168 | MDS | SWC | NaN | NaN | NaN |
107 | 168 | MDS | TS | NaN | NaN | NaN |
108 rows × 6 columns
custom_boxplot_nooutlier
custom_boxplot_nooutlier (data:pandas.core.frame.DataFrame, x:altair.vegalite.v5.schema.channels.X, y:altair.vegalite.v5.schema.channels.Y, color:altair.vegalite.v5.schema.channels.Color, xOffset:altair.vegalite.v5.schema.channels.XOff set)
the_plot_stand3
the_plot_stand3 (data)
the_plot_stand3(data_results)
violin plots don’t work
def _the_plot_violin(data, y_label):
    """Build a horizontal density-area (violin-like) chart of ``rmse``.

    The density of ``rmse`` is estimated separately for every
    ``(method, gap_len)`` pair and drawn as a horizontal area, coloured by
    method and split into one column per gap length.

    Args:
        data: DataFrame with at least the columns ``rmse``, ``method`` and
            ``gap_len``.
        y_label: Title used for the ``rmse`` axis.

    Returns:
        The Altair chart, 250 x 200 per column.
    """
    methods = ["Kalman Filter", "ERA-I", "MDS"]
    return alt.Chart(data).mark_area(orient='horizontal').encode(
        # NOTE(review): the x channel encodes the estimated density, yet its
        # title is 'gap_len [h]' -- kept as in the original.
        x=alt.X('density:Q', title='gap_len [h]', axis=alt.Axis(labelAngle=0)),
        # Start the axis slightly below zero, relative to the mean RMSE.
        y=alt.Y('rmse', title=y_label, scale=alt.Scale(domainMin=-.2 * data['rmse'].mean())),
        color=alt.Color('method:N', scale=alt.Scale(domain=methods, scheme='dark2')),
        xOffset=alt.XOffset('method', scale=alt.Scale(domain=methods)),
        column=alt.Column('gap_len'),
    ).transform_density(
        'rmse',
        as_=['rmse', 'density'],
        groupby=['method', 'gap_len'],
    ).properties(width=250, height=200)
"var", y_labels = [f"RMSE {var} [{units_big[var]}]" for var in data_results['var'].unique()]) facet_wrap(data_results, _the_plot_violin,
Gap Length
= KalmanImpComparison(models_var, hai, hai_era, block_len=100).compare(gap_len = [2,10,30], var=['TA', 'SW_IN', 'VPD'], n_rep=3) gap_len
gap_len.gap_len_f
0 1 h
1 1 h
2 1 h
0 5 h
1 5 h
2 5 h
0 15 h
1 15 h
2 15 h
0 1 h
1 1 h
2 1 h
0 5 h
1 5 h
2 5 h
0 15 h
1 15 h
2 15 h
0 1 h
1 1 h
2 1 h
0 5 h
1 5 h
2 5 h
0 15 h
1 15 h
2 15 h
Name: gap_len_f, dtype: category
Categories (3, object): ['1 h' < '5 h' < '15 h']
agg_gap_len
agg_gap_len (data)
= agg_gap_len(gap_len) gap_len_agg
gap_len_agg
var | gap_len | median | Q1 | Q3 | |
---|---|---|---|---|---|
0 | TA | 1 | 0.139298 | 0.117130 | 0.212692 |
1 | TA | 5 | 0.415087 | 0.307310 | 0.419030 |
2 | TA | 15 | 0.339634 | 0.332259 | 0.668379 |
3 | SW_IN | 1 | 10.995754 | 7.385527 | 15.342193 |
4 | SW_IN | 5 | 41.343274 | 29.328629 | 46.677302 |
5 | SW_IN | 15 | 22.880862 | 20.480137 | 29.897607 |
6 | LW_IN | 1 | NaN | NaN | NaN |
7 | LW_IN | 5 | NaN | NaN | NaN |
8 | LW_IN | 15 | NaN | NaN | NaN |
9 | VPD | 1 | 0.151389 | 0.142296 | 0.161319 |
10 | VPD | 5 | 0.147326 | 0.080502 | 0.581390 |
11 | VPD | 15 | 0.544413 | 0.367209 | 1.145505 |
12 | WS | 1 | NaN | NaN | NaN |
13 | WS | 5 | NaN | NaN | NaN |
14 | WS | 15 | NaN | NaN | NaN |
15 | PA | 1 | NaN | NaN | NaN |
16 | PA | 5 | NaN | NaN | NaN |
17 | PA | 15 | NaN | NaN | NaN |
18 | P | 1 | NaN | NaN | NaN |
19 | P | 5 | NaN | NaN | NaN |
20 | P | 15 | NaN | NaN | NaN |
21 | SWC | 1 | NaN | NaN | NaN |
22 | SWC | 5 | NaN | NaN | NaN |
23 | SWC | 15 | NaN | NaN | NaN |
24 | TS | 1 | NaN | NaN | NaN |
25 | TS | 5 | NaN | NaN | NaN |
26 | TS | 15 | NaN | NaN | NaN |
= _get_era_rmse(hai, hai_era) rmse_df
rmse_df
var | era_rmse | |
---|---|---|
0 | TA | 1.870859 |
1 | SW_IN | 76.320335 |
2 | VPD | 2.021377 |
3 | PA | 0.104781 |
4 | P | 0.293058 |
5 | WS | 1.207949 |
6 | LW_IN | 20.204070 |
gap_len_agg
var | gap_len | median | Q1 | Q3 | |
---|---|---|---|---|---|
0 | TA | 1 | 0.139298 | 0.117130 | 0.212692 |
1 | TA | 5 | 0.415087 | 0.307310 | 0.419030 |
2 | TA | 15 | 0.339634 | 0.332259 | 0.668379 |
3 | SW_IN | 1 | 10.995754 | 7.385527 | 15.342193 |
4 | SW_IN | 5 | 41.343274 | 29.328629 | 46.677302 |
5 | SW_IN | 15 | 22.880862 | 20.480137 | 29.897607 |
6 | LW_IN | 1 | NaN | NaN | NaN |
7 | LW_IN | 5 | NaN | NaN | NaN |
8 | LW_IN | 15 | NaN | NaN | NaN |
9 | VPD | 1 | 0.151389 | 0.142296 | 0.161319 |
10 | VPD | 5 | 0.147326 | 0.080502 | 0.581390 |
11 | VPD | 15 | 0.544413 | 0.367209 | 1.145505 |
12 | WS | 1 | NaN | NaN | NaN |
13 | WS | 5 | NaN | NaN | NaN |
14 | WS | 15 | NaN | NaN | NaN |
15 | PA | 1 | NaN | NaN | NaN |
16 | PA | 5 | NaN | NaN | NaN |
17 | PA | 15 | NaN | NaN | NaN |
18 | P | 1 | NaN | NaN | NaN |
19 | P | 5 | NaN | NaN | NaN |
20 | P | 15 | NaN | NaN | NaN |
21 | SWC | 1 | NaN | NaN | NaN |
22 | SWC | 5 | NaN | NaN | NaN |
23 | SWC | 15 | NaN | NaN | NaN |
24 | TS | 1 | NaN | NaN | NaN |
25 | TS | 5 | NaN | NaN | NaN |
26 | TS | 15 | NaN | NaN | NaN |
= pd.merge(gap_len_agg, rmse_df, on='var') gap_len_agg
'var'] == 'TA'], "label") _plot_gap_len(gap_len_agg[gap_len_agg[
'var') facet_wrap(gap_len_agg, _plot_gap_len,
plot_gap_len
plot_gap_len (data, df, control)
plot_gap_len(gap_len, hai, hai_era)
Compare
Plotting functions for comparing the performance of two or more different conditions.
plot_compare
plot_compare (data:pandas.core.frame.DataFrame, compare:str, y:str='rmse_stand', scale_domain:Optional[Sequence]=None, y_labels:Optional[Sequence]=None)
="type") plot_compare(k_results, compare
It can also show a different variable on the y-axis, for example the likelihood loss.
="type", y="loss") plot_compare(k_results, compare
Timeseries
= ImpComparison(models = models_var, df = hai, control = hai_era, block_len = 446, rmse=False, time_series = True) comp_ts
= comp_ts.compare(gap_len = [12,24,336], var=["TA", 'SW_IN'], n_rep=1) res_ts
res_ts.columns
Index(['method', 'var', 'gap_len', 'idx_rep', 'pred', 'targ', 'gap_len_f'], dtype='object')
# Pick the Kalman Filter results for TA and take the first row as an example.
test_ts = res_ts.query('method == "Kalman Filter" and var == "TA"')
row_res = test_ts.iloc[0]
# `pred` and `targ` are indexed with [0] to get the prediction / target object.
pred_all = row_res.pred[0]
targ = row_res.targ[0]
var = row_res['var']
type(pred_all)
meteo_imp.kalman.training.NormalsDf
targ.mask[var]
time
2000-09-21 20:30:00 True
2000-09-21 21:00:00 True
2000-09-21 21:30:00 True
2000-09-21 22:00:00 True
2000-09-21 22:30:00 True
...
2000-10-01 01:00:00 True
2000-10-01 01:30:00 True
2000-10-01 02:00:00 True
2000-10-01 02:30:00 True
2000-10-01 03:00:00 True
Name: TA, Length: 446, dtype: bool
list(row_res.index)
['method', 'var', 'gap_len', 'idx_rep', 'pred', 'targ', 'gap_len_f']
0].mask['TA']) np.argmin(row_res.targ[
217
0].mask['TA'].index[47] row_res.targ[
Timestamp('2000-09-22 20:00:00')
unnest_predictions
unnest_predictions (row_res:pandas.core.series.Series, ctx_len:int=50)
Unnest predictions/target for each gap to plot timeseries
unnest_predictions(row_res)
time | mean | std | measurement | is_present | method | var | gap_len | idx_rep | gap_len_f | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2000-09-25 23:30:00 | NaN | NaN | 10.32 | True | Kalman Filter | TA | 6 | 0 | 6 h |
1 | 2000-09-26 00:00:00 | NaN | NaN | 10.57 | True | Kalman Filter | TA | 6 | 0 | 6 h |
2 | 2000-09-26 00:30:00 | NaN | NaN | 10.96 | True | Kalman Filter | TA | 6 | 0 | 6 h |
3 | 2000-09-26 01:00:00 | NaN | NaN | 10.85 | True | Kalman Filter | TA | 6 | 0 | 6 h |
4 | 2000-09-26 01:30:00 | NaN | NaN | 10.72 | True | Kalman Filter | TA | 6 | 0 | 6 h |
5 | 2000-09-26 02:00:00 | NaN | NaN | 10.62 | True | Kalman Filter | TA | 6 | 0 | 6 h |
6 | 2000-09-26 02:30:00 | NaN | NaN | 10.44 | True | Kalman Filter | TA | 6 | 0 | 6 h |
7 | 2000-09-26 03:00:00 | NaN | NaN | 11.07 | True | Kalman Filter | TA | 6 | 0 | 6 h |
8 | 2000-09-26 03:30:00 | NaN | NaN | 11.92 | True | Kalman Filter | TA | 6 | 0 | 6 h |
9 | 2000-09-26 04:00:00 | NaN | NaN | 11.56 | True | Kalman Filter | TA | 6 | 0 | 6 h |
10 | 2000-09-26 04:30:00 | NaN | NaN | 11.05 | True | Kalman Filter | TA | 6 | 0 | 6 h |
11 | 2000-09-26 05:00:00 | NaN | NaN | 10.55 | True | Kalman Filter | TA | 6 | 0 | 6 h |
12 | 2000-09-26 05:30:00 | NaN | NaN | 10.90 | True | Kalman Filter | TA | 6 | 0 | 6 h |
13 | 2000-09-26 06:00:00 | NaN | NaN | 10.54 | True | Kalman Filter | TA | 6 | 0 | 6 h |
14 | 2000-09-26 06:30:00 | NaN | NaN | 9.09 | True | Kalman Filter | TA | 6 | 0 | 6 h |
15 | 2000-09-26 07:00:00 | NaN | NaN | 8.95 | True | Kalman Filter | TA | 6 | 0 | 6 h |
16 | 2000-09-26 07:30:00 | NaN | NaN | 9.12 | True | Kalman Filter | TA | 6 | 0 | 6 h |
17 | 2000-09-26 08:00:00 | NaN | NaN | 9.39 | True | Kalman Filter | TA | 6 | 0 | 6 h |
18 | 2000-09-26 08:30:00 | NaN | NaN | 9.94 | True | Kalman Filter | TA | 6 | 0 | 6 h |
19 | 2000-09-26 09:00:00 | 10.423968 | 0.756711 | 10.73 | False | Kalman Filter | TA | 6 | 0 | 6 h |
20 | 2000-09-26 09:30:00 | 10.546502 | 0.819027 | 10.56 | False | Kalman Filter | TA | 6 | 0 | 6 h |
21 | 2000-09-26 10:00:00 | 10.915743 | 0.869077 | 10.80 | False | Kalman Filter | TA | 6 | 0 | 6 h |
22 | 2000-09-26 10:30:00 | 11.531336 | 0.909095 | 11.35 | False | Kalman Filter | TA | 6 | 0 | 6 h |
23 | 2000-09-26 11:00:00 | 12.373439 | 0.937222 | 12.23 | False | Kalman Filter | TA | 6 | 0 | 6 h |
24 | 2000-09-26 11:30:00 | 13.597164 | 0.951751 | 13.83 | False | Kalman Filter | TA | 6 | 0 | 6 h |
25 | 2000-09-26 12:00:00 | 14.629994 | 0.951881 | 14.81 | False | Kalman Filter | TA | 6 | 0 | 6 h |
26 | 2000-09-26 12:30:00 | 15.043486 | 0.937641 | 14.92 | False | Kalman Filter | TA | 6 | 0 | 6 h |
27 | 2000-09-26 13:00:00 | 15.080051 | 0.909788 | 15.01 | False | Kalman Filter | TA | 6 | 0 | 6 h |
28 | 2000-09-26 13:30:00 | 15.326227 | 0.869610 | 15.40 | False | Kalman Filter | TA | 6 | 0 | 6 h |
29 | 2000-09-26 14:00:00 | 15.859726 | 0.817723 | 15.92 | False | Kalman Filter | TA | 6 | 0 | 6 h |
30 | 2000-09-26 14:30:00 | 16.001325 | 0.750160 | 15.90 | False | Kalman Filter | TA | 6 | 0 | 6 h |
31 | 2000-09-26 15:00:00 | NaN | NaN | 16.40 | True | Kalman Filter | TA | 6 | 0 | 6 h |
32 | 2000-09-26 15:30:00 | NaN | NaN | 16.75 | True | Kalman Filter | TA | 6 | 0 | 6 h |
33 | 2000-09-26 16:00:00 | NaN | NaN | 16.21 | True | Kalman Filter | TA | 6 | 0 | 6 h |
34 | 2000-09-26 16:30:00 | NaN | NaN | 14.87 | True | Kalman Filter | TA | 6 | 0 | 6 h |
35 | 2000-09-26 17:00:00 | NaN | NaN | 14.37 | True | Kalman Filter | TA | 6 | 0 | 6 h |
36 | 2000-09-26 17:30:00 | NaN | NaN | 13.75 | True | Kalman Filter | TA | 6 | 0 | 6 h |
37 | 2000-09-26 18:00:00 | NaN | NaN | 13.44 | True | Kalman Filter | TA | 6 | 0 | 6 h |
38 | 2000-09-26 18:30:00 | NaN | NaN | 13.13 | True | Kalman Filter | TA | 6 | 0 | 6 h |
39 | 2000-09-26 19:00:00 | NaN | NaN | 13.09 | True | Kalman Filter | TA | 6 | 0 | 6 h |
40 | 2000-09-26 19:30:00 | NaN | NaN | 13.06 | True | Kalman Filter | TA | 6 | 0 | 6 h |
41 | 2000-09-26 20:00:00 | NaN | NaN | 13.15 | True | Kalman Filter | TA | 6 | 0 | 6 h |
42 | 2000-09-26 20:30:00 | NaN | NaN | 13.39 | True | Kalman Filter | TA | 6 | 0 | 6 h |
43 | 2000-09-26 21:00:00 | NaN | NaN | 13.47 | True | Kalman Filter | TA | 6 | 0 | 6 h |
44 | 2000-09-26 21:30:00 | NaN | NaN | 13.48 | True | Kalman Filter | TA | 6 | 0 | 6 h |
45 | 2000-09-26 22:00:00 | NaN | NaN | 13.28 | True | Kalman Filter | TA | 6 | 0 | 6 h |
46 | 2000-09-26 22:30:00 | NaN | NaN | 13.48 | True | Kalman Filter | TA | 6 | 0 | 6 h |
47 | 2000-09-26 23:00:00 | NaN | NaN | 13.49 | True | Kalman Filter | TA | 6 | 0 | 6 h |
48 | 2000-09-26 23:30:00 | NaN | NaN | 13.60 | True | Kalman Filter | TA | 6 | 0 | 6 h |
49 | 2000-09-27 00:00:00 | NaN | NaN | 13.77 | True | Kalman Filter | TA | 6 | 0 | 6 h |
50 | 2000-09-27 00:30:00 | NaN | NaN | 14.04 | True | Kalman Filter | TA | 6 | 0 | 6 h |
= pd.concat([unnest_predictions(row) for _,row in res_ts.iterrows()]) res_ts_plot
'var == "TA" and method == "Kalman Filter" and idx_rep == 0 and gap_len==6.'), y= "measurement") plot_points(res_ts_plot.query(
'var == "TA" and method == "Kalman Filter" and idx_rep == 0 and gap_len==6.')) plot_missing_area(res_ts_plot.query(
'var == "TA" and idx_rep == 0 and gap_len==6.'), y= "mean", color='method', scale=alt.Scale()) plot_line(res_ts_plot.query(
'var == "TA" and idx_rep == 0 and gap_len==6.').copy(), y= "mean", color='method', scale=alt.Scale()) plot_error(res_ts_plot.query(
'var == "TA" and idx_rep == 0 and gap_len==6.').copy(), "TA") _plot_timeseries(res_ts_plot.query(
'var == "SW_IN" and idx_rep == 0 and gap_len==12.').copy(), "SW_IN") _plot_timeseries(res_ts_plot.query(
= pd.concat([unnest_predictions(row) for _,row in res_ts.iterrows()]) data_ts
plot_timeseries
plot_timeseries (data, idx_rep:int|None=None, gap_len:int|None=None, max_idx:int=3, ctx_len={6.0: 50, 12.0: 50, 168.0: 384}, scale_color=Scale({ domain: ['Kalman Filter', 'ERA-I', 'MDS'], scheme: 'dark2' }), compare='method')
='random') plot_timeseries(res_ts, idx_rep
Scatter plot
'var == "TA" and gap_len==12. and idx_rep ==0').copy(), x="measurement", y= "mean", color='method', scale=alt.Scale()) _plot_scatter(data_ts.query(
Table
The Table
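A table where the rows are the variable and the gap length, and the columns are the mean and standard deviation of the RMSE for each method.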
# Mean and std of the RMSE for every (method, var, gap_len); unstacking
# level 0 moves `method` from the row index into the columns.
t = data_results.groupby(['method', 'var', 'gap_len']).agg({'rmse': ['mean', 'std']}).unstack(level=0)
# Drop the outer 'rmse' column level and label the statistic level 'RMSE'.
t_idx = t.columns.droplevel()
t_idx.names = ['RMSE', None]
t2 = t.copy()
t2.columns = t_idx
# Group the columns by method, then put the method level on top.
t2 = t2.sort_index(axis=1, level=1).swaplevel(axis=1)
t2
Kalman Filter | ERA-I | MDS | |||||
---|---|---|---|---|---|---|---|
RMSE | mean | std | mean | std | mean | std | |
var | gap_len | ||||||
TA | 6 | 0.390277 | 0.120791 | 1.432389 | 0.090855 | 2.153796 | 0.638164 |
12 | 0.971174 | 0.714823 | 1.421383 | 0.645727 | 3.069420 | 1.685032 | |
24 | 0.543269 | 0.045233 | 1.618961 | 0.708207 | 2.585667 | 1.306159 | |
168 | 0.818625 | 0.205868 | 1.691291 | 0.321707 | 3.811197 | 0.459708 | |
SW_IN | 6 | 35.270434 | 23.989000 | 35.687127 | 61.811917 | 40.526401 | 67.768649 |
12 | 65.749867 | 58.475011 | 60.069077 | 98.779015 | 64.785358 | 86.461266 | |
24 | 44.446448 | 19.486994 | 55.218478 | 41.980449 | 72.847162 | 75.033947 | |
168 | 61.289860 | 37.403341 | 74.600240 | 44.042412 | 105.098477 | 64.145949 | |
LW_IN | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
VPD | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
WS | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
PA | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
P | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
SWC | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN | |
TS | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
12 | NaN | NaN | NaN | NaN | NaN | NaN | |
24 | NaN | NaN | NaN | NaN | NaN | NaN | |
168 | NaN | NaN | NaN | NaN | NaN | NaN |
= t2.iloc[0] row
row
RMSE
Kalman Filter mean 0.390277
std 0.120791
ERA-I mean 1.432389
std 0.090855
MDS mean 2.153796
std 0.638164
Name: (TA, 6), dtype: float64
0,2,4]]) * 2 np.argmin(row.iloc[[
0
style_the_table
style_the_table (style, cols=[0, 2, 4])
highlight_min_method
highlight_min_method (row, props, cols)
# #| export
# renames_table_latex = {name: f"\parbox{{2.1cm}}{{{val}}}" for name, val in
# {'SW_IN': "Shortwave radiation incoming \\textbf{SW IN} [\si{W/m^2}]",
# 'LW_IN': 'Longwave radiation incoming \\textbf{LW IN} [$W/m^2$]',
# 'TA': "Air Temperature \\textbf{TA} [$°C$]",
# 'VPD': "Vapour Pressure Deficit \\textbf{VPD} [$hPa$]",
# 'PA': "Air Pressure \\textbf{PA} [$hPa$]",
# 'P': "Precipitation \\textbf{P} [$mm$]",
# 'WS': "Wind Speed \\textbf{WS} [$m/s$]",
# }.items()}
t2.style.pipe(style_the_table)
Kalman Filter | ERA-I | MDS | |||||
---|---|---|---|---|---|---|---|
RMSE | mean | std | mean | std | mean | std | |
var | gap_len | ||||||
TA | 6 | 0.390 | 0.121 | 1.432 | 0.091 | 2.154 | 0.638 |
12 | 0.971 | 0.715 | 1.421 | 0.646 | 3.069 | 1.685 | |
24 | 0.543 | 0.045 | 1.619 | 0.708 | 2.586 | 1.306 | |
168 | 0.819 | 0.206 | 1.691 | 0.322 | 3.811 | 0.460 | |
SW_IN | 6 | 35.270 | 23.989 | 35.687 | 61.812 | 40.526 | 67.769 |
12 | 65.750 | 58.475 | 60.069 | 98.779 | 64.785 | 86.461 | |
24 | 44.446 | 19.487 | 55.218 | 41.980 | 72.847 | 75.034 | |
168 | 61.290 | 37.403 | 74.600 | 44.042 | 105.098 | 64.146 | |
LW_IN | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
VPD | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
WS | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
PA | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
P | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
SWC | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - | |
TS | 6 | - | - | - | - | - | - |
12 | - | - | - | - | - | - | |
24 | - | - | - | - | - | - | |
168 | - | - | - | - | - | - |
= t2.rename(index = renames_table_latex).style.apply(highlight_min_method, props="font-weight: bold", axis=1, cols=[0,2,4]).format_index(precision=0).format(precision=3) t3
t3
Kalman Filter | ERA-I | MDS | |||||
---|---|---|---|---|---|---|---|
RMSE | mean | std | mean | std | mean | std | |
var | gap_len | ||||||
\parbox{2.1cm}{\textbf{TA} [\si{°C}]} | 6 | 0.390 | 0.121 | 1.432 | 0.091 | 2.154 | 0.638 |
12 | 0.971 | 0.715 | 1.421 | 0.646 | 3.069 | 1.685 | |
24 | 0.543 | 0.045 | 1.619 | 0.708 | 2.586 | 1.306 | |
168 | 0.819 | 0.206 | 1.691 | 0.322 | 3.811 | 0.460 | |
\parbox{2.1cm}{\textbf{SW\_IN} [\si{W/m^2}]} | 6 | 35.270 | 23.989 | 35.687 | 61.812 | 40.526 | 67.769 |
12 | 65.750 | 58.475 | 60.069 | 98.779 | 64.785 | 86.461 | |
24 | 44.446 | 19.487 | 55.218 | 41.980 | 72.847 | 75.034 | |
168 | 61.290 | 37.403 | 74.600 | 44.042 | 105.098 | 64.146 | |
\parbox{2.1cm}{\textbf{LW\_IN} [\si{W/m^2}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{VPD} [\si{hPa}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{WS} [\si{m/s}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{PA} [\si{hPa}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{P} [\si{mm}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{SWC} [\si{\%}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan | |
\parbox{2.1cm}{\textbf{TS} [\si{°C}]} | 6 | nan | nan | nan | nan | nan | nan |
12 | nan | nan | nan | nan | nan | nan | |
24 | nan | nan | nan | nan | nan | nan | |
168 | nan | nan | nan | nan | nan | nan |
print(t3.to_latex(convert_css=True, hrules=True, clines="skip-last;data", column_format="p{2.1cm}c|rr|rr|rr", caption="caption", label="table"))
\begin{table}
\caption{caption}
\label{table}
\begin{tabular}{p{2.1cm}c|rr|rr|rr}
\toprule
& & \multicolumn{2}{r}{Kalman Filter} & \multicolumn{2}{r}{ERA-I} & \multicolumn{2}{r}{MDS} \\
& RMSE & mean & std & mean & std & mean & std \\
var & gap_len & & & & & & \\
\midrule
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{TA} [\si{°C}]}} & 6 & \bfseries 0.390 & 0.121 & 1.432 & 0.091 & 2.154 & 0.638 \\
& 12 & \bfseries 0.971 & 0.715 & 1.421 & 0.646 & 3.069 & 1.685 \\
& 24 & \bfseries 0.543 & 0.045 & 1.619 & 0.708 & 2.586 & 1.306 \\
& 168 & \bfseries 0.819 & 0.206 & 1.691 & 0.322 & 3.811 & 0.460 \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{SW\_IN} [\si{W/m^2}]}} & 6 & \bfseries 35.270 & 23.989 & 35.687 & 61.812 & 40.526 & 67.769 \\
& 12 & 65.750 & 58.475 & \bfseries 60.069 & 98.779 & 64.785 & 86.461 \\
& 24 & \bfseries 44.446 & 19.487 & 55.218 & 41.980 & 72.847 & 75.034 \\
& 168 & \bfseries 61.290 & 37.403 & 74.600 & 44.042 & 105.098 & 64.146 \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{LW\_IN} [\si{W/m^2}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{VPD} [\si{hPa}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{WS} [\si{m/s}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{PA} [\si{hPa}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{P} [\si{mm}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{SWC} [\si{\%}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\multirow[c]{4}{*}{\parbox{2.1cm}{\textbf{TS} [\si{°C}]}} & 6 & nan & nan & nan & nan & nan & nan \\
& 12 & nan & nan & nan & nan & nan & nan \\
& 24 & nan & nan & nan & nan & nan & nan \\
& 168 & nan & nan & nan & nan & nan & nan \\
\cline{1-8}
\bottomrule
\end{tabular}
\end{table}
gap_len
the_table
the_table (data, y='rmse', y_name='RMSE')
the_table(data_results)
the_table_latex
the_table_latex (table, file, caption='', label='', stand=False)
"test_table.tex") the_table_latex(the_table(data_results),
Table compare
table_compare
table_compare (data, compare:str, y='rmse_stand', compare_ascending=True)
'type') table_compare(k_results,
table_compare_latex
table_compare_latex (table, file, caption='', label='')
Table Compare 3
Same as above, but for comparing 3 options instead of 2.
table_compare3
table_compare3 (data, compare:str, y='rmse_stand', compare_ascending=True)
table_compare3_latex
table_compare3_latex (table, file, caption='', label='')
Gap len table
# Mean RMSE for every (var, gap_len) pair.
g = gap_len.groupby(['var', 'gap_len']).agg({'rmse': ['mean']})
g
# Reshape to wide form: one row per (var, statistic), one column per gap length.
(g.droplevel(level=0, axis=1)  # drop the outer 'rmse' column level
 .reset_index()
 .melt(id_vars=['var', 'gap_len'], var_name='rmse')
 .pivot(index=['var', 'rmse'], columns=['gap_len'])
 .droplevel(level=0, axis=1)  # drop the column level added by pivot
)
table_gap_len
table_gap_len (data, y='rmse_stand')
table_gap_len(gap_len)
table_gap_len_latex
table_gap_len_latex (table, file, caption='', label='')
f"{col:.0f}" for col in list(table_gap_len(gap_len).columns)] [
from tempfile import tempdir
from pathlib import Path
/ "test_table.tex") table_gap_len_latex(table_gap_len(gap_len), Path(tempdir)