From da2c728dc571776a8047f7949605ab16668ba01a Mon Sep 17 00:00:00 2001 From: Ivan Kondov <ivan.kondov@kit.edu> Date: Mon, 24 Mar 2025 16:23:57 +0100 Subject: [PATCH 1/4] fix storage size estimate changing from pympler to bson.json_utils --- requirements.txt | 2 +- setup.cfg | 2 +- .../language/utilities/serializable.py | 30 ++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6b39ddd3..d3839faf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ scipy pandas >=2.2.0 pint >=0.24.3 pint-pandas ==0.6.2 +pymongo >= 4.7.3 fireworks >=2.0.4 pyyaml dill @@ -12,4 +13,3 @@ seaborn vre-middleware >=1.2.4 jupyter_client ipykernel -pympler diff --git a/setup.cfg b/setup.cfg index 95587fd2..c577cd85 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,7 @@ install_requires = pandas >=2.2.0 pint >=0.24.3 pint-pandas ==0.7.1 + pymongo >= 4.7.3 fireworks >=2.0.4 pyyaml dill @@ -57,7 +58,6 @@ install_requires = vre-middleware >=1.2.4 jupyter_client ipykernel - pympler [options.extras_require] test = diff --git a/src/virtmat/language/utilities/serializable.py b/src/virtmat/language/utilities/serializable.py index 0454578e..a67cbe14 100644 --- a/src/virtmat/language/utilities/serializable.py +++ b/src/virtmat/language/utilities/serializable.py @@ -4,7 +4,7 @@ import typing import numpy import pandas import pint_pandas -from pympler import asizeof +from bson import json_util from fireworks.utilities.fw_serializers import FWSerializable from fireworks.utilities.fw_serializers import serialize_fw from fireworks.utilities.fw_serializers import recursive_serialize @@ -57,21 +57,31 @@ class FWDataObject(FWSerializable): @recursive_serialize @versioned_serialize def to_dict(self): + f_name = f'{__name__}.{self.__class__.__name__}.to_dict()' + logger = get_fw_logger(f_name) + logger.debug('%s: starting', f_name) if self.datastore is None: - logger = get_fw_logger(__name__) - mem_size = asizeof.asizeof(self.value) - logger.debug('%s: size in memory: %s', __name__, mem_size) - if mem_size < ioops.DATASTORE_CONFIG['inline-threshold']: + dct = recursive_dict(self.value) + b_thres = ioops.DATASTORE_CONFIG['inline-threshold'] + b_size = json_util.get_size(dct, b_thres) + logger.debug('%s: data type: %s', f_name, type(self.value)) + logger.debug('%s: data size [B]: %s', f_name, b_size) + logger.debug('%s: inline-threshold [B]: %s', f_name, b_thres) + if b_size < b_thres: self.datastore = {'type': None} - return {'value': self.value, 'datastore': self.datastore} - logger.info('%s: inline data limit exceeded: %s', __name__, mem_size) - self.datastore, self.filename = ioops.offload_data(recursive_dict(self.value)) + logger.info('%s: data not offloaded', f_name) + return {'value': dct, 'datastore': self.datastore} + logger.info('%s: inline data limit exceeded: %s', f_name, b_size) + self.datastore, self.filename = ioops.offload_data(dct) if self.datastore['type'] is None: - logger.info('%s: data not offloaded', __name__) + logger.info('%s: data not offloaded', f_name) else: - logger.info('%s: data offloaded in %s', __name__, self.filename) + logger.info('%s: data offloaded in %s', f_name, self.filename) if self.datastore['type'] is None: + logger.debug('%s: datastore: %s', f_name, self.datastore) return {'value': self.value, 'datastore': self.datastore} + logger.debug('%s: datastore: %s', f_name, self.datastore) + logger.debug('%s: data in file: %s', f_name, self.filename) return {'datastore': self.datastore, 'filename': self.filename} @classmethod -- GitLab From 7f976edc5fa1b98ed9f2248184285a04d9e6ae5b Mon Sep 17 00:00:00 2001 From: Ivan Kondov <ivan.kondov@kit.edu> Date: Mon, 24 Mar 2025 17:45:24 +0100 Subject: [PATCH 2/4] improve prettytable formatting --- .../language/interpreter/session_manager.py | 4 +++- src/virtmat/language/utilities/fireworks.py | 15 ++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/virtmat/language/interpreter/session_manager.py b/src/virtmat/language/interpreter/session_manager.py index a0bae94f..0f097643 100644 --- a/src/virtmat/language/interpreter/session_manager.py +++ b/src/virtmat/language/interpreter/session_manager.py @@ -110,7 +110,9 @@ def get_prettytable(dataframe): table = class_(list(dataframe.columns)) for tpl in dataframe.itertuples(index=False, name=None): table.add_row(tpl) - return str(table) + table.align = 'l' + table.max_width = 120 + return table class SessionManager(InteractiveConsole): diff --git a/src/virtmat/language/utilities/fireworks.py b/src/virtmat/language/utilities/fireworks.py index 65bf4232..1a0e671c 100644 --- a/src/virtmat/language/utilities/fireworks.py +++ b/src/virtmat/language/utilities/fireworks.py @@ -348,14 +348,15 @@ def get_model_nodes(lpad, uuid): def get_model_history(lpad, uuid): """return node history with some node attributes as pandas dataframe""" - dct = {'state': [], 'updated_on': [], 'source': []} + dct = {'State': [], 'Updated on': [], 'Statement': []} for fwk in get_model_nodes(lpad, uuid): if fwk['spec']['_source_code']: - dct['state'].append(fwk['state']) - dct['updated_on'].append(get_iso_datetime(fwk['updated_on'])) - dct['source'].append('; '.join(fwk['spec']['_source_code'])) - df = pandas.DataFrame(dct).sort_values('updated_on').sort_values('state') - return df[['state', 'updated_on', 'source']] + dct['State'].append(fwk['state']) + timestamp = get_iso_datetime(fwk['updated_on'], add_tzinfo=False, sep=' ') + dct['Updated on'].append(timestamp) + dct['Statement'].append('; '.join(fwk['spec']['_source_code'])) + df = pandas.DataFrame(dct).sort_values('Updated on') + return df[['Updated on', 'State', 'Statement']] # pylint: disable=E1136 def get_model_tag(lpad, uuid): @@ -398,7 +399,7 @@ def get_models_overview(lpad, uuids): wf_states = [] for wf in wfs: hist = get_model_history(lpad, wf['metadata']['uuid']) - wf_states.append(dict(Counter(hist['state'].tolist()))) + wf_states.append(dict(Counter(hist['State'].tolist()))) df_2 = pandas.DataFrame(wf_states).fillna(0).astype('int64') df_2.rename(lambda x: x[0:3], axis='columns', inplace=True) df_3 = get_models_tags(lpad, uuids) -- GitLab From fef1c1003571666bda2153c19920f55a7726ff8b Mon Sep 17 00:00:00 2001 From: Ivan Kondov <ivan.kondov@kit.edu> Date: Mon, 24 Mar 2025 17:46:40 +0100 Subject: [PATCH 3/4] fix printing the stack trace with python 3.9 --- src/virtmat/language/utilities/textx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtmat/language/utilities/textx.py b/src/virtmat/language/utilities/textx.py index e7d2dfe8..d692e80e 100644 --- a/src/virtmat/language/utilities/textx.py +++ b/src/virtmat/language/utilities/textx.py @@ -142,7 +142,7 @@ def display_exception(func): return func(*args, **kwargs) except Exception as err: print('\n', file=sys.stderr) - traceback.print_exception(err, file=sys.stderr) + traceback.print_exception(*sys.exc_info(), file=sys.stderr) print('\n', file=sys.stderr) raise err return decorator -- GitLab From 7f744bde0fd77f0f0f4db971af9d6ae4445ff44e Mon Sep 17 00:00:00 2001 From: Ivan Kondov <ivan.kondov@kit.edu> Date: Tue, 25 Mar 2025 10:44:18 +0100 Subject: [PATCH 4/4] switch to json encoder that provides a better storage size estimate --- requirements.txt | 1 - setup.cfg | 1 - .../language/utilities/serializable.py | 19 ++++++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index d3839faf..8f3d12d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ scipy pandas >=2.2.0 pint >=0.24.3 pint-pandas ==0.6.2 -pymongo >= 4.7.3 fireworks >=2.0.4 pyyaml dill diff --git a/setup.cfg b/setup.cfg index c577cd85..ebf7b726 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,6 @@ install_requires = pandas >=2.2.0 pint >=0.24.3 pint-pandas ==0.7.1 - pymongo >= 4.7.3 fireworks >=2.0.4 pyyaml dill diff --git a/src/virtmat/language/utilities/serializable.py b/src/virtmat/language/utilities/serializable.py index a67cbe14..291ccda7 100644 --- a/src/virtmat/language/utilities/serializable.py +++ b/src/virtmat/language/utilities/serializable.py @@ -1,10 +1,11 @@ """serialization/deserialization code""" -from dataclasses import dataclass import typing +from dataclasses import dataclass +from json import JSONEncoder +from itertools import islice import numpy import pandas import pint_pandas -from bson import json_util from fireworks.utilities.fw_serializers import FWSerializable from fireworks.utilities.fw_serializers import serialize_fw from fireworks.utilities.fw_serializers import recursive_serialize @@ -45,6 +46,18 @@ def versioned_serialize(func): return decorator +def get_json_size(obj, max_size): + """compute JSON size in bytes of a JSON serializable object up to max_size""" + gen = JSONEncoder().iterencode(obj) + chunk_size = 1024 + json_size = 0 + next_chunk = len(''.join(islice(gen, chunk_size)).encode()) + while next_chunk and json_size < max_size: + json_size += next_chunk + next_chunk = len(''.join(islice(gen, chunk_size)).encode()) + return json_size + + @dataclass class FWDataObject(FWSerializable): """top-level FWSerializable dataclass to hold any FWSerializable objects""" @@ -63,7 +76,7 @@ class FWDataObject(FWSerializable): if self.datastore is None: dct = recursive_dict(self.value) b_thres = ioops.DATASTORE_CONFIG['inline-threshold'] - b_size = json_util.get_size(dct, b_thres) + b_size = get_json_size(dct, b_thres) logger.debug('%s: data type: %s', f_name, type(self.value)) logger.debug('%s: data size [B]: %s', f_name, b_size) logger.debug('%s: inline-threshold [B]: %s', f_name, b_thres) -- GitLab