From da2c728dc571776a8047f7949605ab16668ba01a Mon Sep 17 00:00:00 2001
From: Ivan Kondov <ivan.kondov@kit.edu>
Date: Mon, 24 Mar 2025 16:23:57 +0100
Subject: [PATCH 1/4] fix storage size estimate by switching from pympler to
 bson.json_util

---
 requirements.txt                              |  2 +-
 setup.cfg                                     |  2 +-
 .../language/utilities/serializable.py        | 30 ++++++++++++-------
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6b39ddd3..d3839faf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ scipy
 pandas >=2.2.0
 pint >=0.24.3
 pint-pandas ==0.6.2
+pymongo >= 4.7.3
 fireworks >=2.0.4
 pyyaml
 dill
@@ -12,4 +13,3 @@ seaborn
 vre-middleware >=1.2.4
 jupyter_client
 ipykernel
-pympler
diff --git a/setup.cfg b/setup.cfg
index 95587fd2..c577cd85 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -49,6 +49,7 @@ install_requires =
     pandas >=2.2.0
     pint >=0.24.3
     pint-pandas ==0.7.1
+    pymongo >= 4.7.3
     fireworks >=2.0.4
     pyyaml
     dill
@@ -57,7 +58,6 @@ install_requires =
     vre-middleware >=1.2.4
     jupyter_client
     ipykernel
-    pympler
 
 [options.extras_require]
 test =
diff --git a/src/virtmat/language/utilities/serializable.py b/src/virtmat/language/utilities/serializable.py
index 0454578e..a67cbe14 100644
--- a/src/virtmat/language/utilities/serializable.py
+++ b/src/virtmat/language/utilities/serializable.py
@@ -4,7 +4,7 @@ import typing
 import numpy
 import pandas
 import pint_pandas
-from pympler import asizeof
+from bson import json_util
 from fireworks.utilities.fw_serializers import FWSerializable
 from fireworks.utilities.fw_serializers import serialize_fw
 from fireworks.utilities.fw_serializers import recursive_serialize
@@ -57,21 +57,31 @@ class FWDataObject(FWSerializable):
     @recursive_serialize
     @versioned_serialize
     def to_dict(self):
+        f_name = f'{__name__}.{self.__class__.__name__}.to_dict()'
+        logger = get_fw_logger(f_name)
+        logger.debug('%s: starting', f_name)
         if self.datastore is None:
-            logger = get_fw_logger(__name__)
-            mem_size = asizeof.asizeof(self.value)
-            logger.debug('%s: size in memory: %s', __name__, mem_size)
-            if mem_size < ioops.DATASTORE_CONFIG['inline-threshold']:
+            dct = recursive_dict(self.value)
+            b_thres = ioops.DATASTORE_CONFIG['inline-threshold']
+            b_size = json_util.get_size(dct, b_thres)
+            logger.debug('%s: data type: %s', f_name, type(self.value))
+            logger.debug('%s: data size [B]: %s', f_name, b_size)
+            logger.debug('%s: inline-threshold [B]: %s', f_name, b_thres)
+            if b_size < b_thres:
                 self.datastore = {'type': None}
-                return {'value': self.value, 'datastore': self.datastore}
-            logger.info('%s: inline data limit exceeded: %s', __name__, mem_size)
-            self.datastore, self.filename = ioops.offload_data(recursive_dict(self.value))
+                logger.info('%s: data not offloaded', f_name)
+                return {'value': dct, 'datastore': self.datastore}
+            logger.info('%s: inline data limit exceeded: %s', f_name, b_size)
+            self.datastore, self.filename = ioops.offload_data(dct)
             if self.datastore['type'] is None:
-                logger.info('%s: data not offloaded', __name__)
+                logger.info('%s: data not offloaded', f_name)
             else:
-                logger.info('%s: data offloaded in %s', __name__, self.filename)
+                logger.info('%s: data offloaded in %s', f_name, self.filename)
         if self.datastore['type'] is None:
+            logger.debug('%s: datastore: %s', f_name, self.datastore)
             return {'value': self.value, 'datastore': self.datastore}
+        logger.debug('%s: datastore: %s', f_name, self.datastore)
+        logger.debug('%s: data in file: %s', f_name, self.filename)
         return {'datastore': self.datastore, 'filename': self.filename}
 
     @classmethod
-- 
GitLab


From 7f976edc5fa1b98ed9f2248184285a04d9e6ae5b Mon Sep 17 00:00:00 2001
From: Ivan Kondov <ivan.kondov@kit.edu>
Date: Mon, 24 Mar 2025 17:45:24 +0100
Subject: [PATCH 2/4] improve prettytable formatting

---
 .../language/interpreter/session_manager.py       |  4 +++-
 src/virtmat/language/utilities/fireworks.py       | 15 ++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/virtmat/language/interpreter/session_manager.py b/src/virtmat/language/interpreter/session_manager.py
index a0bae94f..0f097643 100644
--- a/src/virtmat/language/interpreter/session_manager.py
+++ b/src/virtmat/language/interpreter/session_manager.py
@@ -110,7 +110,9 @@ def get_prettytable(dataframe):
     table = class_(list(dataframe.columns))
     for tpl in dataframe.itertuples(index=False, name=None):
         table.add_row(tpl)
-    return str(table)
+    table.align = 'l'
+    table.max_width = 120
+    return table
 
 
 class SessionManager(InteractiveConsole):
diff --git a/src/virtmat/language/utilities/fireworks.py b/src/virtmat/language/utilities/fireworks.py
index 65bf4232..1a0e671c 100644
--- a/src/virtmat/language/utilities/fireworks.py
+++ b/src/virtmat/language/utilities/fireworks.py
@@ -348,14 +348,15 @@ def get_model_nodes(lpad, uuid):
 
 def get_model_history(lpad, uuid):
     """return node history with some node attributes as pandas dataframe"""
-    dct = {'state': [], 'updated_on': [], 'source': []}
+    dct = {'State': [], 'Updated on': [], 'Statement': []}
     for fwk in get_model_nodes(lpad, uuid):
         if fwk['spec']['_source_code']:
-            dct['state'].append(fwk['state'])
-            dct['updated_on'].append(get_iso_datetime(fwk['updated_on']))
-            dct['source'].append('; '.join(fwk['spec']['_source_code']))
-    df = pandas.DataFrame(dct).sort_values('updated_on').sort_values('state')
-    return df[['state', 'updated_on', 'source']]
+            dct['State'].append(fwk['state'])
+            timestamp = get_iso_datetime(fwk['updated_on'], add_tzinfo=False, sep=' ')
+            dct['Updated on'].append(timestamp)
+            dct['Statement'].append('; '.join(fwk['spec']['_source_code']))
+    df = pandas.DataFrame(dct).sort_values('Updated on')
+    return df[['Updated on', 'State', 'Statement']]  # pylint: disable=E1136
 
 
 def get_model_tag(lpad, uuid):
@@ -398,7 +399,7 @@ def get_models_overview(lpad, uuids):
     wf_states = []
     for wf in wfs:
         hist = get_model_history(lpad, wf['metadata']['uuid'])
-        wf_states.append(dict(Counter(hist['state'].tolist())))
+        wf_states.append(dict(Counter(hist['State'].tolist())))
     df_2 = pandas.DataFrame(wf_states).fillna(0).astype('int64')
     df_2.rename(lambda x: x[0:3], axis='columns', inplace=True)
     df_3 = get_models_tags(lpad, uuids)
-- 
GitLab


From fef1c1003571666bda2153c19920f55a7726ff8b Mon Sep 17 00:00:00 2001
From: Ivan Kondov <ivan.kondov@kit.edu>
Date: Mon, 24 Mar 2025 17:46:40 +0100
Subject: [PATCH 3/4] fix printing the stack trace with python 3.9

---
 src/virtmat/language/utilities/textx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/virtmat/language/utilities/textx.py b/src/virtmat/language/utilities/textx.py
index e7d2dfe8..d692e80e 100644
--- a/src/virtmat/language/utilities/textx.py
+++ b/src/virtmat/language/utilities/textx.py
@@ -142,7 +142,7 @@ def display_exception(func):
             return func(*args, **kwargs)
         except Exception as err:
             print('\n', file=sys.stderr)
-            traceback.print_exception(err, file=sys.stderr)
+            traceback.print_exception(*sys.exc_info(), file=sys.stderr)
             print('\n', file=sys.stderr)
             raise err
     return decorator
-- 
GitLab


From 7f744bde0fd77f0f0f4db971af9d6ae4445ff44e Mon Sep 17 00:00:00 2001
From: Ivan Kondov <ivan.kondov@kit.edu>
Date: Tue, 25 Mar 2025 10:44:18 +0100
Subject: [PATCH 4/4] switch to json encoder that provides a better storage
 size estimate

---
 requirements.txt                              |  1 -
 setup.cfg                                     |  1 -
 .../language/utilities/serializable.py        | 19 ++++++++++++++++---
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index d3839faf..8f3d12d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,6 @@ scipy
 pandas >=2.2.0
 pint >=0.24.3
 pint-pandas ==0.6.2
-pymongo >= 4.7.3
 fireworks >=2.0.4
 pyyaml
 dill
diff --git a/setup.cfg b/setup.cfg
index c577cd85..ebf7b726 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -49,7 +49,6 @@ install_requires =
     pandas >=2.2.0
     pint >=0.24.3
     pint-pandas ==0.7.1
-    pymongo >= 4.7.3
     fireworks >=2.0.4
     pyyaml
     dill
diff --git a/src/virtmat/language/utilities/serializable.py b/src/virtmat/language/utilities/serializable.py
index a67cbe14..291ccda7 100644
--- a/src/virtmat/language/utilities/serializable.py
+++ b/src/virtmat/language/utilities/serializable.py
@@ -1,10 +1,11 @@
 """serialization/deserialization code"""
-from dataclasses import dataclass
 import typing
+from dataclasses import dataclass
+from json import JSONEncoder
+from itertools import islice
 import numpy
 import pandas
 import pint_pandas
-from bson import json_util
 from fireworks.utilities.fw_serializers import FWSerializable
 from fireworks.utilities.fw_serializers import serialize_fw
 from fireworks.utilities.fw_serializers import recursive_serialize
@@ -45,6 +46,18 @@ def versioned_serialize(func):
     return decorator
 
 
+def get_json_size(obj, max_size):
+    """compute JSON size in bytes of a JSON serializable object up to max_size"""
+    gen = JSONEncoder().iterencode(obj)  # lazy generator of encoded str fragments
+    chunk_size = 1024  # number of fragments (not bytes) joined per step
+    json_size = 0
+    next_chunk = len(''.join(islice(gen, chunk_size)).encode())  # bytes in first batch
+    while next_chunk and json_size < max_size:  # end of data or limit reached
+        json_size += next_chunk
+        next_chunk = len(''.join(islice(gen, chunk_size)).encode())
+    return json_size  # exact if < max_size, otherwise only a lower bound >= max_size
+
+
 @dataclass
 class FWDataObject(FWSerializable):
     """top-level FWSerializable dataclass to hold any FWSerializable objects"""
@@ -63,7 +76,7 @@ class FWDataObject(FWSerializable):
         if self.datastore is None:
             dct = recursive_dict(self.value)
             b_thres = ioops.DATASTORE_CONFIG['inline-threshold']
-            b_size = json_util.get_size(dct, b_thres)
+            b_size = get_json_size(dct, b_thres)
             logger.debug('%s: data type: %s', f_name, type(self.value))
             logger.debug('%s: data size [B]: %s', f_name, b_size)
             logger.debug('%s: inline-threshold [B]: %s', f_name, b_thres)
-- 
GitLab