Skip to content
Snippets Groups Projects
Commit eb7e3885 authored by Mohamed Anis Koubaa's avatar Mohamed Anis Koubaa :speech_balloon:
Browse files

construct target metadata using pydantic model.

parent 0fa0e08c
No related branches found
No related tags found
No related merge requests found
Showing
with 4448 additions and 202 deletions
...@@ -40,7 +40,7 @@ class ExcelSheetAdapter: ...@@ -40,7 +40,7 @@ class ExcelSheetAdapter:
except FileNotFoundError as error: except FileNotFoundError as error:
raise SourceNotFound(error) raise SourceNotFound(error)
sheet_names = self.excel_file.sheet_names self.sheet_names = sheet_names = self.excel_file.sheet_names
if data_sheet_name is not None: if data_sheet_name is not None:
if data_sheet_name not in sheet_names: if data_sheet_name not in sheet_names:
raise SourceNotFound() raise SourceNotFound()
...@@ -62,7 +62,7 @@ class ExcelSheetAdapter: ...@@ -62,7 +62,7 @@ class ExcelSheetAdapter:
print(f"This is df-serialization: \n{self.df}") print(f"This is df-serialization: \n{self.df}")
print(f"This is dfm-serialization: \n{self.dfm}") print(f"This is dfm-serialization: \n{self.dfm}")
def get_header(self) -> object: def get_header(self):
""" """
Read column names from excel file. Read column names from excel file.
Compose the header in a compatible way to oep. Compose the header in a compatible way to oep.
...@@ -78,7 +78,7 @@ class ExcelSheetAdapter: ...@@ -78,7 +78,7 @@ class ExcelSheetAdapter:
table_schema_output["columns"].append(json.loads(json_col)) table_schema_output["columns"].append(json.loads(json_col))
return dict(table_schema_output) return dict(table_schema_output)
def get_data(self) -> object: def get_data(self):
""" """
Export data from dataframe to a json object. Export data from dataframe to a json object.
:return: :return:
...@@ -86,10 +86,25 @@ class ExcelSheetAdapter: ...@@ -86,10 +86,25 @@ class ExcelSheetAdapter:
json_data = self.df.to_json(orient='records', indent=4, date_format='iso') json_data = self.df.to_json(orient='records', indent=4, date_format='iso')
return json.loads(json_data) return json.loads(json_data)
def get_metadata(self) -> object: def get_metadata_header(self):
""" """
Export metadata from dataframe to a json object. Export metadata header from dataframe to a json object.
:return: :return:
""" """
json_metadata = self.dfm.to_json(orient='table') json_metadata = self.dfm.to_json(orient='records', indent=4, date_format='iso')
return json.loads(json_metadata)
def get_metadata_section(self, section_name: str):
    """
    Load one metadata sheet from the workbook and return it as parsed JSON.

    :param section_name: name of the metadata sheet to read.
    :raises SourceNotFound: if the workbook has no sheet of that name.
    :return: JSON object (dict) produced from the sheet's dataframe.
    """
    # Guard clause: fail fast on an unknown sheet name.
    if section_name not in self.sheet_names:
        raise SourceNotFound("there is no sheet with the name: " + section_name)
    # NOTE(review): re-reads the workbook from self.link on every call —
    # confirm __init__ sets self.link (constructor args are table_path/table_url).
    section_frame = pd.read_excel(self.link, engine='openpyxl',
                                  sheet_name=section_name, index_col=0)
    serialized = section_frame.to_json(indent=4, date_format='iso')
    return json.loads(serialized)
from pydantic import BaseModel
class OEMeta(BaseModel):
    """Placeholder pydantic model for an OEMetadata document; no fields defined yet."""
    pass
# generated by datamodel-codegen: # generated by data-model-codegen:
# filename: fields.json # filename: fields.json
# timestamp: 2025-01-27T15:02:42+00:00 # timestamp: 2025-01-27T15:02:42+00:00
...@@ -16,7 +16,7 @@ class IsAboutItem(BaseModel): ...@@ -16,7 +16,7 @@ class IsAboutItem(BaseModel):
examples=['wind energy converting unit'], examples=['wind energy converting unit'],
title='Is About Name', title='Is About Name',
) )
field_id: Optional[AnyUrl] = Field( field_id: Optional[AnyUrl | None | str] = Field(
None, None,
alias='@id', alias='@id',
description='The path of the ontology term (IRI).', description='The path of the ontology term (IRI).',
...@@ -50,7 +50,8 @@ class ValueReferenceItem(BaseModel): ...@@ -50,7 +50,8 @@ class ValueReferenceItem(BaseModel):
class FieldModel(BaseModel): class FieldModel(BaseModel):
name: Optional[str] = Field( name: Optional[str] = Field(
..., ...,
description='The name of the field. The name may only consist of lowercase alphanumeric characters or underscores. It must not begin with a number or an underscore.', description='The name of the field. The name may only consist of lowercase alphanumeric characters or '
'underscores. It must not begin with a number or an underscore.',
examples=['year'], examples=['year'],
title='Column Name', title='Column Name',
) )
......
...@@ -34,7 +34,7 @@ class Contributor(BaseModel): ...@@ -34,7 +34,7 @@ class Contributor(BaseModel):
description='An array describing the roles of the contributor.', description='An array describing the roles of the contributor.',
title='Roles', title='Roles',
) )
date: Optional[date] = Field( date_: Optional[date] = Field(
None, None,
description='The date of the final contribution. Date Format is ISO 8601.', description='The date of the final contribution. Date Format is ISO 8601.',
examples=['2024-10-21'], examples=['2024-10-21'],
......
{ {
"name": "oep_oemetadata", "name": "oep_oemetadata",
"title": "OEP OEMetadata", "title": "OEP OEMetadata",
"description": "A collection of tables for the OEMetadata examples.", "description": "A dataset for the OEMetadata examples.",
"id": "https://databus.openenergyplatform.org/oeplatform/reference", "id": "https://databus.openenergyplatform.org/oeplatform/reference",
"resources": [ "resources": [
{ {
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
"subject": [ "subject": [
{ {
"name": "energy", "name": "energy",
"path": "https://openenergyplatform.org/ontology/oeo/OEO_00000150" "@id": "https://openenergyplatform.org/ontology/oeo/OEO_00000150"
} }
], ],
"keywords": [ "keywords": [
...@@ -85,14 +85,14 @@ ...@@ -85,14 +85,14 @@
{ {
"title": "IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report", "title": "IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report",
"authors": [ "authors": [
"Hoesung Lee", "Lang Lee",
"José Romero", "José Romero",
"The Core Writing Team" "The Core Writing Team"
], ],
"description": "A Report of the Intergovernmental Panel on Climate Change.", "description": "A Report of the Intergovernmental Panel on Climate Change.",
"publicationYear": "2023", "publicationYear": "2023",
"path": "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf", "@id": "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf",
"licenses": [ "sourceLicenses": [
{ {
"name": "ODbL-1.0", "name": "ODbL-1.0",
"title": "Open Data Commons Open Database License 1.0", "title": "Open Data Commons Open Database License 1.0",
...@@ -124,7 +124,7 @@ ...@@ -124,7 +124,7 @@
], ],
"date": "2024-10-21", "date": "2024-10-21",
"object": "data and metadata", "object": "data and metadata",
"comment": "Add general context." "comment": "Add metadata example."
} }
], ],
"type": "table", "type": "table",
...@@ -141,14 +141,14 @@ ...@@ -141,14 +141,14 @@
"isAbout": [ "isAbout": [
{ {
"name": "wind energy converting unit", "name": "wind energy converting unit",
"path": "https://openenergyplatform.org/ontology/oeo/OEO_00000044" "@id": "https://openenergyplatform.org/ontology/oeo/OEO_00000044"
} }
], ],
"valueReference": [ "valueReference": [
{ {
"value": "onshore", "value": "onshore",
"name": "onshore wind farm", "name": "onshore wind farm",
"path": "https://openenergyplatform.org/ontology/oeo/OEO_00000311" "@id": "https://openenergyplatform.org/ontology/oeo/OEO_00000311"
} }
] ]
} }
......
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.linked_data import Model as LinkedDataModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.general import Model as GeneralModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.context import Model as ContextModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.spatial import Model as SpatialModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.temporal import Model as TemporalModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.sources import Model as SourceModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.licenses import Model as LicenseModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.provenance import Model as ProvenanceModel
from backend_regimo.components.oep_access.oemeta.latest.build_source.schemas.fields import (Model as FieldsModel,
FieldModel,
IsAboutItem,
ValueReferenceItem,
Schema)
import json
from pydantic.networks import AnyUrl
from pydantic import ValidationError
class OEMeta:
    """
    Builder for a target OEMetadata document.

    Holds one generated pydantic model instance per metadata section
    (general, context, spatial, temporal, sources, licenses, provenance,
    resource/fields), merges values taken from an Excel metadata source
    into them, and serializes the combined document to a JSON file.
    """

    def __init__(self, model_path, schema_path):
        """
        :param model_path: path to the generated pydantic model source.
        :param schema_path: path to the resolved JSON schema file.
        """
        super().__init__()
        self.model_path = model_path
        self.schema_path = schema_path
        # Accumulator for the per-section dumps written by get_metadata().
        self.metadata = {}
        self.linkedData = LinkedDataModel()
        # GeneralModel requires a name; placeholder until merged from the source.
        self.general = GeneralModel(name='general_name')
        self.context = ContextModel()
        self.spatial = SpatialModel()
        self.temporal = TemporalModel()
        self.sources = SourceModel()
        self.licenses = LicenseModel()
        self.provenance = ProvenanceModel()
        self.resource = FieldsModel()
        # 'schema_' is the generated alias for the JSON key 'schema'.
        self.resource.schema_ = Schema(primaryKey=[])
        self.resource.schema_.fields = []

    def merge_field_from_meta_source(self, meta_source):
        """
        append a new field to the target meta.

        :param meta_source: mapping with flattened field-metadata keys as
            produced by the metadata header sheet, e.g. "name", "type",
            "unit", "isAbout.name", "isAbout.path", "valueReference.value",
            "valueReference.name", "valueReference.path".
        """
        is_about_url = meta_source["isAbout.path"]
        value_reference_url = meta_source["valueReference.path"]
        # The first iteration is marked by the length of the fields array;
        # resource-level attributes are set only once.
        if len(self.resource.schema_.fields) == 0:
            self.resource.type = 'table'
            self.resource.format = 'xlsx'
            self.resource.encoding = 'utf-8'
        current_is_about = IsAboutItem(name=meta_source["isAbout.name"])
        current_is_about.field_id = is_about_url  # AnyUrl(url=is_about_url)
        current_value_reference = ValueReferenceItem(name=meta_source["valueReference.name"])
        current_value_reference.value = meta_source["valueReference.value"]
        current_value_reference.field_id = value_reference_url  # AnyUrl(url=value_reference_url)
        current_field = FieldModel(name=meta_source["name"], type=meta_source["type"], nullable=True)
        current_field.isAbout = [current_is_about]
        # NOTE(review): isAbout is wrapped in a list but valueReference is
        # assigned as a single item — confirm against the generated
        # FieldModel whether valueReference should also be a list.
        current_field.valueReference = current_value_reference
        current_field.unit = meta_source["unit"]
        self.resource.schema_.fields.append(current_field)

    def merge_general_from_meta_source(self, general_metadata):
        """
        Copy name/description/publicationDate from the 'general' metadata
        sheet into the general section model.

        :param general_metadata: nested mapping; only the "first_or_single"
            row is consumed here.
        """
        name = general_metadata["first_or_single"]["name"]
        description = general_metadata["first_or_single"]["description"]
        publication_date = general_metadata["first_or_single"]["publicationDate"]
        self.general.name = name
        self.general.description = description
        self.general.publicationDate = publication_date

    def merge_licenses_from_meta_source(self, licenses_metadata):
        # TODO: not yet implemented — licenses section is left at its default.
        pass

    def merge_provenance_from_meta_source(self, provenance_metadata):
        # TODO: not yet implemented — provenance section is left at its default.
        pass

    def merge_spatial_from_meta_source(self, spatial_metadata):
        # TODO: not yet implemented — spatial section is left at its default.
        pass

    def merge_temporal_from_meta_source(self, temporal_metadata):
        # TODO: not yet implemented — temporal section is left at its default.
        pass

    def get_metadata(self, metadata_target_path: str = "table_as_current_meta.json") -> None:
        """
        Dump every section model into self.metadata and write the combined
        document as indented JSON to metadata_target_path.

        Note: despite the name, this returns nothing — its result is the
        written file and the populated self.metadata dict.
        """
        # self.metadata["linkedData"] = self.linkedData.model_dump()
        self.metadata["general"] = self.general.model_dump()
        self.metadata["context"] = self.context.model_dump()
        self.metadata["spatial"] = self.spatial.model_dump()
        self.metadata["temporal"] = self.temporal.model_dump()
        self.metadata["sources"] = self.sources.model_dump()
        self.metadata["licenses"] = self.licenses.model_dump()
        self.metadata["provenance"] = self.provenance.model_dump()
        self.metadata["resource"] = self.resource.model_dump()
        with open(metadata_target_path, "w") as f:
            f.write(json.dumps(self.metadata, indent=4))
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from pathlib import Path from pathlib import Path
from random import randint
LOG_FORMAT = "[%(asctime)s %(module)16s %(levelname)7s] %(message)s" LOG_FORMAT = "[%(asctime)s %(module)16s %(levelname)7s] %(message)s"
...@@ -22,3 +24,6 @@ EXAMPLE_PATH = VERSION_PATH / "example.json" ...@@ -22,3 +24,6 @@ EXAMPLE_PATH = VERSION_PATH / "example.json"
PATH_TO_SOURCE = ('/Users/ot2661/Documents/01_dev/aed_pub/regimo/regimo/services/backend_regimo/data' + PATH_TO_SOURCE = ('/Users/ot2661/Documents/01_dev/aed_pub/regimo/regimo/services/backend_regimo/data' +
'/LLEC_Data/dataset_sample_2rows.xlsx') '/LLEC_Data/dataset_sample_2rows.xlsx')
URL_TO_SOURCE = "https://github.com/koubaa-hmc/LLEC_Data/raw/refs/heads/main/dataset_sample_2rows.xlsx" URL_TO_SOURCE = "https://github.com/koubaa-hmc/LLEC_Data/raw/refs/heads/main/dataset_sample_2rows.xlsx"
TOPIC = "sandbox"
TARGET_TABLE_NAME = f"living_lab_table_{randint(0, 100000)}"
This diff is collapsed.
...@@ -5,66 +5,131 @@ import logging ...@@ -5,66 +5,131 @@ import logging
from pathlib import Path from pathlib import Path
from getpass import getpass from getpass import getpass
from os import environ from os import environ
from random import randint
from oep_client import OepClient from oep_client import OepClient
from oemeta.target_meta import OEMeta from oemeta.target_meta import OEMeta
from excel_sheet_adapter import ExcelSheetAdapter from excel_sheet_adapter import ExcelSheetAdapter
from settings import (RESOLVED_SCHEMA_FILE_NAME, TEMPLATE_PATH, LOG_FORMAT, from settings import (MODEL_PATH, LOG_FORMAT, PATH_TO_SOURCE,
PATH_TO_SOURCE, URL_TO_SOURCE) URL_TO_SOURCE, TOPIC, TARGET_TABLE_NAME,
RESOLVED_SCHEMA_FILE_NAME)
# set default value for the oep token
environ.setdefault('OEP_API_TOKEN',
"f00fa56fa4554da3714832a9452525248f9c4988") def main():
"""
topic = "sandbox" An excel file containing both data and metadata is uploaded to oep.
table = f"living_lab_table_{randint(0, 100000)}" Metadata are attached to the data's table.
A review process will be triggered, as a result the data are published to
# configure logger. Please select your logging level the databus.open-energy-platform.org.
# INFO or DEBUG. Logs are to find under log/ """
logging.basicConfig(level=logging.DEBUG, filename='log/'+table+'.log',
format=LOG_FORMAT, filemode='w') # set default value for the oep token
logger = logging.getLogger(__name__) environ.setdefault('OEP_API_TOKEN',
logger.info('Starting LLEC upload') "f00fa56fa4554da3714832a9452525248f9c4988")
path_to_source = PATH_TO_SOURCE # set values from settings
url_to_source = URL_TO_SOURCE path_to_source = PATH_TO_SOURCE
token = environ.get("OEP_API_TOKEN") or getpass("Enter your OEP API token: ") url_to_source = URL_TO_SOURCE
model_path = MODEL_PATH
# for read/write, we need to add authorization header schema_path = RESOLVED_SCHEMA_FILE_NAME
auth_headers = {"Authorization": "Token %s" % token} table_name = TARGET_TABLE_NAME
table_api_url = f"https://openenergyplatform.org/api/v0/schema/{topic}/tables/{table}/" topic = TOPIC
logger.info("Table API URL: %s" % table_api_url) token = environ.get("OEP_API_TOKEN") or getpass("Enter your OEP API token: ")
# instantiate table source # INFO or DEBUG. Logs are to find under log/
table_source = ExcelSheetAdapter(table_path=Path(path_to_source), logging.basicConfig(level=logging.DEBUG, filename='log/' + table_name + '.log',
data_sheet_name='dataset_sample_2rows_comp', format=LOG_FORMAT, filemode='w')
metadata_sheet_name='dataset_sample_2rows_meta') logger = logging.getLogger(__name__)
# instantiate target meta logger.info('Starting LLEC upload')
target_meta = OEMeta()
# for read/write, we need to add authorization header # auth_headers = {"Authorization": "Token %s" % token}
# get header table_api_url = f"https://openenergyplatform.org/api/v0/schema/{topic}/tables/{table_name}/"
table_schema_definition = table_source.get_header() logger.info("Table API URL: %s" % table_api_url)
# load data from table source # instantiate table source
table_data = table_source.get_data() table_source = ExcelSheetAdapter(table_path=Path(path_to_source), table_url=url_to_source,
data_sheet_name='dataset_sample_2rows_comp',
# load metadata from table source metadata_sheet_name='dataset_sample_2rows_meta')
table_metadata = table_source.get_metadata() # - target metadata
# todo: handle metadata with in an object(oemeta) target_meta = OEMeta(model_path=model_path,
schema_path=schema_path)
# attach a model to target_meta # - client to oep
target_meta.attach_schema(MODEL_PATH) cli = OepClient(token=token,
default_schema=topic)
# instantiate client to oep
cli = OepClient(token=token, default_schema=topic) # get header from table source
cli.create_table(table, table_schema_definition) table_schema_definition = table_source.get_header()
cli.insert_into_table(table, table_data)
# load data from table source
# get metadata (from example file) table_data = table_source.get_data()
# metadata = req.get(
# "https://raw.githubusercontent.com/OpenEnergyPlatform/academy/production/docs/data/tutorial_example_table.metadata.json").json() # merge metadata from table source into target meta
##
# metadata = cli.set_metadata(table, metadata) # load metadata header from table source in <sheet[1]|metadata_sheet_name>
# print(json.dumps(metadata, indent=4)) table_metadata_header = table_source.get_metadata_header()
# example of a field in table_metadata_header
# {
# "index": 0,
# "name": "t_amb",
# "Description": "Ambient temperature",
# "type": "Number",
# "isAbout.name": "temperature",
# "isAbout.path": "http://openenergy-platform.org/ontology/oeo/OEO_00010453",
# "valueReference.value": "temperature",
# "valueReference.name": "temperature",
# "valueReference.path": "http://openenergy-platform.org/ontology/oeo/OEO_00010453",
# "unit": "[°C]"
# },
for field_item in table_metadata_header:
target_meta.merge_field_from_meta_source(field_item)
general_metadata = table_source.get_metadata_section("general")
# example of
# {'first_or_single': {'name': 'Living Lab Measurements', 'topics': 'Measurement', 'title': 'Living Lab Meas',
# 'path': 'https://github.com/koubaa-hmc/LLEC_Data/raw/refs/heads/main/dataset_2rows.xlsx',
# 'description': 'The table is a collection of measurements done in a Living Lab',
# 'languages': '"en-GB"', 'subject.name': 'energy use',
# 'subject.path': 'http://openenergy-platform.org/ontology/oeo/OEO_00010210',
# 'keywords': 'http://openenergy-platform.org/ontology/oeo/OEO_00000150',
# 'publicationDate': '2025-01-28T00:00:00.000', 'embargoPeriod.start': None,
# 'embargoPeriod.end': None, 'embargoPeriod.isActive': False},
# 'second': {'name': None, 'topics': 'Energy', 'title': None, 'path': None, 'description': None, 'languages': None,
# 'subject.name': None, 'subject.path': None,
# 'keywords': 'http://openenergy-platform.org/ontology/oeo/OEO_00000384', 'publicationDate': None,
# 'embargoPeriod.start': None, 'embargoPeriod.end': None, 'embargoPeriod.isActive': None},
# 'third': {'name': None, 'topics': 'Temperature', 'title': None, 'path': None, 'description': None,
# 'languages': None, 'subject.name': None, 'subject.path': None, 'keywords': None, 'pubDate': None,
# 'embargoPeriod.start': None, 'embargoPeriod.end': None, 'embargoPeriod.isActive': None},
# 'fourth': {'name': None, 'topics': None, 'title': None, 'path': None, 'description': None, 'languages': None,
# 'subject.name': None, 'subject.path': None, 'keywords': None, 'publicationDate': None,
# 'embargoPeriod.start': None, 'embargoPeriod.end': None, 'embargoPeriod.isActive': None}}
target_meta.merge_general_from_meta_source(general_metadata)
licenses_metadata = table_source.get_metadata_section("licenses")
target_meta.merge_licenses_from_meta_source(licenses_metadata)
provenance_metadata = table_source.get_metadata_section("provenance")
target_meta.merge_provenance_from_meta_source(provenance_metadata)
spatial_metadata = table_source.get_metadata_section("spatial")
target_meta.merge_spatial_from_meta_source(spatial_metadata)
temporal_metadata = table_source.get_metadata_section("temporal")
target_meta.merge_temporal_from_meta_source(temporal_metadata)
target_meta.get_metadata()
# create table on oep and upload data
cli.create_table(table_name, table_schema_definition)
cli.insert_into_table(table_name, table_data)
# get metadata (from example file)
# metadata = req.get(
# "https://raw.githubusercontent.com/OpenEnergyPlatform/academy/production/docs/data/tutorial_example_table.metadata.json").json()
# metadata = cli.set_metadata(table, metadata)
# print(json.dumps(metadata, indent=4))
if __name__ == '__main__':
main()
Subproject commit ed02372d79e68110b8a3a9d2ff977eac77a024f9 Subproject commit 22cf19d66416505f257a07d1221f3b8d7455c60e
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment