diff --git a/docs/toardb_fastapi.md b/docs/toardb_fastapi.md index 3c391769d77e0ee22cd752cb2909f98b6976a927..1ba1b576a61d32db8cb50af6b9d143e2585cb146 100644 --- a/docs/toardb_fastapi.md +++ b/docs/toardb_fastapi.md @@ -52,6 +52,7 @@ | name | string | Name of person | Yes | | email | string | Email address of person | Yes | | phone | string | Phone number of person | Yes | +| orcid | string | ORCID-iD of person | Yes | | isprivate | boolean | Set this flag to true if the contact details shall not be exposed publicly | Yes | #### Stationmeta @@ -206,13 +207,13 @@ | access_rights | string | Access rights of timeseries data (see controlled vocabulary: Data Access Right) | Yes | | sampling_frequency | string | Sampling frequency of data in this timeseries (see controlled vocabulary: Sampling Frequency) | Yes | | aggregation | string | Aggregation type in this timeseries (see controlled vocabulary: Aggregation Type) | Yes | -| source | string | source of data (see controlled vocabulary: Data Source) | Yes | | data_start_date | dateTime | Start date of the variable data available for this station | Yes | | data_end_date | dateTime | End date of the variable data available for this station | Yes | -| measurement_method | string | instrument principle of measurement (see controlled vocabulary: Measurement Method) | Yes | +| data_origin | string | origin of data (model name or instrument) (see controlled vocabulary: Data Origin) | Yes | +| data_origin_type | string | type of data origin (see controlled vocabulary: Data Origin Type) | Yes | +| version | string | provider data version | Yes | +| data_license_accepted | dateTime | date when provider accepted the data license agreement | Yes | | sampling_height | number | Height above the ground of the inlet/instrument/sampler (in m) | Yes | -| date_added | dateTime | Date of timeseries metadata entry into TOAR database | Yes | -| date_modified | dateTime | Date of last timeseries metadata modification | Yes | | 
additional_metadata | string (json-string) | | No | | station | [StationmetaCoreBase](#stationmetacorebase) | | Yes | | variable | [Variable](#variable) | | Yes | @@ -333,14 +334,15 @@ [4, 'MeanMonth', 'monthly mean'], [5, 'None', 'none'], [6, 'Unknown', 'unknown']] -#### Data Source #### - * Data Source: [ - [0, 'Model', 'model'], - [1, 'Measurement', 'measurement']] -#### Measurement Method #### - * Measurement Method: [ - [0, 'UVAbsorption', 'UV absorption'], - [1, 'UnknownInstrument', 'unknown instrument']] +#### Data Origin Type #### + * Data Origin Type: [ + [0, 'Measurement', 'measurement'], + [1, 'Model', 'model']] +#### Data Origin #### + * Data Origin: [ + [0, 'Instrument', 'instrument'], + [1, 'COSMOREA6', 'COSMO REA 6'], + [2, 'ERA5', 'ERA 5']] #### Climatic Zone #### * Climatic Zone: [ [-1, 'Undefined', 'undefined'], diff --git a/toardb/contacts/models_person.py b/toardb/contacts/models_person.py index d02a723593d344ab6ec955677c9b626c856a64bb..f2031867a53d7454b2567030deb45ad51a40d365 100644 --- a/toardb/contacts/models_person.py +++ b/toardb/contacts/models_person.py @@ -10,19 +10,21 @@ class Person(Base): """ Table "public.persons" - +-----------+------------------------+-----------+----------+-------------------------------------+ - | Column | Type | Collation | Nullable | Default | - +===========+========================+===========+==========+=====================================+ - | id | integer | | not null | nextval('persons_id_seq'::regclass) | - +-----------+------------------------+-----------+----------+-------------------------------------+ - | name | character varying(64) | | not null | | - +-----------+------------------------+-----------+----------+-------------------------------------+ - | email | character varying(128) | | not null | | - +-----------+------------------------+-----------+----------+-------------------------------------+ - | phone | character varying(32) | | not null | ''::character varying | - 
+-----------+------------------------+-----------+----------+-------------------------------------+ - | isprivate | boolean | | not null | true | - +-----------+------------------------+-----------+----------+-------------------------------------+ + +-----------+------------------------+-----------+----------+------------------------------------------+ + | Column | Type | Collation | Nullable | Default | + +===========+========================+===========+==========+==========================================+ + | id | integer | | not null | nextval('persons_id_seq'::regclass) | + +-----------+------------------------+-----------+----------+------------------------------------------+ + | name | character varying(64) | | not null | | + +-----------+------------------------+-----------+----------+------------------------------------------+ + | email | character varying(128) | | not null | | + +-----------+------------------------+-----------+----------+------------------------------------------+ + | phone | character varying(32) | | not null | ''::character varying | + +-----------+------------------------+-----------+----------+------------------------------------------+ + | orcid | character varying(19) | | not null | '0000-0000-0000-0000'::character varying | + +-----------+------------------------+-----------+----------+------------------------------------------+ + | isprivate | boolean | | not null | true | + +-----------+------------------------+-----------+----------+------------------------------------------+ Indexes: "persons_pkey" PRIMARY KEY, btree (id) @@ -41,5 +43,6 @@ class Person(Base): name = Column(String(64), nullable=False) email = Column(String(128), nullable=False) phone = Column(String(32), nullable=False, server_default=text("''::character varying")) + orcid = Column(String(19), nullable=False, server_default=text("'0000-0000-0000-0000'::character varying")) isprivate = Column(Boolean, nullable=False, server_default=text("true")) diff --git
a/toardb/contacts/schemas.py b/toardb/contacts/schemas.py index 6c52e68bea9c25632c0e44f6fb9a71c24af52b78..c735aa9896b510db55619dbb4faf500a1ed3073a 100644 --- a/toardb/contacts/schemas.py +++ b/toardb/contacts/schemas.py @@ -60,6 +60,7 @@ class PersonBase(BaseModel): name: str = Field(..., description="Name of person") email: str = Field(..., description="Email address of person") phone: str = Field(..., description="Phone number of person") + orcid: str = Field(..., description="ORCID-iD of person") isprivate: bool = Field(..., description="Set this flag to true if the contact details shall not be exposed publicly") def __str__(self): diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py index b289c618975de8f41e51811edb06d80eb2477e02..1c17c30540de75d59faa166be65da9410f242840 100644 --- a/toardb/timeseries/crud.py +++ b/toardb/timeseries/crud.py @@ -119,8 +119,8 @@ def create_timeseries(db: Session, timeseries: TimeseriesCreate): db_timeseries.access_rights = get_value_from_str(toardb.toardb.DA_vocabulary,db_timeseries.access_rights) db_timeseries.sampling_frequency = get_value_from_str(toardb.toardb.SF_vocabulary,db_timeseries.sampling_frequency) db_timeseries.aggregation = get_value_from_str(toardb.toardb.AT_vocabulary,db_timeseries.aggregation) - db_timeseries.source = get_value_from_str(toardb.toardb.DS_vocabulary,db_timeseries.source) - db_timeseries.measurement_method= get_value_from_str(toardb.toardb.MM_vocabulary,db_timeseries.measurement_method) + db_timeseries.data_origin_type = get_value_from_str(toardb.toardb.OT_vocabulary,db_timeseries.data_origin_type) + db_timeseries.data_origin = get_value_from_str(toardb.toardb.DO_vocabulary,db_timeseries.data_origin) db.add(db_timeseries) result = db.commit() db.refresh(db_timeseries) diff --git a/toardb/timeseries/models.py b/toardb/timeseries/models.py index 815a73d7e8101c245679ab7998e2ceef975e6827..80c28ca31522c75c842b47362c9acd338e3d6333 100644 --- a/toardb/timeseries/models.py +++ 
b/toardb/timeseries/models.py @@ -34,16 +34,16 @@ AT_enum_table = Table("at_vocabulary", Column("enum_display_str", String) ) -# Data Sources -DS_enum_table = Table("ds_vocabulary", +# Data Origin Types +OT_enum_table = Table("ot_vocabulary", Base.metadata, Column("enum_val", Integer, primary_key=True), Column("enum_str", String), Column("enum_display_str", String) ) -# Measurement Methods -MM_enum_table = Table("mm_vocabulary", +# Data Origins +DO_enum_table = Table("do_vocabulary", Base.metadata, Column("enum_val", Integer, primary_key=True), Column("enum_str", String), diff --git a/toardb/timeseries/models_core.py b/toardb/timeseries/models_core.py index 070d542ca4ac04929548e94067f57d23cc895a44..d4646e97937c97ecf254503127a3b1b2c35a7152 100644 --- a/toardb/timeseries/models_core.py +++ b/toardb/timeseries/models_core.py @@ -18,43 +18,43 @@ class Timeseries(Base): """ Table "public.timeseries" - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | Column | Type | Collation | Nullable | Default | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | Column | Type | Collation | Nullable | Default | +=====================+==========================+===========+==========+========================================+ - | id | integer | | not null | nextval('timeseries_id_seq'::regclass) | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | label | character varying(128) | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | order | integer | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | access_rights | integer | | not null | | - 
+---------------------+--------------------------+-----------+----------+----------------------------------------+ - | sampling_frequency | integer | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | aggregation | integer | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | source | integer | | not null | 1 | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | data_start_date | timestamp with time zone | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | data_end_date | timestamp with time zone | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | measurement_method | integer | | not null | 1 | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | sampling_height | double precision | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | additional_metadata | jsonb | | not null | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | date_added | timestamp with time zone | | not null | now() | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | date_modified | timestamp with time zone | | not null | now() | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | station_id | integer | | | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | variable_id 
| integer | | | | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ - | programme_id | integer | | not null | 0 | - +---------------------+--------------------------+-----------+----------+----------------------------------------+ + | id | integer | | not null | nextval('timeseries_id_seq'::regclass) | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | label | character varying(128) | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | order | integer | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | access_rights | integer | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | sampling_frequency | integer | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | aggregation | integer | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | data_start_date | timestamp with time zone | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | data_end_date | timestamp with time zone | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | data_origin_type | integer | | not null | 0 | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | data_origin | integer | | not null | 0 | + 
+-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | sampling_height | double precision | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | additional_metadata | jsonb | | not null | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | version | character varying(28) | | not null | '' | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | data_license_accepted | timestamp with time zone | | not null | '1900-01-01' | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | station_id | integer | | | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | variable_id | integer | | | | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ + | programme_id | integer | | not null | 0 | + +-----------------------+--------------------------+-----------+----------+----------------------------------------+ Indexes: "timeseries_pkey" PRIMARY KEY, btree (id) @@ -71,9 +71,9 @@ class Timeseries(Base): "timeseries_variable_id_fk_variables_id" FOREIGN KEY (variable_id) REFERENCES variables(id) DEFERRABLE INITIALLY DEFERREDForeign-key constraints: "timeseries_access_rights_fk_da_vocabulary_enum_val" FOREIGN KEY (access_rights) REFERENCES da_vocabulary(enum_val) "timeseries_aggregation_fk_at_vocabulary_enum_val" FOREIGN KEY (aggregation) REFERENCES at_vocabulary(enum_val) - "timeseries_measurement_method_fk_mm_vocabulary_enum_val" FOREIGN KEY (measurement_method) REFERENCES mm_vocabulary(enum_val) "timeseries_sampling_frequency_fk_sf_vocabulary_enum_val" FOREIGN KEY (sampling_frequency) 
REFERENCES sf_vocabulary(enum_val) - "timeseries_source_fk_ds_vocabulary_enum_val" FOREIGN KEY (source) REFERENCES ds_vocabulary(enum_val) + "timeseries_data_origin_fk_do_vocabulary_enum_val" FOREIGN KEY (data_origin) REFERENCES do_vocabulary(enum_val) + "timeseries_data_origin_type_fk_ot_vocabulary_enum_val" FOREIGN KEY (data_origin_type) REFERENCES ot_vocabulary(enum_val) "timeseries_programme_id_fk_timeseries_programmes_id" FOREIGN KEY (programme_id) REFERENCES timeseries_programmes(id) Referenced by: TABLE "data" CONSTRAINT "data_timeseries_id_fk_timeseries_id" FOREIGN KEY (timeseries_id) REFERENCES timeseries(id) DEFERRABLE INITIALLY DEFERRED @@ -97,13 +97,13 @@ class Timeseries(Base): access_rights = Column(ForeignKey('da_vocabulary.enum_val'), nullable=False) sampling_frequency = Column(ForeignKey('sf_vocabulary.enum_val'), nullable=False) aggregation = Column(ForeignKey('at_vocabulary.enum_val'), nullable=False) - source = Column(ForeignKey('ds_vocabulary.enum_val'), nullable=False, server_default=text("1")) data_start_date = Column(DateTime(True), nullable=False) data_end_date = Column(DateTime(True), nullable=False) - measurement_method = Column(ForeignKey('mm_vocabulary.enum_val'), nullable=False, server_default=text("1")) + data_origin_type = Column(ForeignKey('ot_vocabulary.enum_val'), nullable=False, server_default=text("0")) + data_origin = Column(ForeignKey('do_vocabulary.enum_val'), nullable=False, server_default=text("0")) sampling_height = Column(Float(53), nullable=False) - date_added = Column(DateTime(True), nullable=False, server_default=text("now()")) - date_modified = Column(DateTime(True), nullable=False, server_default=text("now()")) + version = Column(String(128), nullable=False, server_default=text("''")) + data_license_accepted = Column(DateTime(True), nullable=False, server_default=text("'1900-01-01 00:00:00+00'::timestamp with time zone")) # do not use string declaration here (not working for pytest) # use the explicit class name 
here, # see: https://groups.google.com/forum/#!topic/sqlalchemy/YjGhE4d6K4U @@ -120,9 +120,3 @@ class Timeseries(Base): additional_metadata = Column(JSONB(astext_type=Text()), nullable=True) -# da_vocabulary = relationship('DaVocabulary') -# at_vocabulary = relationship('AtVocabulary') -# mm_vocabulary = relationship('MmVocabulary') -# sf_vocabulary = relationship('SfVocabulary') -# ds_vocabulary = relationship('DsVocabulary') - diff --git a/toardb/timeseries/schemas.py b/toardb/timeseries/schemas.py index f6a67f540ebde60b09a9b88a8801f9a2b0235e24..fc5fa0accb38d3e05ad4dc5f6960edbad5c16240 100644 --- a/toardb/timeseries/schemas.py +++ b/toardb/timeseries/schemas.py @@ -26,13 +26,13 @@ class TimeseriesCoreBase(BaseModel): access_rights: str = Field(..., description="Access rights of timeseries data (see controlled vocabulary: Data Access Right)") sampling_frequency: str = Field(..., description="Sampling frequency of data in this timeseries (see controlled vocabulary: Sampling Frequency)") aggregation: str = Field(..., description="Aggregation type in this timeseries (see controlled vocabulary: Aggregation Type)") - source: str = Field(..., description="source of data (see controlled vocabulary: Data Source)") data_start_date: dt.datetime = Field(..., description="Start date of the variable data available for this station") data_end_date: dt.datetime = Field(..., description="End date of the variable data available for this station") - measurement_method: str = Field(..., description="instrument principle of measurement (see controlled vocabulary: Measurement Method)") + data_origin: str = Field(..., description="origin of data (model name or instrument) (see controlled vocabulary: Data Origin)") + data_origin_type: str = Field(..., description="type of data origin (see controlled vocabulary: Data Origin Type)") + version: str = Field(..., description="provider data version") + data_license_accepted: dt.datetime = Field(..., description="date when provider accepted 
the data license agreement") sampling_height: float = Field(..., description="Height above the ground of the inlet/instrument/sampler (in m)") - date_added: dt.datetime = Field(..., description="Date of timeseries metadata entry into TOAR database") - date_modified: dt.datetime = Field(..., description="Date of last timeseries metadata modification") additional_metadata: Json = Field(..., description="Additional information about the timeseries as JSON structure.") # still missing: "Score values from automated data QA (5-star evaluation)" @@ -51,13 +51,13 @@ class TimeseriesCoreBase(BaseModel): def check_aggregation(cls, v): return tuple(filter(lambda x: x.value == int(v), toardb.toardb.AT_vocabulary))[0].display_str - @validator('source') + @validator('data_origin_type') def check_source(cls, v): - return tuple(filter(lambda x: x.value == int(v), toardb.toardb.DS_vocabulary))[0].display_str + return tuple(filter(lambda x: x.value == int(v), toardb.toardb.OT_vocabulary))[0].display_str - @validator('measurement_method') + @validator('data_origin') def check_measurement_method(cls, v): - return tuple(filter(lambda x: x.value == int(v), toardb.toardb.MM_vocabulary))[0].display_str + return tuple(filter(lambda x: x.value == int(v), toardb.toardb.DO_vocabulary))[0].display_str class TimeseriesCoreCreate(TimeseriesCoreBase): @@ -86,16 +86,16 @@ class TimeseriesCoreCreate(TimeseriesCoreBase): else: raise ValueError(f"aggregation type not known: {v}") - @validator('source') + @validator('data_origin_type') def check_source(cls, v): - if tuple(filter(lambda x: x.string == v, toardb.toardb.DS_vocabulary)): + if tuple(filter(lambda x: x.string == v, toardb.toardb.OT_vocabulary)): return v else: raise ValueError(f"data source not known: {v}") - @validator('measurement_method') + @validator('data_origin') def check_measurement_method(cls, v): - if tuple(filter(lambda x: x.string == v, toardb.toardb.MM_vocabulary)): + if tuple(filter(lambda x: x.string == v, 
toardb.toardb.DO_vocabulary)): return v else: raise ValueError(f"measurement method not known: {v}") @@ -264,8 +264,6 @@ class TimeseriesPatch(BaseModel): data_end_date: dt.datetime = None measurement_method: str = None sampling_height: float = None - date_added: dt.datetime = None - date_modified: dt.datetime = None # roles: List[TimeseriesRole] = None # annotations: List[TimeseriesAnnotation] = None # variable: Variable = None diff --git a/toardb/toardb.py b/toardb/toardb.py index 706fa6afbe157584e80dad9c7ff160bb74a4ae48..6558508f6129a7bb5b9461bf2babd6cd4928b9fc 100644 --- a/toardb/toardb.py +++ b/toardb/toardb.py @@ -27,8 +27,8 @@ OK_vocabulary = 0 DA_vocabulary = 0 SF_vocabulary = 0 AT_vocabulary = 0 -DS_vocabulary = 0 -MM_vocabulary = 0 +OT_vocabulary = 0 +DO_vocabulary = 0 CZ_vocabulary = 0 CV_vocabulary = 0 CN_vocabulary = 0 @@ -61,8 +61,8 @@ async def info(): "Data Access Right": DA_vocabulary, "Sampling Frequency": SF_vocabulary, "Aggregation Type": AT_vocabulary, - "Data Source": DS_vocabulary, - "Measurement Method": MM_vocabulary, + "Data Origin Type": OT_vocabulary, + "Data Origin": DO_vocabulary, "Climatic Zone": CZ_vocabulary, "Country Code": CN_vocabulary, "Timezone": TZ_vocabulary, @@ -88,8 +88,8 @@ async def info(name: str): "data access right": DA_vocabulary, "sampling frequency": SF_vocabulary, "aggregation type": AT_vocabulary, - "data source": DS_vocabulary, - "measurement method": MM_vocabulary, + "data origin": DO_vocabulary, + "data origin type": OT_vocabulary, "climatic zone": CZ_vocabulary, "coordinate validity": CV_vocabulary, "country code": CN_vocabulary, @@ -161,8 +161,8 @@ async def startup_event(): global DA_vocabulary global SF_vocabulary global AT_vocabulary - global DS_vocabulary - global MM_vocabulary + global OT_vocabulary + global DO_vocabulary global CZ_vocabulary global CV_vocabulary global CN_vocabulary @@ -185,8 +185,8 @@ async def startup_event(): DA_vocabulary = __get_enum_dict(fake_cur, "da_vocabulary") SF_vocabulary = 
__get_enum_dict(fake_cur, "sf_vocabulary") AT_vocabulary = __get_enum_dict(fake_cur, "at_vocabulary") - DS_vocabulary = __get_enum_dict(fake_cur, "ds_vocabulary") - MM_vocabulary = __get_enum_dict(fake_cur, "mm_vocabulary") + OT_vocabulary = __get_enum_dict(fake_cur, "ot_vocabulary") + DO_vocabulary = __get_enum_dict(fake_cur, "do_vocabulary") CZ_vocabulary = __get_enum_dict(fake_cur, "cz_vocabulary") CV_vocabulary = __get_enum_dict(fake_cur, "cv_vocabulary") CN_vocabulary = __get_enum_dict(fake_cur, "cn_vocabulary")