Skip to content
Snippets Groups Projects
Commit 17223013 authored by Utz-Uwe Haus's avatar Utz-Uwe Haus
Browse files

Complete a rough version of YAML attribute parsing

Good enough to parse a few simple maestro schema attributes, and to notice
a missing attribute declaration encountered during the parse.
parent 45ae38ed
No related branches found
No related tags found
No related merge requests found
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <inttypes.h>
#include <cyaml/cyaml.h> #include <cyaml/cyaml.h>
...@@ -15,6 +16,7 @@ ...@@ -15,6 +16,7 @@
#include "maestro.h" #include "maestro.h"
#include "maestro/logging.h" #include "maestro/logging.h"
#include "maestro-schema.h" #include "maestro-schema.h"
#include "schema_type_parse.h"
#include "symtab.h" #include "symtab.h"
#include "maestro/i_uthash.h" #include "maestro/i_uthash.h"
...@@ -75,6 +77,12 @@ struct mstro_schema_type_ { ...@@ -75,6 +77,12 @@ struct mstro_schema_type_ {
mstro_symbol type_symbol; /**< symbol created for this typename; key when hashing */ mstro_symbol type_symbol; /**< symbol created for this typename; key when hashing */
char *unparsed_typespec; /**< "typespec: include('maestro-user-typespec')", unparsed */ char *unparsed_typespec; /**< "typespec: include('maestro-user-typespec')", unparsed */
char *documentation; /**< "documentation: str()" */ char *documentation; /**< "documentation: str()" */
struct mstro_stp_val *parsed_type; /**< parsed type handle */
/* A closure each for parser, serializer, deserializer. May be
* NULL. Main use: instantiated regex parser automaton for string-ish types */
void *parser_closure;
}; };
/** mapping definition for user-defined schema type */ /** mapping definition for user-defined schema type */
...@@ -714,7 +722,7 @@ mstro_schema_merge(mstro_schema main, ...@@ -714,7 +722,7 @@ mstro_schema_merge(mstro_schema main,
return MSTRO_UNIMPL; return MSTRO_UNIMPL;
} }
/* these correspond 1:1 to the user-facing values of enum mstro_stp_val_kind */
enum mstro_schema_builtin_type { enum mstro_schema_builtin_type {
MSTRO_SCHEMA_BUILTIN_BOOL, MSTRO_SCHEMA_BUILTIN_BOOL,
MSTRO_SCHEMA_BUILTIN_UINT, MSTRO_SCHEMA_BUILTIN_UINT,
...@@ -723,29 +731,52 @@ enum mstro_schema_builtin_type { ...@@ -723,29 +731,52 @@ enum mstro_schema_builtin_type {
MSTRO_SCHEMA_BUILTIN_DOUBLE, MSTRO_SCHEMA_BUILTIN_DOUBLE,
MSTRO_SCHEMA_BUILTIN_STRING, MSTRO_SCHEMA_BUILTIN_STRING,
MSTRO_SCHEMA_BUILTIN_REGEX, MSTRO_SCHEMA_BUILTIN_REGEX,
MSTRO_SCHEMA_BUILTIN_BLOB,
MSTRO_SCHEMA_BUILTIN_TYPE__MAX MSTRO_SCHEMA_BUILTIN_TYPE__MAX
}; };
static struct { static struct {
enum mstro_schema_builtin_type type; enum mstro_schema_builtin_type type;
const char *basename; const char *basename;
enum mstro_stp_val_kind stp_kind;
} builtin_types[MSTRO_SCHEMA_BUILTIN_TYPE__MAX] = { } builtin_types[MSTRO_SCHEMA_BUILTIN_TYPE__MAX] = {
[MSTRO_SCHEMA_BUILTIN_BOOL] = { .type = MSTRO_SCHEMA_BUILTIN_BOOL, .basename = "bool" }, [MSTRO_SCHEMA_BUILTIN_BOOL] = { .type = MSTRO_SCHEMA_BUILTIN_BOOL,
[MSTRO_SCHEMA_BUILTIN_UINT] = { .type = MSTRO_SCHEMA_BUILTIN_UINT, .basename = "uint" }, .basename = "bool",
[MSTRO_SCHEMA_BUILTIN_INT] = { .type = MSTRO_SCHEMA_BUILTIN_INT, .basename = "int" }, .stp_kind = MSTRO_STP_BOOL},
[MSTRO_SCHEMA_BUILTIN_FLOAT] = { .type = MSTRO_SCHEMA_BUILTIN_FLOAT, .basename = "float" }, [MSTRO_SCHEMA_BUILTIN_UINT] = { .type = MSTRO_SCHEMA_BUILTIN_UINT,
[MSTRO_SCHEMA_BUILTIN_DOUBLE] = { .type = MSTRO_SCHEMA_BUILTIN_DOUBLE, .basename = "double" }, .basename = "uint",
[MSTRO_SCHEMA_BUILTIN_STRING] = { .type = MSTRO_SCHEMA_BUILTIN_STRING, .basename = "str" }, .stp_kind = MSTRO_STP_UINT},
[MSTRO_SCHEMA_BUILTIN_REGEX] = { .type = MSTRO_SCHEMA_BUILTIN_REGEX, .basename = "regex" } [MSTRO_SCHEMA_BUILTIN_INT] = { .type = MSTRO_SCHEMA_BUILTIN_INT,
.basename = "int",
.stp_kind = MSTRO_STP_INT},
[MSTRO_SCHEMA_BUILTIN_FLOAT] = { .type = MSTRO_SCHEMA_BUILTIN_FLOAT,
.basename = "float",
.stp_kind = MSTRO_STP_FLOAT},
[MSTRO_SCHEMA_BUILTIN_DOUBLE] = { .type = MSTRO_SCHEMA_BUILTIN_DOUBLE,
.basename = "double",
.stp_kind = MSTRO_STP_DOUBLE},
[MSTRO_SCHEMA_BUILTIN_STRING] = { .type = MSTRO_SCHEMA_BUILTIN_STRING,
.basename = "str",
.stp_kind = MSTRO_STP_STR},
[MSTRO_SCHEMA_BUILTIN_REGEX] = { .type = MSTRO_SCHEMA_BUILTIN_REGEX,
.basename = "regex",
.stp_kind = MSTRO_STP_REGEX},
[MSTRO_SCHEMA_BUILTIN_BLOB] = { .type = MSTRO_SCHEMA_BUILTIN_BLOB,
.basename = "blob",
.stp_kind = MSTRO_STP_BLOB}
}; };
/** lookup or create builtin type */
static inline
mstro_status mstro_status
mstro_schema_lookup_type__builtins( mstro_schema_lookup_type__builtins(
mstro_schema schema,
const char *typename, const char *typename,
mstro_schema_type *result) mstro_schema_type *result)
{ {
/* FIXME: improve this */
size_t i; size_t i;
mstro_status s = MSTRO_NOENT;
for(i=0; i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX; i++) { for(i=0; i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX; i++) {
/* hand-coded to avoid 3 strlen ops */ /* hand-coded to avoid 3 strlen ops */
size_t j; size_t j;
...@@ -763,20 +794,59 @@ mstro_schema_lookup_type__builtins( ...@@ -763,20 +794,59 @@ mstro_schema_lookup_type__builtins(
; ;
} }
if(i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX) { if(i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX) {
/* found something */ /* found something matching basename */
DEBUG("Found |%s| to match built-in type %s\n", mstro_symbol sym;
typename, builtin_types[i].basename); mstro_status status;
if(strlen(typename)!=strlen(builtin_types[i].basename)+strlen("()")) { status = mstro_symtab_lookup(schema->symtab, typename, &sym);
/* FIXME: parse restrictions of type if there are any and if(status!=MSTRO_OK) {
* instantiate a restricted version */ ERR("Failed to lookup builtin type in schema symtab: %d\n");
WARN("type has restriction specifiers that we ignore\n"); goto BAILOUT;
} }
*result = builtin_types+i; if(sym==NULL) {
return MSTRO_OK; DEBUG("Symbol for built-in type |%s| not yet in symtab, adding\n", typename);
s=mstro_symtab_intern(&schema->symtab, typename, &sym);
if(s!=MSTRO_OK) {
ERR("Failed to intern built-in type-name |%s|\n", typename);
goto BAILOUT;
}
}
/* now symbol exists */
HASH_FIND(hh, schema->type_table, &sym, sizeof(mstro_symbol), *result);
if(*result!=NULL) {
DEBUG("Found existing entry for built-in type |%s|\n", typename);
s=MSTRO_OK;
goto BAILOUT;
} else {
DEBUG("Failed to find builtin type |%s| in type table, instantiating\n", typename);
/* try to parse */
struct mstro_stp_val *parsed_type;
mstro_status s = mstro_schema_type_parse(typename, &parsed_type);
if(s!=MSTRO_OK) {
if(parsed_type && parsed_type->kind == MSTRO_STP_ERROR) {
ERR("error parsing built-in type, error message %s\n",
parsed_type->errmsg);
} else { } else {
return MSTRO_NOENT; ERR("error parsing built-in type: %d (no parser error message available)\n",
s);
}
}
*result = malloc(sizeof(struct mstro_schema_type_));
if(*result) {
(*result)->typename = strdup(typename);
(*result)->type_symbol = sym;
(*result)->unparsed_typespec = strdup("?");
(*result)->documentation = strdup("builtin type");
(*result)->parsed_type = parsed_type;
HASH_ADD(hh, schema->type_table, type_symbol, sizeof(mstro_symbol), *result);
s = MSTRO_OK;
} else {
s= MSTRO_NOMEM;
} }
} }
}
BAILOUT:
return s;
}
mstro_status mstro_status
...@@ -787,13 +857,10 @@ mstro_schema_lookup_type(mstro_schema schema, ...@@ -787,13 +857,10 @@ mstro_schema_lookup_type(mstro_schema schema,
mstro_symbol sym; mstro_symbol sym;
mstro_status status; mstro_status status;
/* built-in types first. We do this instead of duplicating them into /* built-in types first. This will create them and intern them on the fly, with restrictions if needed. */
* the schema for compile-time optimization. status = mstro_schema_lookup_type__builtins(schema, typename, result);
*
* FIXME: This could be improved by interning the base type name in
* the symbol table of all schemata */
status = mstro_schema_lookup_type__builtins(typename, result);
if(status==MSTRO_OK) { if(status==MSTRO_OK) {
DEBUG("type |%s| recognized as built-in\n", typename);
goto BAILOUT; goto BAILOUT;
} else { } else {
DEBUG("|%s| is not a builtin type\n"); DEBUG("|%s| is not a builtin type\n");
...@@ -855,6 +922,7 @@ struct mstro_attribute_entry_ { ...@@ -855,6 +922,7 @@ struct mstro_attribute_entry_ {
void *val; /** a value, to be interpreted by looking up the void *val; /** a value, to be interpreted by looking up the
* expected type of the attribute in the * expected type of the attribute in the
* appropriate schema */ * appropriate schema */
size_t valsize; /** allocated space for val */
/* FIXME: this is the place where serialized versions of the entry /* FIXME: this is the place where serialized versions of the entry
* should be cached if needed */ * should be cached if needed */
/* these may be unset -- this can be checked by comparing the string /* these may be unset -- this can be checked by comparing the string
...@@ -882,6 +950,7 @@ struct partial_key { ...@@ -882,6 +950,7 @@ struct partial_key {
/* parse VAL and fill in *entry */
static inline static inline
mstro_status mstro_status
mstro_attributes_parse_val(mstro_schema schema, mstro_attributes_parse_val(mstro_schema schema,
...@@ -900,11 +969,187 @@ mstro_attributes_parse_val(mstro_schema schema, ...@@ -900,11 +969,187 @@ mstro_attributes_parse_val(mstro_schema schema,
if(s!=MSTRO_OK) { if(s!=MSTRO_OK) {
ERR("Failed to find type declaration for type |%s| (attribute |%s|)\n", ERR("Failed to find type declaration for type |%s| (attribute |%s|)\n",
tname, aname); tname, aname);
return MSTRO_INVARG;
} else {
DEBUG("Found type declaration for type |%s| (attribute |%s|)\n", tname, aname);
}
/* FIXME: this could be handled better by pre-allocated parsers
* (needs to be reentrant!) built with a schema-typeval.peg */
if(!tdecl->parsed_type) {
ERR("Parsed type not available\n");
return MSTRO_UNIMPL;
}
/* all built-ins recognized by built-in regexps for now */
size_t num_re;
if(tdecl->parsed_type->kind == MSTRO_STP_REGEX) {
num_re = tdecl->parsed_type->regex_numpatterns;
} else {
num_re = 1;
}
regex_t regex[num_re];
int err=0;
bool need_regmatch = true; /* except for BLOB we need it */
size_t minlen =0, maxlen=val_len; /* might be changed for strings */
switch(tdecl->parsed_type->kind) {
case MSTRO_STP_BOOL: {
entry->valsize = sizeof(bool);
err = regcomp(&(regex[0]), "1|0|On|Off|True|False",REG_ICASE|REG_NOSUB|REG_EXTENDED);
break;
}
case MSTRO_STP_UINT: {
entry->valsize = sizeof(uint64_t);
err = regcomp(&(regex[0]), "[+]?[[:blank:]]*[0-9]+",REG_EXTENDED|REG_NOSUB);
WARN("Not checking numeric bounds on types\n");
break;
}
case MSTRO_STP_INT: {
entry->valsize = sizeof(int64_t);
err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]+",REG_EXTENDED|REG_NOSUB);
WARN("Not checking numeric bounds on types\n");
break;
}
case MSTRO_STP_FLOAT: {
entry->valsize = sizeof(float);
err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?",REG_EXTENDED);
WARN("Not checking numeric bounds on types\n");
break;
}
case MSTRO_STP_DOUBLE: {
entry->valsize = sizeof(double);
err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?",REG_EXTENDED);
WARN("Not checking numeric bounds on types\n");
break;
}
case MSTRO_STP_STR: {
entry->valsize = sizeof(char)*(val_len+1);
minlen = tdecl->parsed_type->str_minlen;
maxlen = tdecl->parsed_type->str_maxlen;
char *re;
if(tdecl->parsed_type->str_excludedchars) {
size_t l = strlen(tdecl->parsed_type->str_excludedchars)+strlen("[^]*") + 1;
re=malloc(sizeof(char)*l);
if(re) {
re[0]='['; re[1]='^';
strcpy(re+2,tdecl->parsed_type->str_excludedchars);
re[l-3] = ']'; re[l-2]='*'; re[l-1] = '\0';
}
} else {
re = strdup(".*");
}
if(re==NULL) {
ERR("Failed to allocate string regex\n");
s=MSTRO_NOMEM;
goto BAILOUT;
}
err = regcomp(&(regex[0]), re, REG_NOSUB);
free(re);
break;
}
case MSTRO_STP_REGEX:
entry->valsize = sizeof(char)*(val_len+1);
for(size_t i=0; i<num_re; i++) {
err |= regcomp(&(regex[i]), tdecl->parsed_type->regex_patterns[i],
REG_EXTENDED|REG_NOSUB| (tdecl->parsed_type->regex_ignorecase ? REG_ICASE : 0));
}
break;
case MSTRO_STP_BLOB:
entry->valsize = sizeof(char)*(val_len);
need_regmatch = false;
minlen = tdecl->parsed_type->blob_minlen;
maxlen = tdecl->parsed_type->blob_maxlen;
break;
default:
ERR("Unexpected parsed type %d\n", tdecl->parsed_type->kind);
s=MSTRO_UNIMPL;
}
if(err) {
ERR("Failed to construct regex\n");
// regerror ...
}
if(! (minlen<=val_len && val_len <=maxlen)) {
ERR("Argument |%s| (len %zu, strlen %zu) not within length bounds for type: min=%zu, max=%zu\n",
val, val_len, strlen((const char*)val), minlen, maxlen);
s=MSTRO_FAIL;
goto BAILOUT;
}
s=MSTRO_OK;
if(need_regmatch) {
s=MSTRO_NOMATCH;
for(size_t i=0; i<num_re; i++) {
s = mstro_schema__check_regex((const char*)val, &(regex[i]), 0);
/* DEBUG("Checked regex against |%s|, result %d (%s)\n", val, s, mstro_status_description(s)); */
if(s==MSTRO_OK)
break;
/* NOMATCH: try further */
if(s==MSTRO_NOMATCH) {
continue;
} else {
break;
}
}
for(size_t i=0; i<num_re; i++) {
regfree(&regex[i]);
}
}
entry->val = malloc(entry->valsize);
if(entry->val==NULL) {
ERR("Failed to allocate for attribute value\n");
s=MSTRO_NOMEM;
goto BAILOUT; goto BAILOUT;
} }
if(s==MSTRO_OK) {
switch(tdecl->parsed_type->kind) {
case MSTRO_STP_BOOL: {
/* the value was already checked by the regex above */
switch(tolower(val[0])) {
case 't': // TRUE
case '1': // 1
case 'o': // ON
case 'y': // YES
*((bool*)(entry->val)) = true;
break;
default:
*((bool*)(entry->val)) = false;
break;
}
break;
}
case MSTRO_STP_UINT:
*((uint64_t*)(entry->val)) = strtoumax((const char*)val, NULL, 10);
break;
case MSTRO_STP_INT:
*((int64_t*)(entry->val)) = strtoimax((const char*)val, NULL, 10);
break;
case MSTRO_STP_FLOAT:
*((float*)(entry->val)) = strtof((const char*)val, NULL);
break;
case MSTRO_STP_DOUBLE:
*((double*)(entry->val)) = strtod((const char*)val, NULL);
break;
case MSTRO_STP_STR:
case MSTRO_STP_REGEX:
strcpy((char*)entry->val, (const char*)val);
((char*)(entry->val))[entry->valsize-1] = '\0';
break;
case MSTRO_STP_BLOB:
memcpy((char*)entry->val, val, val_len);
break;
default:
ERR("Unexpected parsed type %d\n", tdecl->parsed_type->kind);
s=MSTRO_UNIMPL; s=MSTRO_UNIMPL;
}
}
BAILOUT: BAILOUT:
return s; return s;
...@@ -919,7 +1164,21 @@ mstro_attributes__parse_helper(yaml_parser_t parser, ...@@ -919,7 +1164,21 @@ mstro_attributes__parse_helper(yaml_parser_t parser,
{ {
mstro_status status = MSTRO_UNIMPL; mstro_status status = MSTRO_UNIMPL;
struct partial_key *keystack =NULL; struct partial_key *keystack =NULL;
*result = NULL; if(schema==NULL) {
ERR("Can't parse without schema\n");
return MSTRO_INVARG;
}
if(result==NULL) {
ERR("NULL attribute dict\n");
return MSTRO_INVOUT;
}
*result = malloc(sizeof(struct mstro_attribute_dict_));
if(*result==NULL) {
ERR("Failed to allocate attribute dict\n");
return MSTRO_NOMEM;
}
(*result)->dict=NULL;
(*result)->schema=NULL; /* for now, will be added when successful */
yaml_event_t event; yaml_event_t event;
...@@ -1055,6 +1314,10 @@ mstro_attributes__parse_helper(yaml_parser_t parser, ...@@ -1055,6 +1314,10 @@ mstro_attributes__parse_helper(yaml_parser_t parser,
ERR("Failed to parse |%s| as value for attribute |%s|\n", ERR("Failed to parse |%s| as value for attribute |%s|\n",
val, mstro_symbol_name(decl->key_symbol)); val, mstro_symbol_name(decl->key_symbol));
goto BAILOUT; goto BAILOUT;
} else {
DEBUG("Parsed |%s| as valid value for attribute |%s|\n",
val, mstro_symbol_name(decl->key_symbol));
/* FIXME: describe_entry function call here */
} }
HASH_ADD(hh, (*result)->dict, key, sizeof(mstro_symbol), entry); HASH_ADD(hh, (*result)->dict, key, sizeof(mstro_symbol), entry);
...@@ -1077,8 +1340,10 @@ mstro_attributes__parse_helper(yaml_parser_t parser, ...@@ -1077,8 +1340,10 @@ mstro_attributes__parse_helper(yaml_parser_t parser,
} while(event.type != YAML_STREAM_END_EVENT); } while(event.type != YAML_STREAM_END_EVENT);
yaml_event_delete(&event); yaml_event_delete(&event);
(*result)->schema = schema;
status = MSTRO_OK; status = MSTRO_OK;
/* char *last_key=NULL; */ /* char *last_key=NULL; */
/* enum { */ /* enum { */
/* INVALID, */ /* INVALID, */
......
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <errno.h> #include <errno.h>
#include "attribute_schema.h"
#include "maestro-schema.h" #include "maestro-schema.h"
#ifndef TOPSRCDIR #ifndef TOPSRCDIR
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment