diff --git a/attributes/maestro-schema.c b/attributes/maestro-schema.c index e570932aae12cf47d593bfe7c612eb1b5517f319..d8c21d992ed6301c766f749fbe64ad898ddd55ba 100644 --- a/attributes/maestro-schema.c +++ b/attributes/maestro-schema.c @@ -7,6 +7,7 @@ #include <stdlib.h> #include <stdio.h> #include <stdint.h> +#include <inttypes.h> #include <cyaml/cyaml.h> @@ -15,6 +16,7 @@ #include "maestro.h" #include "maestro/logging.h" #include "maestro-schema.h" +#include "schema_type_parse.h" #include "symtab.h" #include "maestro/i_uthash.h" @@ -75,6 +77,12 @@ struct mstro_schema_type_ { mstro_symbol type_symbol; /**< symbol created for this typename; key when hashing */ char *unparsed_typespec; /**< "typespec: include('maestro-user-typespec')", unparsed */ char *documentation; /**< "documentation: str()" */ + + struct mstro_stp_val *parsed_type; /**< parsed type handle */ + /* A closure each for parser, serializer, deserializer. May be + * NULL. Main use: instantiated regex parser automaton for string-ish types */ + void *parser_closure; + }; /** mapping definition for user-defined schema type */ @@ -714,7 +722,7 @@ mstro_schema_merge(mstro_schema main, return MSTRO_UNIMPL; } - +/* these are 1:1 corresponding to the user-facing ones from enum mstro_stp_val_kind */ enum mstro_schema_builtin_type { MSTRO_SCHEMA_BUILTIN_BOOL, MSTRO_SCHEMA_BUILTIN_UINT, @@ -723,29 +731,52 @@ enum mstro_schema_builtin_type { MSTRO_SCHEMA_BUILTIN_DOUBLE, MSTRO_SCHEMA_BUILTIN_STRING, MSTRO_SCHEMA_BUILTIN_REGEX, + MSTRO_SCHEMA_BUILTIN_BLOB, MSTRO_SCHEMA_BUILTIN_TYPE__MAX }; static struct { enum mstro_schema_builtin_type type; const char *basename; + enum mstro_stp_val_kind stp_kind; } builtin_types[MSTRO_SCHEMA_BUILTIN_TYPE__MAX] = { - [MSTRO_SCHEMA_BUILTIN_BOOL] = { .type = MSTRO_SCHEMA_BUILTIN_BOOL, .basename = "bool" }, - [MSTRO_SCHEMA_BUILTIN_UINT] = { .type = MSTRO_SCHEMA_BUILTIN_UINT, .basename = "uint" }, - [MSTRO_SCHEMA_BUILTIN_INT] = { .type = MSTRO_SCHEMA_BUILTIN_INT, .basename = "int" }, - 
[MSTRO_SCHEMA_BUILTIN_FLOAT] = { .type = MSTRO_SCHEMA_BUILTIN_FLOAT, .basename = "float" }, - [MSTRO_SCHEMA_BUILTIN_DOUBLE] = { .type = MSTRO_SCHEMA_BUILTIN_DOUBLE, .basename = "double" }, - [MSTRO_SCHEMA_BUILTIN_STRING] = { .type = MSTRO_SCHEMA_BUILTIN_STRING, .basename = "str" }, - [MSTRO_SCHEMA_BUILTIN_REGEX] = { .type = MSTRO_SCHEMA_BUILTIN_REGEX, .basename = "regex" } + [MSTRO_SCHEMA_BUILTIN_BOOL] = { .type = MSTRO_SCHEMA_BUILTIN_BOOL, + .basename = "bool", + .stp_kind = MSTRO_STP_BOOL}, + [MSTRO_SCHEMA_BUILTIN_UINT] = { .type = MSTRO_SCHEMA_BUILTIN_UINT, + .basename = "uint", + .stp_kind = MSTRO_STP_UINT}, + [MSTRO_SCHEMA_BUILTIN_INT] = { .type = MSTRO_SCHEMA_BUILTIN_INT, + .basename = "int", + .stp_kind = MSTRO_STP_INT}, + [MSTRO_SCHEMA_BUILTIN_FLOAT] = { .type = MSTRO_SCHEMA_BUILTIN_FLOAT, + .basename = "float", + .stp_kind = MSTRO_STP_FLOAT}, + [MSTRO_SCHEMA_BUILTIN_DOUBLE] = { .type = MSTRO_SCHEMA_BUILTIN_DOUBLE, + .basename = "double", + .stp_kind = MSTRO_STP_DOUBLE}, + [MSTRO_SCHEMA_BUILTIN_STRING] = { .type = MSTRO_SCHEMA_BUILTIN_STRING, + .basename = "str", + .stp_kind = MSTRO_STP_STR}, + [MSTRO_SCHEMA_BUILTIN_REGEX] = { .type = MSTRO_SCHEMA_BUILTIN_REGEX, + .basename = "regex", + .stp_kind = MSTRO_STP_REGEX}, + [MSTRO_SCHEMA_BUILTIN_BLOB] = { .type = MSTRO_SCHEMA_BUILTIN_BLOB, + .basename = "blob", + .stp_kind = MSTRO_STP_BLOB} }; +/** lookup or create builtin type */ +static inline mstro_status mstro_schema_lookup_type__builtins( + mstro_schema schema, const char *typename, mstro_schema_type *result) { - /* FIXME: improve this */ size_t i; + mstro_status s = MSTRO_NOENT; + for(i=0; i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX; i++) { /* hand-coded to avoid 3 strlen ops */ size_t j; @@ -763,19 +794,58 @@ mstro_schema_lookup_type__builtins( ; } if(i<MSTRO_SCHEMA_BUILTIN_TYPE__MAX) { - /* found something */ - DEBUG("Found |%s| to match built-in type %s\n", - typename, builtin_types[i].basename); - 
if(strlen(typename)!=strlen(builtin_types[i].basename)+strlen("()")) { - /* FIXME: parse restrictions of type if there are any and - * instantiate a restricted version */ - WARN("type has restriction specifiers that we ignore\n"); + /* found something matching basename */ + mstro_symbol sym; + mstro_status status; + status = mstro_symtab_lookup(schema->symtab, typename, &sym); + if(status!=MSTRO_OK) { + ERR("Failed to lookup builtin type in schema symtab: %d\n"); + goto BAILOUT; + } + if(sym==NULL) { + DEBUG("Symbol for built-in type |%s| not yet in symtab, adding\n", typename); + s=mstro_symtab_intern(&schema->symtab, typename, &sym); + if(s!=MSTRO_OK) { + ERR("Failed to intern built-in type-name |%s|\n", typename); + goto BAILOUT; + } + } + /* now symbol exists */ + HASH_FIND(hh, schema->type_table, &sym, sizeof(mstro_symbol), *result); + if(*result!=NULL) { + DEBUG("Found existing entry for built-in type |%s|\n", typename); + s=MSTRO_OK; + goto BAILOUT; + } else { + DEBUG("Failed to find builtin type |%s| in type table, instantiating\n", typename); + /* try to parse */ + struct mstro_stp_val *parsed_type; + mstro_status s = mstro_schema_type_parse(typename, &parsed_type); + if(s!=MSTRO_OK) { + if(parsed_type && parsed_type->kind == MSTRO_STP_ERROR) { + ERR("error parsing built-in type, error message %s\n", + parsed_type->errmsg); + } else { + ERR("error parsing built-in type: %d (no parser error message available)\n", + s); + } + } + *result = malloc(sizeof(struct mstro_schema_type_)); + if(*result) { + (*result)->typename = strdup(typename); + (*result)->type_symbol = sym; + (*result)->unparsed_typespec = strdup("?"); + (*result)->documentation = strdup("builtin type"); + (*result)->parsed_type = parsed_type; + HASH_ADD(hh, schema->type_table, type_symbol, sizeof(mstro_symbol), *result); + s = MSTRO_OK; + } else { + s= MSTRO_NOMEM; + } } - *result = builtin_types+i; - return MSTRO_OK; - } else { - return MSTRO_NOENT; } +BAILOUT: + return s; } @@ -787,13 +857,10 
@@ mstro_schema_lookup_type(mstro_schema schema, mstro_symbol sym; mstro_status status; - /* built-in types first. We do this instead of duplicating them into - * the schema for compile-time optimization. - * - * FIXME: This could be improved by interning the base type name in - * the symbol table of all schemata */ - status = mstro_schema_lookup_type__builtins(typename, result); + /* built-in types first. This will create them and intern them on the fly, with restrictions if needed. */ + status = mstro_schema_lookup_type__builtins(schema, typename, result); if(status==MSTRO_OK) { + DEBUG("type |%s| recognized as built-in\n", typename); goto BAILOUT; } else { DEBUG("|%s| is not a builtin type\n"); @@ -855,6 +922,7 @@ struct mstro_attribute_entry_ { void *val; /** a value, to be interpreted by looking up the * expected type of the attribute in the * appropriate schema */ + size_t valsize; /** allocated space for val */ /* FIXME: this is the place where serialized versions of the entry * should be cached if needed */ /* these may be unset -- this can be checked by comparing the string @@ -862,7 +930,7 @@ struct mstro_attribute_entry_ { * MSTRO__POOL__AVAL__VAL__NOT_SET */ char *serialized_yaml; /** string-version of the attribute value */ Mstro__Pool__AVal serialized_pb; /** protobuf-converted version of - * the attribute value */ + * the attribute value */ }; /** attribute dictionary */ @@ -882,6 +950,7 @@ struct partial_key { +/* parse VAL and fill in *entry */ static inline mstro_status mstro_attributes_parse_val(mstro_schema schema, @@ -900,12 +969,188 @@ mstro_attributes_parse_val(mstro_schema schema, if(s!=MSTRO_OK) { ERR("Failed to find type declaration for type |%s| (attribute |%s|)\n", tname, aname); + return MSTRO_INVARG; + } else { + DEBUG("Found type declaration for type |%s| (attribute |%s|)\n", tname, aname); + } + /* FIXME: this could be handled better by pre-allocated parsers + * (needs to be reentrant!) 
built with a schema-typeval.peg */ + if(!tdecl->parsed_type) { + ERR("Parsed type not available\n"); + return MSTRO_UNIMPL; + } + + /* all built-ins recognized by built-in regexps for now */ + size_t num_re; + if(tdecl->parsed_type->kind == MSTRO_STP_REGEX) { + num_re = tdecl->parsed_type->regex_numpatterns; + } else { + num_re = 1; + } + regex_t regex[num_re]; + int err=0; + bool need_regmatch = true; /* except for BLOB we need it */ + size_t minlen =0, maxlen=val_len; /* might be changed for strings */ + + switch(tdecl->parsed_type->kind) { + case MSTRO_STP_BOOL: { + entry->valsize = sizeof(bool); + err = regcomp(&(regex[0]), "1|0|On|Off|True|False",REG_ICASE|REG_NOSUB|REG_EXTENDED); + break; + } + case MSTRO_STP_UINT: { + entry->valsize = sizeof(uint64_t); + err = regcomp(&(regex[0]), "[+]?[[:blank:]]*[0-9]+",REG_EXTENDED|REG_NOSUB); + WARN("Not checking numeric bounds on types\n"); + break; + } + case MSTRO_STP_INT: { + entry->valsize = sizeof(int64_t); + err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]+",REG_EXTENDED|REG_NOSUB); + WARN("Not checking numeric bounds on types\n"); + break; + } + case MSTRO_STP_FLOAT: { + entry->valsize = sizeof(float); + err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?",REG_EXTENDED); + WARN("Not checking numeric bounds on types\n"); + break; + } + case MSTRO_STP_DOUBLE: { + entry->valsize = sizeof(double); + err = regcomp(&(regex[0]), "[-+]?[[:blank:]]*[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?",REG_EXTENDED); + WARN("Not checking numeric bounds on types\n"); + break; + } + case MSTRO_STP_STR: { + entry->valsize = sizeof(char)*(val_len+1); + minlen = tdecl->parsed_type->str_minlen; + maxlen = tdecl->parsed_type->str_maxlen; + char *re; + if(tdecl->parsed_type->str_excludedchars) { + size_t l = strlen(tdecl->parsed_type->str_excludedchars)+strlen("[^]*") + 1; + re=malloc(sizeof(char)*l); + if(re) { + re[0]='['; re[1]='^'; + strcpy(re+2,tdecl->parsed_type->str_excludedchars); + re[l-3] = ']'; re[l-2]='*'; 
re[l-1] = '\0'; + } + } else { + re = strdup(".*"); + } + if(re==NULL) { + ERR("Failed to allocate string regex\n"); + s=MSTRO_NOMEM; + goto BAILOUT; + } + err = regcomp(&(regex[0]), re, REG_NOSUB); + free(re); + break; + } + case MSTRO_STP_REGEX: + entry->valsize = sizeof(char)*(val_len+1); + for(size_t i=0; i<num_re; i++) { + err |= regcomp(&(regex[i]), tdecl->parsed_type->regex_patterns[i], + REG_EXTENDED|REG_NOSUB| (tdecl->parsed_type->regex_ignorecase ? REG_ICASE : 0)); + } + break; + case MSTRO_STP_BLOB: + entry->valsize = sizeof(char)*(val_len); + need_regmatch = false; + minlen = tdecl->parsed_type->blob_minlen; + maxlen = tdecl->parsed_type->blob_maxlen; + break; + default: + ERR("Unexpected parsed type %d\n", tdecl->parsed_type->kind); + s=MSTRO_UNIMPL; + } + if(err) { + ERR("Failed to construct regex\n"); + // regerror ... + } + if(! (minlen<=val_len && val_len <=maxlen)) { + ERR("Argument |%s| (len %zu, strlen %zu) not within length bounds for type: min=%zu, max=%zu\n", + val, val_len, strlen((const char*)val), minlen, maxlen); + s=MSTRO_FAIL; goto BAILOUT; } - - - s=MSTRO_UNIMPL; - + + s=MSTRO_OK; + if(need_regmatch) { + s=MSTRO_NOMATCH; + for(size_t i=0; i<num_re; i++) { + s = mstro_schema__check_regex((const char*)val, &(regex[i]), 0); + /* DEBUG("Checked regex against |%s|, result %d (%s)\n", val, s, mstro_status_description(s)); */ + if(s==MSTRO_OK) + break; + /* NOMATCH: try further */ + if(s==MSTRO_NOMATCH) { + continue; + } else { + break; + } + } + for(size_t i=0; i<num_re; i++) { + regfree(&regex[i]); + } + } + + entry->val = malloc(entry->valsize); + if(entry->val==NULL) { + ERR("Failed to allocate for attribute value\n"); + s=MSTRO_NOMEM; + goto BAILOUT; + } + + if(s==MSTRO_OK) { + switch(tdecl->parsed_type->kind) { + case MSTRO_STP_BOOL: { + /* we are already checked by regex above */ + switch(tolower(val[0])) { + case 't': // TRUE + case '1': // 1 + case 'o': // ON + case 'y': // YES + *((bool*)(entry->val)) = true; + break; + default: + 
*((bool*)(entry->val)) = false; + break; + } + break; + } + case MSTRO_STP_UINT: + *((uint64_t*)(entry->val)) = strtoumax((const char*)val, NULL, 10); + break; + + case MSTRO_STP_INT: + *((int64_t*)(entry->val)) = strtoimax((const char*)val, NULL, 10); + break; + + case MSTRO_STP_FLOAT: + *((float*)(entry->val)) = strtof((const char*)val, NULL); + break; + + case MSTRO_STP_DOUBLE: + *((double*)(entry->val)) = strtod((const char*)val, NULL); + break; + + case MSTRO_STP_STR: + case MSTRO_STP_REGEX: + strcpy((char*)entry->val, (const char*)val); + ((char*)(entry->val))[entry->valsize-1] = '\0'; + break; + + case MSTRO_STP_BLOB: + memcpy((char*)entry->val, val, val_len); + break; + + default: + ERR("Unexpected parsed type %d\n", tdecl->parsed_type->kind); + s=MSTRO_UNIMPL; + } + } + BAILOUT: return s; } @@ -919,7 +1164,21 @@ mstro_attributes__parse_helper(yaml_parser_t parser, { mstro_status status = MSTRO_UNIMPL; struct partial_key *keystack =NULL; - *result = NULL; + if(schema==NULL) { + ERR("Can't parse without schema\n"); + return MSTRO_INVARG; + } + if(result==NULL) { + ERR("NULL attribute dict\n"); + return MSTRO_INVOUT; + } + *result = malloc(sizeof(struct mstro_attribute_dict_)); + if(*result==NULL) { + ERR("Failed to allocate attribute dict\n"); + return MSTRO_NOMEM; + } + (*result)->dict=NULL; + (*result)->schema=NULL; /* for now, will be added when successful */ yaml_event_t event; @@ -1055,8 +1314,12 @@ mstro_attributes__parse_helper(yaml_parser_t parser, ERR("Failed to parse |%s| as value for attribute |%s|\n", val, mstro_symbol_name(decl->key_symbol)); goto BAILOUT; + } else { + DEBUG("Parsed |%s| as valid value for attribute |%s|\n", + val, mstro_symbol_name(decl->key_symbol)); + /* FIXME: describe_entry function call here */ } - + HASH_ADD(hh, (*result)->dict, key, sizeof(mstro_symbol), entry); DEBUG("Handled entry for %s, cleaning keystack\n", keystack->fqkey); @@ -1077,8 +1340,10 @@ mstro_attributes__parse_helper(yaml_parser_t parser, } 
while(event.type != YAML_STREAM_END_EVENT); yaml_event_delete(&event); + (*result)->schema = schema; status = MSTRO_OK; + /* char *last_key=NULL; */ /* enum { */ /* INVALID, */ diff --git a/tests/check_schema_parse.c b/tests/check_schema_parse.c index cc8b6f5ae477dbba9558326365e43b4a9cbc66ef..9ff3b258acf556f9d62887b93cbd24f6b447a564 100644 --- a/tests/check_schema_parse.c +++ b/tests/check_schema_parse.c @@ -43,7 +43,6 @@ #include <sys/stat.h> #include <errno.h> -#include "attribute_schema.h" #include "maestro-schema.h" #ifndef TOPSRCDIR