"""
Max Lensing 26.03.2021
-------------------------
Compare the entries of the TOAR-II ontology with the controlled vocabulary.

For every class listed in the controlled vocabulary the script looks up the
class of the same name in the ontology and reports the vocabulary entries
that are missing from the ontology.

Requirements:
- rdflib (python package)
- VPN to the Forschungszentrum to be able to open the link to the controlled vocabulary
- ontology file (.owl)
- 'TOARII_ontology.py' class
-------------------------
"""

import requests

import TOARII_ontology

__author__ = 'Max Lensing'
__email__ = 'm.lensing@fz-juelich.de'

# output options:
# get all entries with tag (True/False) if they are equal in ontology and controlled vocabulary:
entriesOutput = False
# get only entries which are not equal in ontology and controlled vocabulary
# (only works if 'entriesOutput = False')
onlyWrongEntriesOutput = False
# get a list of all classnames:
classnamesListOutput = False
# get all classnames and the amount of the mistakes inside:
classnamesMistakesOutput = True
# get a total amount of mistakes counting the differences between ontology and controlled vocabulary:
totalMistakesOutput = True

# NOTE(review): despite its name, this value is handed to requests as
# `verify`, so False switches TLS certificate verification OFF. Kept as-is
# because the dev server may use a self-signed certificate - confirm, and
# prefer pointing `verify` at the CA bundle instead.
insecure_ssl = False

controlled_vocabulary = requests.get(
    'https://join-dev.fz-juelich.de:8443/controlled_vocabulary',
    verify=insecure_ssl,
    timeout=60,  # fail instead of hanging forever when the VPN/server is down
)
# Stop with a clear HTTP error rather than trying to parse an error page.
controlled_vocabulary.raise_for_status()

ontology = TOARII_ontology.TOAR_Ontology('TOAR-II_Ontology_v0.4.owl')

# Let the JSON parser split the payload instead of slicing the raw text by
# hand; manual splitting on ':' and ',"' breaks as soon as a value contains
# one of those characters.
# Assumed shape (it is what the former string slicing relied on):
#   {"<classname>": [[<id>, "<short name>", "<long name>"], ...], ...}
vocabulary = controlled_vocabulary.json()

equal = True
totalmistakes = 0
classnames = []  # only relevant to get an overview of the classnames

for classname, elements in vocabulary.items():
    classnames.append(classname)

    try:
        ontology_class = ontology.get_class(classname)
    except Exception:
        # The exception type raised by get_class is not visible from here, so
        # catch Exception (a bare except would also swallow KeyboardInterrupt).
        print(classname + ' can not be found in the ontology')
        # A class without an ontology counterpart has no matching entries.
        # Previously the script went on with an undefined (or stale) class.
        ontology_classentries = []
        equal = False
    else:
        # list of entries referring to the classname inside the ontology
        ontology_classentries = ontology.getEntries(ontology_class)

    mistakes = 0
    for element in elements:
        cv_se = element[1]  # short name of the controlled vocabulary entry
        found = cv_se in ontology_classentries
        if not found:
            equal = False
            mistakes += 1
            if onlyWrongEntriesOutput and not entriesOutput:
                print(cv_se)
        if entriesOutput:
            print(str(cv_se) + " : " + str(found))

    if classnamesMistakesOutput:
        if mistakes == 0:
            print('No mistakes in ' + classname)
        else:
            print(classname + ' mistakes: ' + str(mistakes) + ' total entries: ' + str(len(elements)))
        print('-------------------------')
    totalmistakes += mistakes

if classnamesListOutput:
    print('List of classnames:')
    print(classnames)
if totalMistakesOutput:
    print('\nTotal mistakes: ' + str(totalmistakes))
if equal:
    print('Ontology and controlled vocabulary are equal :)')
else:
    print('Ontology and controlled vocabulary are not equal :(')