#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
OWLDoc preprocessing
====================

see: https://gitlab.version.fz-juelich.de/esde/toar-data/toardb_fastapi/-/issues/41

Shrinks an OWLDoc HTML page by deleting all lines
  from the first <div id='isdefinedby_(1)'> line
  up to and including the last but one </div><!-- usage (1) --> line.
The head marker line itself and the single line following it are kept.

[Note: The (1) could be a different number e.g. (3). Only the literal
"(1)" markers are matched by this script; pages that use another number
are copied unchanged.]
There are cases where the usage line is not present; those pages are
copied unchanged as well.

Batch driver
------------
The companion script prepare_OWLDoc.sh runs this tool on every OWLDoc
page of the current directory::

    #!/bin/bash

    for FILENAME in OWL*html
    do
      echo $FILENAME
      ./prepare_OWLDoc.py -f "$FILENAME"
    done
"""

__author__ = "Sabine Schroeder"
__version__ = "0.5"
__date__ = "2021/03/24"
__maintainer__ = "Sabine Schroeder"
__email__ = "s.schroeder@fz-juelich.de"

import sys
import os
import getopt

# Exact lines (including the trailing newline) that delimit the removed part.
HEAD_MARKER = "<div id='isdefinedby_(1)'>\n"
TAIL_MARKER = "</div><!-- usage (1) -->\n"

# Directory the shrunken copy is written to when none is given.
DEFAULT_OUTPUT_DIR = "../individuals"


def usage():
    """Print the command line help text to stdout."""
    msg = ["Usage: prepare_OWLDoc.py options",
           "Options: -h, --help help",
           " -f, --filename str filename"]
    print("\n".join(msg))


def get_options(argv):
    """Analyse command line options and return the requested filename.

    Args:
        argv: list of command line arguments.  Non-option arguments are
            ignored by ``getopt.gnu_getopt``.

    Returns:
        The value given with ``-f`` / ``--filename``.

    Exits:
        With status 2 (after printing the usage text) on an unknown option
        or when no filename was given; with status 0 after ``-h``/``--help``.
    """
    long_opt = ["help", "filename="]
    try:
        opts, _ = getopt.gnu_getopt(argv, "hf:", long_opt)
    except getopt.GetoptError as e:
        print(str(e))
        usage()
        sys.exit(2)
    filename = ''
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-f", "--filename"):
            filename = a
    if filename == '':
        usage()
        sys.exit(2)
    return filename


def _select_lines(lines):
    """Return ``(kept_lines, reason)`` for the lines of one OWLDoc page.

    ``reason`` is ``None`` when lines were removed; otherwise it is a short
    text explaining why ``kept_lines`` is the unchanged input.
    """
    try:
        head = lines.index(HEAD_MARKER)
    except ValueError:
        return lines, "no 'isdefinedby_(1)' found"

    tails = [i for i, line in enumerate(lines) if line == TAIL_MARKER]
    if len(tails) <= 1:
        return lines, "no or only one 'usage (1)' found"

    tail = tails[-2]
    if tail < head:
        # The kept head and kept tail ranges would overlap, which would
        # duplicate lines in the output: leave such a page untouched.
        return lines, "last but one 'usage (1)' precedes 'isdefinedby_(1)'"

    # Keep the head marker line plus the one line after it, then resume
    # right behind the last but one tail marker.
    return lines[:head + 2] + lines[tail + 1:], None


def shrink_file(filename, output_dir=DEFAULT_OUTPUT_DIR):
    """Write a shrunken copy of ``filename`` into ``output_dir``.

    The copy omits the lines between the first head marker and the last but
    one tail marker (see the module docstring).  When the markers are
    missing or in an unusable order, a message is printed and the page is
    copied unchanged.

    Args:
        filename: path of the page to read; the same (relative) path is
            used below ``output_dir`` for the copy.
        output_dir: existing directory that receives the copy.

    Raises:
        OSError: if the input cannot be read or the output cannot be
            created (e.g. ``output_dir`` does not exist).
    """
    with open(filename, "r") as rd:
        lines = rd.readlines()

    kept, reason = _select_lines(lines)

    # Plain concatenation rather than os.path.join: with join an absolute
    # filename would discard output_dir and overwrite the input file.
    with open(f"{output_dir}/{filename}", "w") as wr:
        if reason is not None:
            print(f"{reason} for file {filename} --> unchanged")
        wr.writelines(kept)


if __name__ == "__main__":

    # Skip the program name: gnu_getopt stops at the first non-option
    # argument when POSIXLY_CORRECT is set, which would hide "-f".
    shrink_file(get_options(sys.argv[1:]))