Skip to content
Snippets Groups Projects
Commit fe61d08d authored by Sabine Schröder's avatar Sabine Schröder
Browse files

#41: first script to shrink OWLdoc files

parent b308e02e
No related branches found
No related tags found
3 merge requests: !10 merge dev into testing, !9 merge ontology to dev, !8 merged Max's developments into ontology branch
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
OWLDoc preprocessing
====================
see: https://gitlab.version.fz-juelich.de/esde/toar-data/toardb_fastapi/-/issues/41
Deletes all lines from the first <div id='isdefinedby_(1)'> line
up to and including the last-but-one </div><!-- usage (1) --> line.
[Note: The (1) could be a different number, e.g. (3).]
In some files the usage line is not present.
"""
__author__ = "Sabine Schroeder"
__version__ = "0.5"
__date__ = "2021/03/24"
__maintainer__ = "Sabine Schroeder"
__email__ = "s.schroeder@fz-juelich.de"
import sys
import os
import getopt
def usage():
    """Print the command-line help text of prepare_OWLDoc.py to stdout."""
    msg = ["Usage: prepare_OWLDoc.py options",
           "Options: -h, --help help",
           " -f, --filename str filename"]
    # str.join consumes the list directly; no intermediate copy is needed.
    print("\n".join(msg))
def get_options(argv):
    """Analyse command line options and return the requested filename.

    Parameters
    ----------
    argv : list of str
        Argument vector to parse. The script passes ``sys.argv`` unchanged.

    Returns
    -------
    str
        Value of the last ``-f`` / ``--filename`` option given.

    Exits
    -----
    Calls ``sys.exit(2)`` after printing the usage text when an option is
    invalid or no (non-empty) filename was supplied, and ``sys.exit()``
    after printing the usage text when ``-h`` / ``--help`` is given.
    """
    long_opt = ["help", "filename="]
    try:
        # gnu_getopt allows options and positional arguments to be mixed;
        # positional arguments are not used by this script and are discarded.
        opts, _ = getopt.gnu_getopt(argv, "hf:", long_opt)
    except getopt.GetoptError as e:
        print(str(e))
        usage()
        sys.exit(2)

    filename = ''
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-f", "--filename"):
            filename = a

    # A filename is mandatory: treat a missing or empty value as a usage error.
    if not filename:
        usage()
        sys.exit(2)
    return filename
def shrink_file(filename):
    """Write a shrunken copy of an OWLDoc file to ``../individuals/``.

    The input file is read completely. If it contains a
    ``<div id='isdefinedby_(1)'>`` line and at least two
    ``</div><!-- usage (1) -->`` lines, the copy consists of

    * every line up to and including the line that follows the first
      ``isdefinedby_(1)`` line, and
    * every line after the last-but-one ``usage (1)`` line.

    Otherwise a message is printed and the file is copied unchanged.

    Parameters
    ----------
    filename : str
        Path of the input file, relative to the current working directory.
        The output is written to ``"../individuals/" + filename``.

    Returns
    -------
    None
    """
    head_marker = "<div id='isdefinedby_(1)'>\n"
    usage_marker = "</div><!-- usage (1) -->\n"

    # Context managers guarantee that both files are closed (and the output
    # flushed) even if an error occurs while processing.
    with open(filename, "r") as rd:
        lines = rd.readlines()

    with open("../individuals/" + filename, "w") as wr:
        # index_head is the position *after* the first head marker line;
        # 0 means that the marker was not found.
        try:
            index_head = lines.index(head_marker) + 1
        except ValueError:
            index_head = 0

        if index_head == 0:
            print(f"no 'isdefinedby_(1)' found for file {filename} --> unchanged")
            wr.writelines(lines)
            return

        # Positions of all usage marker lines in the file.
        usage_positions = [i for i, line in enumerate(lines)
                           if line == usage_marker]

        if len(usage_positions) <= 1:
            print(f"no or only one 'usage (1)' found for file {filename} --> unchanged")
            wr.writelines(lines)
            return

        index_tail = usage_positions[-2]
        # NOTE(review): this keeps the head marker line and the line after it;
        # the module docstring reads as if deletion starts at the marker line
        # itself -- confirm which is intended. Behaviour is left unchanged.
        wr.writelines(lines[:index_head + 1])
        wr.writelines(lines[index_tail + 1:])
if __name__ == "__main__":
    # Resolve the input file from the command line, then shrink it.
    shrink_file(get_options(sys.argv))
#!/bin/bash
# Run prepare_OWLDoc.py on every file matching OWL*html in the current directory.
for FILENAME in OWL*html
do
    # If nothing matches, bash leaves the pattern itself in FILENAME; skip it
    # instead of passing a non-existent file to the script.
    [ -e "$FILENAME" ] || continue
    # Quote the variable so names with spaces or glob characters stay intact.
    echo "$FILENAME"
    ./prepare_OWLDoc.py -f "$FILENAME"
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment