# generateIndex.py
import os, re
from os import path
from natsort import natsorted
from pathlib import Path

def line_prepender(filename, line):
    """Insert ``line`` at the very top of ``filename``.

    Trailing carriage returns / newlines of ``line`` are dropped and exactly
    one newline is put between it and the previous file content.
    """
    with open(filename, 'r+') as handle:
        previous = handle.read()
        # rewind so the new text overwrites the file from its first byte
        handle.seek(0, 0)
        prefix = line.rstrip('\r\n')
        handle.write(f"{prefix}\n{previous}")

def build_link(title, href):
    """Return one ``<li>`` index entry that links to ``href``.

    Args:
        title: Text shown for the link.
        href: Site-relative path of the card; it is wrapped in a Liquid
            ``relative_url`` filter.

    Returns:
        The list item markup, indented with three tabs and newline-terminated.
    """
    # Handbook titles carry a leading section number ("1.2 Working hours").
    # Only that leading numbering is dropped; digits and dots further inside
    # the title (e.g. "Office 365") are part of the text and are kept.
    if 'handbook' in href:
        title = re.sub(r'^[\s0-9.]+', '', title).strip()

    # add relative url
    href = "{{ '" + href + "' | relative_url }}"

    return f'\t\t\t<li><a href="{href}">{title}</a></li>\n'

def root_handbook(href):
    """Rewrite a handbook sym-link folder name in ``href`` to ``handbook``.

    ``handbook-additional`` is checked first, then ``handbook-annexes``; only
    the first name found is replaced, and the result is whitespace-stripped.
    An ``href`` containing neither name is returned untouched.
    """
    for alias in ('handbook-additional', 'handbook-annexes'):
        if alias in href:
            return href.replace(alias, 'handbook').strip()

    return href

def root_lab(href):
    """Rewrite a lab sym-link folder name in ``href`` to ``lab``.

    The names are tried in the order ``lab-software``, ``lab-equipment``,
    ``lab-hsa``; only the first one found is replaced, and the result is
    whitespace-stripped. Any other ``href`` is returned untouched.
    """
    for alias in ('lab-software', 'lab-equipment', 'lab-hsa'):
        if alias in href:
            return href.replace(alias, 'lab').strip()

    return href

def build_section_start(title, shortcut):
    """Return the opening markup of one section box of the index.

    ``title`` is first passed through a fixed list of spelling corrections
    (applied in order); ``shortcut`` becomes the ``id`` of the box, suffixed
    with ``-card``.
    """
    display_names = (
        ("Gdpr", "GDPR"),
        ("Handbook additional", "Handbook: PI/Supervisor specifics"),
        ("Handbook annexes", "Handbook: Annexes"),
        ("Covid 19", "COVID-19"),
        ("Ppc", "PPC"),
        ("Lab software", "Lab: Software"),
        ("Lab equipment", "Lab: Equipment"),
        ("Lab hsa", "Lab: Health & Safety, Access"),
    )
    for raw, pretty in display_names:
        title = title.replace(raw, pretty)

    return "".join([
        f'\n\t<div class="index-box noborderbox" id="{shortcut}-card">\n',
        f'\t\t<h3>{title}</h3>\n',
        '\t\t<ul>\n',
    ])

def build_section_end():
    """Return the markup that closes a section's list and its box."""
    closing_tags = ("\t\t</ul>", "\t</div>")
    return "\n".join(closing_tags)

def save_tag(localroot, root, filename, tag):
    """Return the value of the first ``tag:`` line found in a file.

    Args:
        localroot: Directory to change back to once the file has been read.
        root: Directory that contains ``filename``.
        filename: Name of the file to scan, relative to ``root``.
        tag: Key to look for at the very start of a line (without the colon).

    Returns:
        The text after ``tag:`` with surrounding whitespace removed, or an
        empty string when no line starts with ``tag:``.
    """
    prefix = tag + ":"
    value = ""

    os.chdir(root)
    try:
        with open(filename, 'r') as f:
            for line in f:
                if line.startswith(prefix):
                    # take everything after the colon; stripping afterwards
                    # copes with zero, one or several separating blanks
                    value = line[len(prefix):]
                    break
    finally:
        # change back to the local root, also when the file cannot be read
        os.chdir(localroot)

    return value.strip()

def get_ignore():
    """Return a new list of file and folder names the generator skips."""
    skipped = (
        "404.html",
        "Gemfile",
        "Gemfile.lock",
        "README.md",
        "_config.yml",
        "_site",
        "assets",
        "help.md",
        "index.md",
        "pagination.md",
    )
    return list(skipped)

def save_legacy_from(localroot, root, filename):
    """Collect the ``legacy_from`` section of a card's header.

    Args:
        localroot: Directory to change back to once the file has been read.
        root: Directory that contains ``filename``.
        filename: Name of the file to scan, relative to ``root``.

    Returns:
        The raw lines (line endings included) from the line starting with
        ``legacy_from:`` up to and including the next line starting with
        ``---``, or up to the end of the file if no such line follows.
        An empty list when the file has no ``legacy_from:`` line.
    """
    legacy_from = []

    os.chdir(root)
    try:
        with open(filename, 'r') as f:
            collecting = False
            for line in f:
                # check for the start of the section
                if line.startswith("legacy_from:"):
                    collecting = True

                if not collecting:
                    continue

                # keep every line of the section, closing delimiter included
                legacy_from.append(line)

                # the end of the header also ends the section
                if line.startswith("---"):
                    break
    finally:
        # change back to the local root, also when the file cannot be read
        os.chdir(localroot)

    return legacy_from

def remove_header(localroot, root, filename):
    """Strip the leading ``---`` delimited header from a file, in place.

    A header is only recognised when the first line starts with ``---``; it
    ends at the first later line that starts with ``---``. Lines starting
    with ``---`` further down (for example horizontal rules in the body) are
    left alone, as is a file without such a header.

    Args:
        localroot: Directory to change back to when done.
        root: Directory that contains ``filename``.
        filename: Name of the file to rewrite, relative to ``root``.

    Returns:
        The number of header lines removed (both delimiters included), or 0
        when no header was found and the file was not modified.
    """
    n = 0

    os.chdir(root)
    try:
        with open(filename, 'r') as f:
            lines = f.readlines()

        # the opening delimiter must be the first line of the file
        if lines and lines[0].startswith("---"):
            for number, line in enumerate(lines[1:], start=2):
                if line.startswith("---"):
                    # first closing delimiter: the header ends here
                    n = number
                    break

        # remove the header
        if n > 0:
            tmpname = "tmp" + filename
            with open(tmpname, "w") as out:
                out.writelines(lines[n:])

            # swap the stripped copy in for the original in a single step
            os.replace(tmpname, filename)
            print(" - Old header removed.")
    finally:
        # change back to the local root, also when something went wrong
        os.chdir(localroot)

    return n

def generate_header(folder, permalink, shortcut, order, legacy_from, title, description):
    """Build the header text that is written to the top of a card.

    The header opens with ``---`` and lists, in this order: ``card_order``
    (if ``order`` is non-empty), ``layout``, ``permalink``, ``shortcut``,
    the ``redirect_from`` entries, ``title`` and ``description`` (each only
    if non-empty). When ``legacy_from`` is non-empty, its inner lines (first
    and last dropped) are added to ``redirect_from`` and the complete list
    is appended at the end in place of the closing ``---``.
    """
    parts = ["---\n"]

    if len(order) > 0:
        parts.append("card_order: " + str(order) + "\n")

    parts.append("layout: page\n")
    parts.append("permalink: " + permalink + "\n")
    parts.append("shortcut: " + root_handbook(shortcut) + "\n")
    parts.append("redirect_from:\n")

    # every spelling of the shortcut that has to redirect to this card
    targets = [shortcut]
    # special case of handbook
    if "handbook-annexes" in permalink or "handbook-additional" in permalink:
        targets.append(root_handbook(shortcut))
    # separate lab section
    if any(alias in permalink for alias in ("lab-software", "lab-equipment", "lab-hsa")):
        targets.append(root_lab(shortcut))

    for target in targets:
        parts.append("  - /cards/" + target + "\n")
        parts.append("  - /" + folder + "/cards/" + target + "\n")

    has_legacy = len(legacy_from) > 0

    # include the legacy section in the redirect_from section
    if has_legacy:
        parts.extend(str(item) for item in legacy_from[1:-1])

    # add the title and description
    if len(title) > 0:
        parts.append("title: " + title + "\n")
    if len(description) > 0:
        parts.append("description: " + description + "\n")

    # the legacy section, when present, supplies the final lines of the header
    if has_legacy:
        parts.extend(str(item) for item in legacy_from)
    else:
        parts.append("---")

    return "".join(parts)

def generate_whitelist_entry(folder, permalink, shortcut):
    """Return the newline-terminated whitelist lines for one card.

    The lines are the permalink, the ``/?shortcut`` form and the two
    ``/cards/`` redirect paths, followed by the same two redirect paths for
    the handbook and lab spellings of the shortcut when the permalink lies in
    one of their sym-linked folders.
    """
    targets = [shortcut]
    if "handbook-annexes" in permalink or "handbook-additional" in permalink:
        targets.append(root_handbook(shortcut))
    # separate lab section
    if any(alias in permalink for alias in ("lab-software", "lab-equipment", "lab-hsa")):
        targets.append(root_lab(shortcut))

    entries = [permalink, "/?" + shortcut]
    for target in targets:
        entries.append("/cards/" + target)
        entries.append("/" + folder + "/cards/" + target)

    return "".join(entry + "\n" for entry in entries)

# working directory at start-up; the file helpers change back to it
localroot = os.getcwd()

# names that are never treated as a section
ignore = get_ignore()

# folders that are scanned for card sections
cardDirs = ["internal", "external", "policies"]

# the sections are the distinct entry names of all existing card folders,
# hidden and ignored names left out, in natural order
sections = natsorted({
    d
    for direct in cardDirs
    if path.isdir(direct)
    for d in os.listdir(direct)
    if d[0] != "." and d not in ignore
})

# Index contains the generated content, init it with an empty container
index = '\n<div class="index-box-container">\n'
whiteList = ''

# per-section accumulators, keyed by the position of the section in `sections`
localIndexArr = {k: [] for k, _ in enumerate(sections)}
orderArr = {k: [] for k, _ in enumerate(sections)}

# Regenerate the header of every card and collect one index link per card.
for folder in cardDirs:
    # FolderFlag gets set to true at the first iteration
    # NOTE(review): the flag is reset per folder, not per section, so only the
    # first ordered card met in a folder computes the order offset - confirm
    # this is intended.
    folderFlag = True

    # check if folder exists
    if not path.isdir(folder) or folder in ignore:
        continue

    for d in natsorted(os.listdir(folder)):
        if d[0] == "." or d in ignore:
            continue

        # get the index of the section
        indexS = sections.index(d)
        maxOrder = 0

        # the first entry of every section is a newline placeholder
        if len(localIndexArr[indexS]) == 0:
            localIndexArr[indexS] = ["\n"]

        # walk through the folders with all the cards
        for root, _subdirs, files in os.walk(folder + "/" + d):
            for file in files:
                # only markdown files are cards; ignore subsections
                # (.md files that start with _)
                if not file.endswith(".md") or file[0] == "_":
                    continue

                fileName = os.path.join(root, file)
                print(" > Generating header for: " + fileName)

                # save order and legacy section
                order = save_tag(localroot, root, file, "card_order")
                legacy_from = save_legacy_from(localroot, root, file)
                title = save_tag(localroot, root, file, "title")
                description = save_tag(localroot, root, file, "description")

                # remove the previous header
                remove_header(localroot, root, file)

                # generate a permalink
                permalink = "/" + root + "/"

                # generate the shortcut: drop only the leading "<folder>/"
                # (a folder name repeated deeper in the path is kept) and
                # replace the / with a :
                shortcut = root[len(folder) + 1:].replace('/', ':')

                if len(order) > 0:
                    # find the maximum of existing orders
                    if folderFlag:
                        maxOrder = max(orderArr[indexS], default=0)
                        # after determining the max order, set the folder flag to False
                        # to avoid another entry into the same block of code
                        folderFlag = False

                    orderArr[indexS].append(maxOrder + int(order))
                else:
                    # a card without card_order discards the orders collected
                    # so far for this section
                    orderArr[indexS] = []

                # generate the header for each card
                header = generate_header(folder, permalink, shortcut, order, legacy_from, title, description)

                # add autogenerated links to whitelist
                whiteList += generate_whitelist_entry(folder, permalink, shortcut)

                # add the header properly speaking
                line_prepender(fileName, header)

                # The card title is the first line longer than two characters
                # after the header that was just written. Its position is
                # derived from the new header, which may be longer or shorter
                # than the one that was removed.
                headerLines = header.rstrip('\r\n').count('\n') + 1
                title = ""
                with open(fileName, 'r') as f:
                    for count, line in enumerate(f, start=1):
                        if count > headerLines and len(line) > 2:
                            title = line
                            break

                # remove the line ending and the first two characters
                title = title.rstrip("\n\r")
                title = title[2:]

                localIndexArr[indexS].append(build_link(title, root))

                # output
                print(" + New header added.")
                print("-----------------------")


# ordering of cards
for indexS, _section in enumerate(sections):
    links = localIndexArr[indexS]
    orders = orderArr[indexS]

    # the first entry of `links` is not a card, hence the "- 1"
    everyCardOrdered = len(orders) > 0 and len(orders) == len(links) - 1

    if everyCardOrdered:
        # all cards of the section carry an order: sort the links by it
        print("")
        ranked = sorted(zip(orders, links[1:]))
        localIndexArr[indexS] = [link for _, link in ranked]
    else:
        # natural sorting otherwise
        localIndexArr[indexS] = natsorted(links)

print(localIndexArr)

# determine the index: one box per section, filled with that section's links
sectionBoxes = []
for k, s in enumerate(sections):
    heading = s.replace("-", " ").capitalize()
    sectionBoxes.append(
        build_section_start(heading, s)
        + ''.join(localIndexArr[k])
        + build_section_end()
    )
index += ''.join(sectionBoxes)

# close the container
index += "\n</div>"

## add link to return to main index
index += """<br><center><a href="{{ '/' | relative_url }}">go back</a></center>"""
index += """<br><center><a href="{{ '/cards' | relative_url }}">Overview of all HowTo cards</a></center>"""

# output the index
# Keep cards.md up to and including the line holding the index placeholder
# and put the freshly generated index right after it; whatever followed the
# placeholder before is dropped.
indexFile = "cards.md"
indexMarker = "<!-- index -->"

filedata = ""
markerFound = False
with open(indexFile, 'r') as file:
    for line in file:
        filedata += line

        # stop reading once the index place holder has been reached
        if indexMarker in line:
            filedata += "[[ index ]]"
            markerFound = True
            break

if markerFound:
    # Replace the target string
    filedata = filedata.replace('[[ index ]]', index)

    # Write the file out again
    with open(indexFile, 'w') as file:
        file.write(filedata)

    print("\n > New index generated and saved in " + indexFile)
else:
    # without the placeholder there is no place to insert the index; leave
    # the file alone and say so instead of reporting success
    print("\n ! Placeholder " + indexMarker + " not found in " + indexFile + "; index not saved.")

# write link whitelist out: generated entries first, then whatever the
# whitelist file already contained
# NOTE(review): entries written by an earlier run are read back and written
# again below the new ones - confirm the file is reset between runs.
whiteListFile = ".ci/whitelist.txt"
whiteListPath = Path(whiteListFile)

if whiteListPath.exists():
    with open(whiteListFile, 'r') as file:
        whiteList += file.read()

with open(whiteListFile, 'w') as file:
    file.write(whiteList)