123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449 |
- #!/usr/bin/env python3
- #
- # updateDocumentToC.py
- #
- # Insert table of contents at top of Catch markdown documents.
- #
- # This script is distributed under the GNU General Public License v3.0
- #
- # It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
- # https://github.com/rasbt/markdown-toclify
- #
- from __future__ import print_function
- import argparse
- import glob
- import os
- import re
- import sys
- from scriptCommon import catchPath
# Configuration:

# Default minimum number of collected headings a document needs before a
# table of contents is generated (overridable with --min-toc-entries).
minTocEntries = 4

# Heading levels that are left out of the table of contents. Both lists
# exclude levels 1, 3, 4 and 5, so level 2 headings (and level 6, which is
# not listed) are the ones collected.
headingExcludeDefault = [1,3,4,5] # used for every document except release-notes.md
headingExcludeRelease = [1,3,4,5] # used for release-notes.md

# Glob pattern of the documents processed when no files are given on the
# command line; catchPath comes from scriptCommon and is made relative to the
# current working directory.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# File name that selects headingExcludeRelease instead of headingExcludeDefault.
releaseNotesName = 'release-notes.md'

# First line of a generated table of contents; also used to detect an
# existing one.
contentTitle = '**Contents**'
# 1-based line number at which the table of contents is expected/inserted ...
contentLineNo = 4
# ... and the same position as a 0-based list index.
contentLineNdx = contentLineNo - 1

# End configuration

# Characters that are kept (lower-cased) when a heading is turned into an anchor.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
def readLines(in_file):
    """Read the markdown file `in_file` and return its text split on '\\n'.

    A file that ends with a newline therefore yields a trailing empty string.
    """
    with open(in_file, 'r') as handle:
        text = handle.read()
    return text.split('\n')
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """Return a copy of `lines` without the lines starting with a `remove` prefix.

    By default this drops previously generated "[back to top]" links and
    mk-toclify <a id> anchor tags. A falsy `remove` returns a plain copy.
    """
    if remove:
        return [text for text in lines if not text.startswith(remove)]
    return lines[:]
def removeToC(lines):
    """Return a copy of `lines` without an existing table of contents.

    A table of contents is recognized by the line at index contentLineNdx
    starting with contentTitle. It consists of that title line, the
    following lines that start with '[' (the entries), and one separator
    line after the entries. If no table of contents is found, including when
    the document is too short to contain one at that position, an unchanged
    copy is returned.
    """
    # Guard against documents shorter than the expected ToC position.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    pos = contentLineNdx + 1
    # Stop at the end of the document if the entries run up to the last line.
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # `pos` is the separator line that follows the entries; skip it as well.
    return lines[:contentLineNdx] + lines[pos + 1:]
def dashifyHeadline(line):
    """
    Split a Markdown header line into title, anchor and level.

    Returns a 3-element list of
        the headline text with the leading/trailing '#' and the
        surrounding whitespace removed,
        the anchor string for that headline,
        and the level of the headline (number of leading '#') as integer.

    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]
    """
    without_closing = line.rstrip('#')
    title_part = without_closing.lstrip('#')
    depth = len(without_closing) - len(title_part)
    title = title_part.strip()

    # GitHub's sluggification works in an interesting way
    # 1) '+', '/', '(', ')' and so on are just removed
    # 2) spaces are converted into '-' directly
    # 3) multiple -- are not collapsed
    pieces = []
    for ch in title:
        if ch in VALIDS:
            pieces.append(ch.lower())
        elif ch.isspace():
            pieces.append('-')
        # any other symbol is dropped

    return [title, ''.join(pieces), depth]
def _isHeadline(line):
    """Return True if `line` is a headline that tagAndCollect should process."""
    stripped = line.lstrip()
    # one to six '#' followed by a space
    if not stripped.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')):
        return False
    # headers can be indented by at most 3 characters; anything deeper
    # (e.g. '    # comment' inside an indented code block) is ordinary text
    if len(line) - len(stripped) > 3:
        return False
    # ignore empty headers such as '## '
    return bool(set(stripped) - {'#', ' '})


def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Every input line is kept in the output; lines that merely look like a
    headline (indented too far, or empty) are passed through unchanged.

    Keyword arguments:
        lines: a list of strings, one per line of the Markdown document.
        id_tag: if true, inserts an <a id> tag above each collected headline
            (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines with the <a id="some-header"></a>
            anchor tags and "back to top" links inserted as requested.
        2nd list:
            A list of 3-value sublists as returned by dashifyHeadline:
            the heading text, the anchor string, and the headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]
    """
    out_contents = []
    headlines = []
    for l in lines:
        saw_headline = _isHeadline(l)
        if saw_headline:
            # Strip the (at most 3 characters of) indentation so that the
            # level is counted from the leading '#'.
            dashified = dashifyHeadline(l.lstrip())
            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
def positioningHeadlines(headlines):
    """
    Shift all headline levels up by one if no level 1 headline is present.

    `headlines` is a list of rows whose last element is the headline level.
    The rows are modified in place and the same list is returned.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if has_top_level:
        return headlines
    for entry in headlines:
        entry[-1] = entry[-1] - 1
    return headlines
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Builds the lines of the table of contents from the headline list
    returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of [title, anchor, level] lists,
            e.g., [['Some header lvl3', 'some-header-lvl3', 3], ...]
        hyperlink: if True, each entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>';
            otherwise a '- title' item indented by (level - 1) spaces.
        top_link: if True, an id tag for linking to the table of contents
            itself (for the back-to-top links) precedes the title.
        no_toc_header: suppresses the title (and the top link) if True.

    Returns the list of lines: the optional header, one entry per headline
    (each ending in '<br>'), and a final '\\n' element.
    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    for entry in headlines:
        title = entry[0]
        if hyperlink:
            text = '[%s](#%s)' % (title, entry[1])
        else:
            text = '%s- %s' % (' ' * (entry[2] - 1), title)
        toc.append(text + '<br>')

    toc.append('\n')
    return toc
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file (list of lines) including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: Adds vertical space after the table
            of contents. Height in pixels.
        placeholder: If a placeholder string is provided, the placeholder
            will be replaced by the TOC instead of inserting the TOC at
            index contentLineNdx of the document.

    When there is nothing to insert and no placeholder is given, the body is
    returned joined as-is, so a document without a table of contents is
    reproduced exactly.
    """
    toc_parts = list(toc_headlines)
    if spacer:
        toc_parts.append('\n<div style="height:%spx;"></div>\n' % (spacer))
    toc_markdown = "\n".join(toc_parts)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    # Nothing to insert: do not split the body, otherwise a body of at most
    # contentLineNdx lines would gain an extra trailing newline on every run.
    if not toc_markdown:
        return "\n".join(body)

    body_markdown_p1 = "\n".join(body[:contentLineNdx]) + '\n'
    body_markdown_p2 = "\n".join(body[contentLineNdx:])
    return body_markdown_p1 + toc_markdown + body_markdown_p2
def outputMarkdown(markdown_cont, output_file):
    """
    Writes `markdown_cont` to `output_file`; does nothing if `output_file`
    is empty or None.
    """
    if not output_file:
        return
    with open(output_file, 'w') as sink:
        sink.write(markdown_cont)
def markdownToclify(
        input_file,
        output_file=None,
        min_toc_len=2,
        github=False,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=None):
    """ Function to add table of contents to markdown files.

    Any existing table of contents, back-to-top links and mk-toclify anchor
    tags are removed from the input before the new content is generated.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.
      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if not given.
      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.
      github: bool (default: False)
        Uses GitHub TOC syntax if True (no <a id> anchor tags are inserted).
      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.
      nolink: bool (default: False)
        Creates the table of contents without internal links if True.
      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True
      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.
      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.
      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated markdown differs from the contents read from
      input_file, False otherwise.
    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)
        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # without links there is nothing to point at, so use the untagged body
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines split the text on '\n'; joining restores exactly what was read.
    return cont != "\n".join(original_lines)
def isReleaseNotes(f):
    """Return True if the file name component of path `f` equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
def excludeHeadingsFor(f):
    """Return the list of heading levels to exclude from the ToC of file `f`."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise.

    The new content is first written to `input_file + '.tmp'`. That file
    replaces `input_file` only if its bytes differ from the current ones;
    otherwise it is deleted and `input_file` is left untouched.
    """
    if verbose:
        print('file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=False,
        placeholder=False,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare raw bytes so that line-ending changes also count as a change.
    with open(input_file, 'rb') as before:
        old_bytes = before.read()
    with open(output_file, 'rb') as after:
        new_bytes = after.read()

    if old_bytes == new_bytes:
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1
def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths. Return the sum of the per-file results.

    Each entry of `paths` is expanded with glob; only regular files are
    processed.
    """
    total = 0
    for pattern in paths:
        for candidate in glob.glob(pattern):
            if not os.path.isfile(candidate):
                continue
            total += updateSingleDocumentToC(
                input_file=candidate, min_toc_len=min_toc_len, verbose=verbose)
    return total
def updateDocumentToCMain():
    """Command-line entry point: parse the arguments, update the documents and print a summary."""
    cli = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog='',
        formatter_class=argparse.RawTextHelpFormatter)

    # Positional file list; everything remaining on the command line.
    cli.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    cli.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{default}]'.format(default=minTocEntries))

    # Shares its destination with --min-toc-entries: a threshold of 99
    # entries is used to suppress the table of contents.
    cli.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=99,
        help='remove all tables of contents')

    options = cli.parse_args()

    # Fall back to the default document glob when no files were given.
    targets = options.Input or [documentsDefault]

    changedFiles = updateDocumentToC(
        paths=targets, min_toc_len=options.minTocEntries, verbose=options.verbose)

    if changedFiles <= 0:
        print("No table of contents added or updated")
        return
    print("Processed table of contents in " + str(changedFiles) + " file(s)")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    updateDocumentToCMain()
- # end of file
|