| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449 | 
							- #!/usr/bin/env python3
 
- #
 
- # updateDocumentToC.py
 
- #
 
- # Insert table of contents at top of Catch markdown documents.
 
- #
 
- # This script is distributed under the GNU General Public License v3.0
 
- #
 
- # It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
 
- # https://github.com/rasbt/markdown-toclify
 
- #
 
- from  __future__  import print_function
 
- import argparse
 
- import glob
 
- import os
 
- import re
 
- import sys
 
- from scriptCommon import catchPath
 
- # Configuration:

- minTocEntries = 4  # default for --min-toc-entries: fewer collected headlines than this means no ToC

- headingExcludeDefault = [1,3,4,5]  # excluded heading levels: only level 2 headers enter the ToC by default

- headingExcludeRelease = [1,3,4,5]  # for release-notes.md; same list as the default, i.e. level 2 only (NOTE(review): comment used to say "level 1" - confirm intent)

- documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')  # glob pattern used when no files are given on the command line

- releaseNotesName = 'release-notes.md'  # basename that selects headingExcludeRelease

- contentTitle = '**Contents**'  # first line of a generated ToC; also used to detect an existing one

- contentLineNo = 4  # 1-based line number at which the ToC is inserted

- contentLineNdx = contentLineNo - 1  # the same position as a 0-based list index

- # End configuration

- VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'  # characters kept (lower-cased) when building anchor slugs
 
def readLines(in_file):
    """Return the content of the markdown file `in_file` as a list of lines.

    The text is split on newline characters only, so a file that ends
    with a newline yields a trailing empty string.
    """
    with open(in_file, 'r') as handle:
        text = handle.read()
    return text.split('\n')
 
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """Return a copy of `lines` without the lines that start with a `remove` prefix.

    With the default prefixes this drops previously generated
    "[back to top]" links and mk-toclify anchor tags.  A falsy `remove`
    returns an unfiltered copy.
    """
    if not remove:
        return lines[:]
    return [line for line in lines if not line.startswith(remove)]
 
def removeToC(lines):
    """Return a copy of `lines` without an existing table of contents.

    A table of contents is recognised by a line starting with
    `contentTitle` at index `contentLineNdx`.  It extends over all directly
    following lines that start with '[' plus the single separator line
    after them.  When no table of contents is found (including documents
    too short to contain one) an unmodified copy is returned.
    """
    # Documents shorter than the ToC position cannot hold a ToC; indexing
    # them unguarded would raise IndexError.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    pos = contentLineNdx + 1
    # Bounds check: the ToC entries may run up to the very end of the file.
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # `pos` now points at the separator line that follows the entries;
    # it belongs to the ToC and is dropped as well.
    return lines[:contentLineNdx] + lines[pos + 1:]
 
def dashifyHeadline(line):
    """
    Split a Markdown headline into title, GitHub-style anchor and level.

    Surrounding whitespace is ignored, then the leading run of '#'
    (which determines the level) and any trailing run of '#' are removed.

    Returns a 3-element list (a list rather than a tuple, because
    positioningHeadlines adjusts the level in place):
        the '#'-stripped version of the head line,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.
    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]
    """
    # Without this, leading indentation hides the '#' run: the level would
    # come out as 0 and the '#' characters would stay in the title.
    line = line.strip()

    stripped_right = line.rstrip('#')
    stripped_both = stripped_right.lstrip('#')
    level = len(stripped_right) - len(stripped_both)
    title = stripped_both.strip()

    # GitHub's sluggification works in an interesting way
    # 1) '+', '/', '(', ')' and so on are just removed
    # 2) spaces are converted into '-' directly
    # 3) multiple -- are not collapsed
    slug_chars = []
    for c in title:
        if c in VALIDS:
            slug_chars.append(c.lower())
        elif c.isspace():
            slug_chars.append('-')
        # Unknown symbols are just removed

    return [title, ''.join(slug_chars), level]
 
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    A line counts as a headline when, after removing its indentation, it
    starts with one to six '#' followed by a space, is indented by at
    most 3 spaces and contains something besides '#' and spaces.  Lines
    that merely look like headlines are copied to the output unchanged.

    Keyword arguments:
        lines: a list of strings, one per line of the Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input lines where
            <a class="mk-toclify" id="some-header"></a> anchor tags were
            inserted above the collected header lines (if id_tag is true)
            and back-to-top links below every headline (if back_links
            is true).
        2nd list:
            A list of 3-value sublists as returned by dashifyHeadline:
            the heading, the anchor id, and the headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]
    """
    heading_prefixes = ('# ', '## ', '### ', '#### ', '##### ', '###### ')
    out_contents = []
    headlines = []

    for l in lines:
        l_stripped = l.lstrip()
        indent = len(l) - len(l_stripped)
        # The prefix test already guarantees 1..6 '#' followed by a space,
        # so no separate "too many '#'" / "no space" checks are needed.
        is_headline = (
            l_stripped.startswith(heading_prefixes)
            # headers can be indented by at most 3 spaces:
            and indent <= 3
            # ignore empty headers
            and bool(set(l) - {'#', ' '}))

        if is_headline:
            # Pass the de-indented line so the '#' run is at the front.
            dashified = dashifyHeadline(l_stripped)
            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        # Every input line is kept, headline or not; rejecting a
        # headline candidate must never delete it from the document.
        out_contents.append(l)
        if back_links and is_headline:
            out_contents.append('[[back to top](#table-of-contents)]')

    return out_contents, headlines
 
def positioningHeadlines(headlines):
    """
    Shift all headline levels up by one when no level-1 headline exists.

    The level is the last element of every row and is decremented in
    place; the (same) list object is returned.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if has_top_level:
        return headlines
    for entry in headlines:
        entry[-1] -= 1
    return headlines
 
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Build the lines of the table of contents from the headline list
    collected by tagAndCollect.

    Keyword Arguments:
        headlines: list of [title, anchor, level] lists,
            e.g., [['Some header lvl3', 'some-header-lvl3', 3], ...]
        hyperlink: if True every entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>';
            otherwise a plain '- title<br>' item indented by
            4 spaces per level above 1.
        top_link: if True, put an id tag in front of the title line so
            that the back-to-top links have a target.
        no_toc_header: suppresses the title line (and the id tag) if True.

    Returns a list of strings: optional header lines, one entry per
    headline (each ending in '<br>') and a final newline element.
    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    if hyperlink:
        entries = ('[%s](#%s)' % (h[0], h[1]) for h in headlines)
    else:
        entries = ('%s- %s' % ((h[2]-1)*'    ', h[0]) for h in headlines)
    toc.extend(entry + '<br>' for entry in entries)

    toc.append('\n')
    return toc
 
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Assemble the final Markdown text from the table of contents and the body.

    Keyword arguments:
        toc_headlines: list of lines for the table of contents
            as created by the createToc function.
        body: list of lines of the Markdown file as returned by
            the tagAndCollect function.
        spacer: if non-zero, a <div> of that height (in pixels) is
            appended to the table of contents.
        placeholder: if given, every occurrence of this string in the
            body is replaced by the table of contents; otherwise the
            table of contents is inserted in front of the body line at
            index contentLineNdx.

    Returns the complete document as one string.
    """
    toc_lines = toc_headlines
    if spacer:
        toc_lines = toc_headlines + ['\n<div style="height:%spx;"></div>\n' % (spacer)]
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    head = "\n".join(body[:contentLineNdx]) + '\n'
    tail = "\n".join(body[contentLineNdx:])
    return head + toc_markdown + tail
 
def outputMarkdown(markdown_cont, output_file):
    """
    Write `markdown_cont` to `output_file`; do nothing when
    `output_file` is empty or None.
    """
    if not output_file:
        return
    with open(output_file, 'w') as sink:
        sink.write(markdown_cont)
 
def markdownToclify(
    input_file,
    output_file=None,
    min_toc_len=2,
    github=False,
    back_to_top=False,
    nolink=False,
    no_toc_header=False,
    spacer=0,
    placeholder=None,
    exclude_h=None):
    """ Function to add table of contents to markdown files.

    Any previously generated table of contents, back-to-top links and
    anchor tags are removed from the input before the new ones are
    created, so the function can be applied repeatedly.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.
      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if empty.
      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.
      github: bool (default: False)
        Uses GitHub TOC syntax if True (no <a id> tags are inserted).
      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.
      nolink: bool (default: False)
        Creates the table of contents without internal links if True.
      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True
      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.
      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.
      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated document differs from the content of
      input_file, False otherwise.
    """
    original_lines = readLines(input_file)

    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)
        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # Without links the anchors and back-links are pointless, so the
    # untagged body is used.
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines splits on '\n', so joining restores the text as read.
    return cont != "\n".join(original_lines)
 
def isReleaseNotes(f):
    """Return True if the file name part of path `f` equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
 
def excludeHeadingsFor(f):
    """Return the list of heading levels to leave out of the ToC of file `f`."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
 
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """Add or update table of contents in specified file.

    The new document is first written to `input_file + '.tmp'`.  It
    replaces the input file only if its content differs; otherwise the
    temporary file is deleted and the input file is left untouched.

    Return 1 if file changed, 0 otherwise.
    """
    if verbose:
        print('file: {}'.format(input_file))

    output_file = input_file + '.tmp'
    original_lines = readLines(input_file)

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=False,
        placeholder=False,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare what was written with what was read, so that the result
    # reflects an actual change of the document.
    if readLines(output_file) == original_lines:
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)
    return 1
 
def updateDocumentToC(paths, min_toc_len, verbose):
    """Add or update table of contents to specified paths.

    Every entry of `paths` is expanded as a glob pattern; only regular
    files are processed.  Return the sum of the values returned by
    updateSingleDocumentToC for the processed files.
    """
    return sum(
        updateSingleDocumentToC(input_file=doc, min_toc_len=min_toc_len, verbose=verbose)
        for pattern in paths
        for doc in glob.glob(pattern)
        if os.path.isfile(doc))
 
def updateDocumentToCMain():
    """Command line entry point: parse the arguments, update the documents
    and print a one-line summary."""
    cli = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        epilog='',
        formatter_class=argparse.RawTextHelpFormatter)

    cli.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs=argparse.REMAINDER,
        help='files to process, at default: docs/*.md')

    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    min_entries_help = (
        'the minimum number of entries to create a table of contents for [{default}]'
    ).format(default=minTocEntries)
    cli.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help=min_entries_help)

    # Shares its destination with --min-toc-entries: a threshold of 99
    # entries is used to get existing tables removed without a new one.
    cli.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=99,
        help='remove all tables of contents')

    options = cli.parse_args()

    targets = options.Input or [documentsDefault]
    changedFiles = updateDocumentToC(
        paths=targets,
        min_toc_len=options.minTocEntries,
        verbose=options.verbose)

    if changedFiles > 0:
        summary = "Processed table of contents in {} file(s)".format(str(changedFiles))
    else:
        summary = "No table of contents added or updated"
    print(summary)
 
- if __name__ == '__main__':  # run the command line tool only when executed as a script, not on import

-     updateDocumentToCMain()
 
- # end of file
 
 
  |