#!/usr/bin/env python3

# Copyright 2012-2022 David Robillard
# SPDX-License-Identifier: ISC

"""
A simple literate programming tool for C, C++, and Turtle.

Unlike many LP tools, this tool uses normal source code as input, there is no
tangle/weave and no special file format.  The literate parts of the program
are written in comments, which are emitted as paragraphs of regular text
interleaved with code.  Asciidoc is both the comment and output syntax.
"""

import os
import re
import sys


def format_text(text):
    """Format a text (comment) fragment and return it as a marked up string.

    The fragment is stripped, spaces directly following each newline are
    removed, and the result is surrounded by blank lines.
    """
    return "\n\n" + re.sub("\n *", "\n", text.strip()) + "\n\n"


def format_code(lang, code):
    """Format a block of code and return it as a marked up string.

    Whitespace-only input is returned unchanged.  Otherwise the code, with
    leading and trailing newlines removed, is wrapped in a
    ``[source,<lang>]`` block delimited by dashed lines.
    """
    if code.strip() == "":
        return code

    head = f"[source,{lang}]"
    sep = "-" * len(head)
    return "\n".join([head, sep, code.strip("\n"), sep]) + "\n"


def format_c_source(filename, in_file):
    """Format an annotated C source file as a marked up string.

    ``in_file`` is any iterable of strings (such as an open text file).
    Comments opened with two or more stars (``/** ... */``) are emitted as
    text; everything else, including ordinary ``/* ... */`` comments, is
    emitted as C source blocks.
    """
    output = f"=== {os.path.basename(filename)} ===\n"
    chunk = ""
    prev_c = ""
    in_comment = False
    in_comment_start = False
    n_stars = 0
    code = "".join(in_file)

    # Skip initial license comment.
    # NOTE(review): if the comment is never closed, find() returns -1 and
    # only the first character is dropped; this matches existing behaviour.
    if code[0:2] == "/*":
        end = code.find("*/") + 2
        code = code[end:]

    def last_chunk(chunk):
        # Drop the final character, which is the first half of a comment
        # delimiter ("/" of "/*" or "*" of "*/") that was already appended.
        return chunk[0 : len(chunk) - 1]

    for char in code:
        if prev_c == "/" and char == "*":
            # Saw "/*": start counting the stars of the comment opener
            in_comment_start = True
            n_stars = 1
        elif in_comment_start:
            if char == "*":
                n_stars += 1
            else:
                if n_stars > 1:
                    # Literate comment: flush the code gathered so far
                    output += format_code("c", last_chunk(chunk))
                    chunk = ""
                    in_comment = True
                else:
                    # Ordinary comment: keep it as part of the code
                    chunk += "*" + char
                in_comment_start = False
        elif in_comment and prev_c == "*" and char == "/":
            # Saw "*/" while inside a literate comment
            if n_stars > 1:
                output += format_text(last_chunk(chunk))
            else:
                output += format_code("c", "/* " + last_chunk(chunk) + "*/")
            in_comment = False
            in_comment_start = False
            chunk = ""
        else:
            chunk += char

        prev_c = char

    return output + format_code("c", chunk)


def format_ttl_source(filename, in_file):
    """Format an annotated Turtle source file as a marked up string.

    ``in_file`` is any iterable of lines.  Runs of lines starting with ``#``
    are emitted as text (with the leading ``#`` and spaces removed), and all
    other lines are emitted as Turtle source blocks.
    """
    output = f"=== {os.path.basename(filename)} ===\n"
    in_comment = False
    chunk = ""

    for line in in_file:
        is_comment = line.strip().startswith("#")
        if in_comment:
            if is_comment:
                chunk += line.strip().lstrip("# ") + " \n"
            else:
                # Comment run ended: flush it as text and start a code chunk
                output += format_text(chunk)
                in_comment = False
                chunk = line
        else:
            if is_comment:
                # Code run ended: flush it and start a comment chunk
                output += format_code("turtle", chunk)
                in_comment = True
                chunk = line.strip().lstrip("# ") + " \n"
            else:
                chunk += line

    if in_comment:
        return output + format_text(chunk)

    return output + format_code("turtle", chunk)


def gen(out, filenames):
    """Write markup generated from filenames to an output file.

    ``out`` is a writable text stream.  C sources (``.c``/``.h``) and Turtle
    sources (``.ttl``/``.ttl.in``) are formatted, ``.txt`` files are copied
    through followed by a newline, and any other file is reported on stderr
    and skipped.
    """
    for filename in filenames:
        with open(filename, "r", encoding="utf-8") as in_file:
            if filename.endswith((".c", ".h")):
                out.write(format_c_source(filename, in_file))
            elif filename.endswith((".ttl", ".ttl.in")):
                out.write(format_ttl_source(filename, in_file))
            elif filename.endswith(".txt"):
                for line in in_file:
                    out.write(line)
                out.write("\n")
            else:
                # str has no splitext() method; use os.path.splitext so the
                # diagnostic is printed instead of raising AttributeError.
                sys.stderr.write(
                    f"Unknown source format `{os.path.splitext(filename)[1]}`\n"
                )


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write(f"Usage: {sys.argv[0]} OUT_FILE IN_FILE...\n")
        sys.exit(1)

    with open(sys.argv[1], "w", encoding="utf-8") as out_file:
        gen(out_file, sys.argv[2:])