#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright © 2016 Mathieu Duponchelle <mathieu.duponchelle@opencreed.com>
# Copyright © 2016 Collabora Ltd
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 2.1 of the License, or (at your option)
# any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library.  If not, see <http://www.gnu.org/licenses/>.

import os
import re
import sys
import codecs
import urllib.request
import urllib.parse
import urllib.error
import argparse
import urllib.parse
import shutil
import subprocess
import io
import cgi
import html

from copy import deepcopy

import yaml

from lxml import etree

from hotdoc.utils.utils import OrderedSet, recursive_overwrite
from hotdoc.run_hotdoc import Application
from hotdoc.core.symbols import *
from hotdoc.core.config import Config
from hotdoc.core.project import Project
from hotdoc.core.exceptions import HotdocException
from hotdoc.core.database import Database
from hotdoc.core.links import Link
from hotdoc.parsers.gtk_doc import GtkDocStringFormatter
from hotdoc.extensions.gi.gi_extension import GIExtension
from hotdoc.utils.loggable import warn, info, debug, Logger

from hotdoc.utils.utils import all_subclasses, get_extension_classes


# pylint: disable=too-few-public-methods
class Patcher(object):
    """
    Rewrites a range of lines of a source file with new comment text.
    """
    # pylint: disable=no-self-use

    def patch(self, filename, begin, end, new_comment):
        """
        Replace lines ``begin`` (inclusive, 0-based) up to ``end``
        (exclusive) of `filename` with `new_comment` followed by a newline.

        While rewriting, typographic quotes (U+2019, U+201C, U+201D) found
        anywhere in the file are converted to their ASCII equivalents.
        Files whose name contains "Miscellaneous.default_page" are left
        untouched.

        The file is read and written as UTF-8, so non-ASCII content
        survives regardless of the locale's default encoding.
        """
        info("Patching %s at %s-%s" % (filename, begin, end))
        if "Miscellaneous.default_page" in filename:
            return

        fix_quotes = str.maketrans({
            "\u2019": "'",
            "\u201c": '"',
            "\u201d": '"',
        })
        with open(filename, 'r', encoding='utf8') as source:
            lines = [line.translate(fix_quotes) for line in source]

        res = lines[:begin] + [new_comment + '\n'] + lines[end:]
        # Write back with the same encoding that was used for reading.
        with open(filename, 'w', encoding='utf8') as target:
            target.write(''.join(res))


class SymbolNotFoundException(HotdocException):
    """Exception type registered for the 'section-symbol-not-found' warning."""


# Associate the 'section-symbol-not-found' warning code with its exception
# type. NOTE(review): the third argument is presumably the logging domain
# (the same 'gtk-doc-port' string is passed to debug() in write_symbols).
Logger.register_warning_code('section-symbol-not-found',
                             SymbolNotFoundException, 'gtk-doc-port')

# Some useful symbols

# Directory that the generated markdown pages are written into; it is joined
# to file names as a relative path.
MD_OUTPUT_PATH = 'markdown'


def which(program):
    """
    Locate an executable, in the spirit of the shell's ``which``.

    If `program` contains a directory component it is checked as-is;
    otherwise every entry of the ``PATH`` environment variable is searched
    in order. When ``PATH`` is not set, ``os.defpath`` is searched instead
    of raising ``KeyError``.

    Returns the path of the first regular, executable file found, or
    ``None`` when there is none.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        return program if is_exe(program) else None

    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        # PATH entries are sometimes wrapped in double quotes.
        exe_file = os.path.join(path.strip('"'), program)
        if is_exe(exe_file):
            return exe_file

    return None


def require_program(program):
    """
    Report whether `program` can be located with which() and terminate the
    process with exit status 1 when it cannot.
    """
    if which(program) is not None:
        print("Checking for %s: yes" % program)
        return

    print("Checking for %s: no" % program)
    print("Please install %s" % program)
    sys.exit(1)


def require_path(path):
    """
    Terminate the process with exit status 1 unless `path` names an
    existing regular file.
    """
    if os.path.isfile(path):
        return

    for message in ("%s is not a valid path" % path,
                    "Please correct it, exiting"):
        print(message)
    sys.exit(1)


def check_pandoc():
    """
    Make sure pandoc is installed and pick the markdown flavour to emit.

    The candidate output formats are tried in preference order and the
    first one whose name appears in the output of ``pandoc -h`` is
    returned. The process exits with status 1 when pandoc is missing or
    none of the candidates is mentioned.
    """
    require_program('pandoc')
    oformats = ['commonmark', 'markdown_github', 'markdown']
    pandoc_help = subprocess.check_output(['pandoc', '-h']).decode()

    print("Checking pandoc output formats in preference order")

    for oformat in oformats:
        # A format is usable only when pandoc actually advertises it.
        supported = oformat in pandoc_help
        print("Checking pandoc's '%s' output format: %s"
              % (oformat, 'yes' if supported else 'no'))
        if supported:
            return oformat

    print("No format found, please install a newer pandoc")
    sys.exit(1)


def stripped_basename(path):
    """Return the last component of `path` without its final extension."""
    stem, _extension = os.path.splitext(os.path.basename(path))
    return stem


def quiet_call(cmd):
    """
    Run `cmd` with both stdout and stderr discarded and return its exit
    status.
    """
    return subprocess.call(cmd,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)


def is_version_controlled(dir_, path):
    """
    Tell whether `path` is tracked by git, running git from `dir_`.

    ``git ls-files --error-unmatch`` is executed with `dir_` as its
    working directory and with its output discarded. The working
    directory of the calling process is never changed, so it cannot be
    left pointing at `dir_` if spawning git fails.

    Returns True when git exits with status 0, False otherwise.
    """
    result = subprocess.call(
        ['git', 'ls-files', '--error-unmatch', path],
        cwd=dir_,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL)
    return result == 0


# Module-level side effects: pandoc is probed as soon as this module is
# loaded, and check_pandoc() exits the process when pandoc or a usable output
# format is missing. MD_FORMAT is the pandoc output format used by db_to_md().
MD_FORMAT = check_pandoc()
# git is invoked by is_version_controlled() and DocRepoMonitor.build().
require_program('git')


class DocRepoMonitor(object):
    """
    Records the comments reported by the hotdoc database while a project
    is set up, and rewrites the raw text of SECTION comments.
    """

    def __init__(self):
        self.comments = []
        self.section_comments = {}
        self.class_comments = {}
        # Filled in by build(); it must exist before build() stores into it.
        self.naive_pages = {}

    def build(self, conf_file, args):
        """
        Set up a hotdoc application and project from `conf_file`.

        Every comment added to the database during setup is recorded in
        ``self.comments``. Pages whose base name starts with ``gen-``
        (except ``gen-gi-extension-index.md``) are stored in
        ``self.naive_pages`` keyed by their name without the ``gen-``
        prefix and extension. `args` is accepted but not used.
        """
        ext_classes = get_extension_classes(sort=True)
        self.app = Application(ext_classes)
        conf = self.conf = Config(conf_file=conf_file)
        self.app.parse_config(conf)
        conf_dir = os.path.dirname(os.path.abspath(conf_file))
        self.rootpath = subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            cwd=conf_dir).decode().strip("\n")
        self.app.database.comment_added_signal.connect(self.__comment_added)
        self.doc_repo = Project(self.app)
        self.doc_repo.parse_name_from_config(conf)
        self.doc_repo.parse_config(conf, toplevel=True)
        self.doc_repo.setup()
        for name, page in list(self.doc_repo.tree.get_pages().items()):
            bname = os.path.basename(name)

            if not bname.startswith('gen-'):
                continue

            if bname == 'gen-gi-extension-index.md':
                continue

            bname = bname[4:]
            stripped = os.path.splitext(bname)[0]
            self.naive_pages[stripped] = page

    def load_section_comments(self, sections):
        """
        Attach each recorded SECTION comment to the matching entries of
        `sections`.

        A comment is a SECTION comment when its second line starts with
        `` * SECTION:``; it is assigned to the ``comment`` attribute of
        every value of `sections` whose ``ofile`` equals the section name.
        """
        for comment in self.comments:
            lines = comment.raw_comment.split("\n")
            if len(lines) < 2 or not lines[1].startswith(' * SECTION:'):
                continue

            section_name = lines[1].split(':')[1].strip()
            for section in sections.values():
                if section.ofile == section_name:
                    section.comment = comment

    def update_comments_lines(self, c, update_number):
        """
        Shift the line numbers of the comments that follow `c` in the same
        file by `update_number` lines (`c` itself is left unchanged).
        """
        found = False
        comments = sorted([tc for tc in self.comments if tc.filename == c.filename],
                          key=lambda x: (x.filename, x.lineno))
        for comment in comments:
            if found:
                comment.lineno += update_number
                comment.endlineno += update_number
            elif c == comment:
                found = True

    def get_symbols_to_import_for_section(self, comment, sections):
        """
        Work out which symbols of a section need to be listed explicitly.

        The section is the first value of `sections` whose ``ofile``
        matches the name on the ``SECTION:`` line of `comment`. A symbol of
        that section is selected when it is not already listed in the
        comment's ``symbols`` metadata and either has no comment in the
        database or is commented in a different file than `comment`.

        Returns ``(symbols, section)``, or ``(set(), None)`` when no
        section matches.
        """
        res = set()
        imported_symbols = comment.meta.get('symbols', [])
        section_name = comment.raw_comment.split('\n')[1].split(':')[1].strip()
        for section in sections.values():
            if section.ofile != section_name:
                continue

            for symbol in section.symbols:
                if symbol in imported_symbols:
                    continue
                symbol_comment = self.app.database.get_comment(symbol)
                if not symbol_comment:
                    info("%s not commented" % symbol)
                    res.add(symbol)
                elif comment.filename != symbol_comment.filename:
                    res.add(symbol)
            return res, section

        return res, None

    def __apply_docbook_hacks(self, lines, gtk_markdown, c=None):
        """
        Replace docbook markup found in comment `lines` with markdown.

        When `gtk_markdown` is true, code fences and XML entities are kept
        in their gtk-doc form; otherwise they become fenced code blocks
        and literal characters.

        `c` is the comment the lines belong to. When given, the line
        numbers of the comments following it are adjusted for every line
        that gets dropped, merged or split. When omitted, no bookkeeping
        is done.

        Returns the list of translated lines.
        """
        tick = '`'
        if gtk_markdown:
            open_code = "|["
            end_code = "]|\n * "
            lang_c = '<!-- language="C" -->'
            gt = "&gt;"
            lt = "&lt;"
            quote = '&quot;'
        else:
            end_code = "```\n *"
            open_code = "\n * ```"
            lang_c = ' c'
            gt = ">"
            lt = "<"
            quote = '"'

        # Applied in order to every line. A replacement of None marks the
        # single entry whose value depends on the per-line state.
        replacements = [
            ("|[", open_code),
            ("]|", end_code),
            ('<!-- language="C" -->', lang_c),
            ("<title>", "## "),
            ("</title>", ""),
            ("<informalexample>", ""),
            ("</informalexample>", ""),
            ("<programlisting>", open_code),
            ("</programlisting>", end_code),
            ("<programlisting language=\"c\">", open_code + lang_c),
            ("<itemizedlist>", ""),
            ("</itemizedlist>", ""),
            ("<para>", ''),
            ("</para>", ''),
            ("<listitem>", None),
            ("</listitem>", ''),
            ("</refsect2>", ''),
            ("<refsect2>", ''),
            ("<term>", '* ' + tick),
            ("</term>", tick),
            ("</variablelist>", ''),
            ("<variablelist>", ''),
            ("<varlistentry>", ''),
            ("</varlistentry>", ''),
            ("</filename>", tick),
            ("&lt;", lt),
            ("&gt;", gt),
            ("&quot;", quote),
            ("<classname>", tick),
            ("</classname>", tick),
            ("</literal>", tick),
            ("<literal>", tick),
        ]
        # Lines consisting only of one of these tags are dropped entirely.
        droppable = [re.compile(r" \* *</?example>$"),
                     re.compile(r" \* *</?listitem>$"),
                     re.compile(r" \* *</?note>$")]

        def shift_following(delta):
            if c is not None:
                self.update_comments_lines(c, delta)

        res = []
        in_list = False
        in_note = False
        in_variable_list = False
        for line in lines:
            if "<itemizedlist>" in line:
                in_list = True
            elif "</itemizedlist>" in line:
                in_list = False
            if "<variablelist>" in line:
                in_variable_list = True
            elif "</variablelist>" in line:
                in_variable_list = False

            if "<note>" in line:
                info("In note!")
                in_note = True

            to_prev = in_variable_list and "<listitem>" in line

            for old, new in replacements:
                if new is None:
                    new = ':' if in_variable_list else '* '
                line = line.replace(old, new)

            if "</note>" in line:
                info("NOT IN  note!")
                in_note = False

            if line.startswith("<table"):
                info("===> TABLE TO FIX!")

            if any(regex.search(line) for regex in droppable):
                shift_following(-1)
                continue

            line = line.replace("<note>", '').replace("</note>", '')

            if res and re.findall(r"^ \* * \*$", res[-1]):
                to_prev = True

            pieces = []
            for piece in line.split("\n"):
                piece = piece.rstrip()
                if in_variable_list:
                    piece = re.sub(r"^ \* *", " * ", piece)
                if in_note:
                    piece = piece.replace(" * ", " * > ")

                pieces.append(piece)

            if len(pieces) > 1:
                shift_following(len(pieces) - 1)

            # Merging needs a previous line to merge into.
            if to_prev and res:
                res[-1] += '\n'.join([" " + piece.replace("*", "").strip()
                                      for piece in pieces])
                shift_following(-1)
            else:
                res.extend(pieces)

        return res

    def sort_section_comments(self, sections, gtk_markdown, private_symbols):
        """
        Sort the recorded comments by file and line, and rewrite the
        header of every SECTION comment.

        For a SECTION comment, the section name is replaced by the header
        file name (unless `gtk_markdown` is true), a ``@title`` line is
        inserted or set, and ``@symbols`` / ``@private-symbols`` lists are
        inserted for the symbols returned by
        get_symbols_to_import_for_section(); those found in
        `private_symbols` go to the private list. The line numbers of the
        following comments are shifted for every inserted line.
        """
        SECTION_EXCEPTIONS = {
            " * SECTION:gsturihandler": "GstURIHandlerInterface"
        }
        self.comments.sort(key=lambda x: (x.filename, x.lineno))
        for c in self.comments:
            lines = c.raw_comment.split("\n")
            is_section = len(lines) >= 2 and lines[1].startswith(' * SECTION:')
            if is_section:
                n_section = SECTION_EXCEPTIONS.get(lines[1])
                if not n_section:
                    n_section = os.path.basename(
                        c.filename).replace('.c', ".h")

                if not gtk_markdown:
                    lines[1] = " * SECTION:%s" % n_section

                name = n_section
                fname = c.filename.replace('.c', '.h')
                pagenames = [fname.replace(self.rootpath, '')[
                    1:], os.path.basename(fname)]
                to_import_symbols, section = self.get_symbols_to_import_for_section(
                    c, sections)
                title = None
                if section:
                    title = section.title

                if not title:
                    # Figure out some title
                    for pagename in pagenames:
                        try:
                            page = self.doc_repo.tree.get_pages()[pagename]
                            title = page.get_title().strip('_')
                            if title == 'unnamed':
                                for s in page.symbols:
                                    if isinstance(s, QualifiedSymbol):
                                        if s.display_name:
                                            title = s.display_name
                                            break
                                if title == 'unnamed':
                                    if c.title:
                                        title = c.title.description
                                    else:
                                        title = None
                        except KeyError:
                            print("WARNING: Could not get page %s" % pagename)

                    info("Title is ready? %s" % title)
                    if not title:
                        # Fall back to the capitalised section name.
                        name = os.path.splitext(name)[0]
                        title = name[0].upper() + name[1:]

                    if title.endswith('Class'):
                        title = title[:-5]
                    elif title.endswith('Private'):
                        title = title[:-len('Private')]

                title_str = " * @title: " + title
                info("==== > Title: %s" % title_str)
                if not c.title:
                    lines.insert(2, title_str)
                    self.update_comments_lines(c, 1)
                    info("Inserting %s" % lines[2])
                elif not [l for l in lines if '@title' in l and not l.endswith('.h')]:
                    info("Setting %s" % title_str)
                    lines[2] = title_str
                if to_import_symbols:
                    page_privates = {
                        s for s in to_import_symbols if s in private_symbols}
                    to_import_symbols -= page_privates

                    for sname, symbols in [('private-symbols', page_privates),
                                           ('symbols', to_import_symbols)]:
                        if not symbols:
                            continue

                        lines.insert(3, " * @%s:" % sname)
                        self.update_comments_lines(c, 1)
                        for i, symbol in enumerate(symbols):
                            lines.insert(4 + i, " * - %s" % symbol)
                            self.update_comments_lines(c, 1)

            # NOTE: the docbook hacks and the collapsing of consecutive
            # empty comment lines are not applied here; the lines are
            # stored back unchanged apart from the header edits above.
            c.raw_comment = '\n'.join(lines)

    def __comment_added(self, doc_db, comment):
        """Record `comment` when it carries both a file name and a line."""
        if comment.filename and comment.lineno:
            self.comments.append(comment)


def db_to_md(content):
    """
    Convert docbook markup to markdown with pandoc.

    `content` is the docbook document as bytes. It is fed to pandoc
    through standard input, so no temporary file is created in the
    working directory and nothing is left behind when pandoc fails.

    Returns pandoc's output decoded to ``str``. Raises
    ``subprocess.CalledProcessError`` when pandoc exits with an error.
    """
    cmd = ['pandoc', '-s', '-f', 'docbook', '-t', MD_FORMAT]
    # With no input file on the command line pandoc reads from stdin.
    converted = subprocess.check_output(cmd, input=content)
    return converted.decode()


class DTDResolver(etree.Resolver):
    """
    Resolver that serves scheme-less URLs from a set of local directories,
    matching on the file's base name.
    """

    def __init__(self, paths):
        # Maps a file's base name to its full path; later directories
        # override earlier ones when names clash.
        self.urls = {}
        for directory in (paths if paths else []):
            for entry in os.listdir(directory):
                candidate = os.path.join(directory, entry)
                if not os.path.isfile(candidate):
                    continue
                self.urls[entry] = candidate

    def resolve(self, url, id, context):
        """
        Resolve `url` from the known local files when it has no scheme and
        its base name is known; defer to the base class otherwise.
        """
        has_scheme = bool(urllib.parse.urlparse(url).scheme)
        if not has_scheme:
            local_path = self.urls.get(os.path.basename(url))
            if local_path:
                return self.resolve_filename(local_path, context)
        return etree.Resolver.resolve(self, url, id, context)


def get_free_md_path(inserted_pages, name):
    """
    Compute the markdown output path for `name` and register it.

    Slashes in `name` are replaced by underscores so that the result is
    always a direct child of MD_OUTPUT_PATH; this also holds for the
    alternative paths tried when the first candidate is taken.
    Underscores are appended to the file name until the candidate is not
    a key of `inserted_pages`.

    NOTE(review): the collision test looks the candidate *path* up among
    the keys of `inserted_pages`, whereas this function stores paths as
    values under `name` -- confirm which one is intended.

    The chosen path is stored in ``inserted_pages[name]`` and returned.
    """
    safe_name = name.replace('/', '_')
    path = os.path.join(MD_OUTPUT_PATH, safe_name + ".md")

    n = 0
    while path in inserted_pages:
        n += 1
        path = os.path.join(MD_OUTPUT_PATH, safe_name + '_' * n + ".md")

    inserted_pages[name] = path

    return path


def replace_with_link(node, md_name, label):
    """
    Replace `node` in its parent with a ``sect4`` element whose title is a
    ``ulink`` to `md_name` labelled `label`.
    """
    parent = node.getparent()

    link = etree.Element('ulink', attrib={'url': md_name})
    link.text = label

    title = etree.Element('title')
    title.append(link)

    replacement = etree.Element('sect4')
    replacement.append(title)

    parent.replace(node, replacement)


def dump_gi_index(page, standalones, inserted_pages):
    """
    Write ``gi-index.md`` into MD_OUTPUT_PATH.

    The index always starts with a fixed front matter and heading. With a
    single standalone element, its markdown translation is appended to
    the index. With several, each one is detached from its parent,
    translated and written to its own markdown file named after its
    title (or "Section N" when it has none). `page` is not used.
    """
    md_content = ''.join([
        '---\n',
        'short-description: API Reference Manual\n',
        '...\n\n',
        '# API Reference\n\n',
    ])

    first = standalones[0]
    first_parent = first.getparent()
    if first_parent is not None:
        first_parent.remove(first)

    if len(standalones) == 1:
        md_content += db_to_md(etree.tostring(first))
    else:
        untitled = 0
        for standalone in standalones:
            owner = standalone.getparent()
            if owner is not None:
                owner.remove(standalone)

            title_node = standalone.find('./title')
            if title_node is None:
                untitled += 1
                title = "Section %d" % untitled
            else:
                title = "".join(title_node.itertext())

            db_content = etree.tostring(standalone)
            md_path = urllib.parse.unquote(
                get_free_md_path(inserted_pages, title))
            sub_md_content = db_to_md(db_content)
            with open(md_path, 'w') as sub_file:
                sub_file.write(sub_md_content)

    index_path = os.path.join(MD_OUTPUT_PATH, 'gi-index.md')
    with open(index_path, 'w') as index_file:
        index_file.write(md_content)


# Prefix -> namespace URI map passed to XPath queries in collect_xrefs();
# translate_docbook() extends it with the namespaces declared on each parsed
# document root.
NSMAP = {'xml': 'http://www.w3.org/XML/1998/namespace'}


def collect_xrefs(app, filename, root):
    """
    Register a link with ``app.link_resolver`` for every element under
    `root` (and `root` itself) carrying an ``id`` or ``xml:id`` attribute.

    The link title is the text of the first ``refentrytitle`` or ``title``
    descendant (plain or docbook-namespaced), or the id when there is
    none. Links point at ``<basename of filename>.md``, with a
    ``#anchor`` derived from the title for every node but the root.
    """
    xml_id_attr = '{%s}id' % NSMAP['xml']
    title_queries = (
        './/refentrytitle',
        './/{http://docbook.org/ns/docbook}refentrytitle',
        './/title',
        './/{http://docbook.org/ns/docbook}title',
    )

    id_nodes = root.xpath('.//*[@id]')
    id_nodes.extend(root.xpath('.//*[@xml:id]', namespaces=NSMAP))
    if 'id' in root.attrib or xml_id_attr in root.attrib:
        id_nodes.append(root)

    for node in id_nodes:
        # First matching query wins, in the order listed above.
        title_node = None
        for query in title_queries:
            title_node = node.find(query)
            if title_node is not None:
                break

        id_ = node.attrib.get('id') or node.attrib.get(xml_id_attr)

        if title_node is None:
            title = id_
        else:
            title = "".join(title_node.itertext()).strip()

        ref = stripped_basename(filename) + '.md'
        if node != root:
            ref += '#' + title.lower().replace(' ', '-')
        app.link_resolver.add_link(Link(ref, title, id_))


def output_title_to_md(inserted_pages, gi_index_title, node):
    """
    Create a markdown file holding only the title of `node` as a heading.

    When the title equals `gi_index_title` the page is registered under
    the name ``gi-index`` and that name is returned; otherwise the page
    is registered under the node's ``id`` attribute (or its title text)
    and the registered path is returned.
    """
    heading = node.find('title').text

    if heading == gi_index_title:
        newname = get_free_md_path(inserted_pages, 'gi-index')
        # The registered value is overridden with the bare page name.
        inserted_pages['gi-index'] = 'gi-index'
        name = 'gi-index'
    else:
        key = node.attrib.get('id', heading)
        newname = get_free_md_path(inserted_pages, key)
        name = inserted_pages[key]

    with open(newname, 'w') as md_file:
        md_file.write('# %s\n' % heading)
    return name


def add_subpage(parent_page, page):
    """
    Append the `page` dict to ``parent_page['subpages']``, creating that
    list when `parent_page` does not have one yet.
    """
    assert isinstance(page, dict)
    parent_page.setdefault('subpages', []).append(page)


def translate_docbook(filename, sections, resolver, inserted_pages, new_name, app, files_to_render,
                      parent_page, cnode, gi_index_title):
    """
    Walk a docbook document and build the page hierarchy it describes.

    When `cnode` is None, `filename` is parsed (with `resolver` used for
    DTD lookups) and its root element is walked; otherwise `cnode` is an
    element of an already parsed document and only its children are
    walked.

    For each child:

    * ``chapter``, ``part`` and ``reference`` elements get a page of their
      own (see output_title_to_md) added below `parent_page`, and are
      walked recursively with that page as parent;
    * XInclude elements are removed from the tree. Included files that
      are tracked by git are translated recursively as sub-pages; the
      others are added as a ``<name>.h`` page, using the file name of the
      matching entry of `sections` when it has a comment;
    * any other element is walked recursively.

    When a file was parsed by this call, its root is additionally queued
    in `files_to_render` under ``inserted_pages[new_name]`` together with
    the front matter extracted from its ``refnamediv``, its
    cross-references are collected, and ``parent_page['url']`` is set.

    Returns None.
    """
    root = None
    if cnode is None:
        parser = etree.XMLParser(load_dtd=True, recover=True)
        parser.resolvers.add(resolver)
        root = cnode = etree.parse(filename, parser=parser).getroot()

        for error in parser.error_log:
            print(error)

        if parser.error_log:
            print("Continuing despite the error")

        # Nothing to walk; this must be tested before the root is used.
        if cnode is None:
            return

        NSMAP.update(cnode.nsmap)
        NSMAP.pop(None, None)

    dir_ = os.path.dirname(os.path.abspath(filename))
    # Iterate over a copy: children are removed from the tree below.
    for node in list(cnode):
        if node.tag in ['chapter', 'part', 'reference']:

            csub = []
            cpage = {'url': output_title_to_md(
                inserted_pages, gi_index_title, node), 'subpages': csub}
            add_subpage(parent_page, cpage)
            translate_docbook(filename, sections, resolver, inserted_pages,
                              node.text, app, files_to_render, cpage, node, gi_index_title)
            continue
        elif node.tag != '{http://www.w3.org/2003/XInclude}include':
            translate_docbook(filename, sections, resolver, inserted_pages,
                              new_name, app, files_to_render, parent_page, node, gi_index_title)
            continue

        href = node.attrib.get('href')
        if href is None:
            continue

        bname = stripped_basename(href)
        parent = node.getparent()
        if bname == 'annotation-glossary' or bname.startswith('api-index') or \
                bname.startswith('tree_index'):
            parent.remove(node)
            continue

        # Called for its side effect of registering inserted_pages[bname],
        # which the recursive call below reads back through `new_name`.
        get_free_md_path(inserted_pages, bname)
        parent.remove(node)

        if is_version_controlled(dir_, href):
            cpage = {'subpages': []}
            translate_docbook(os.path.join(dir_, href), sections, resolver, inserted_pages, bname, app,
                              files_to_render, cpage, None, gi_index_title)
            add_subpage(parent_page, cpage)
        else:
            section = sections.get(bname)
            fname = None
            if section:
                if section.comment:
                    fname = os.path.basename(
                        section.comment.filename).replace('.c', '.h')
            if not fname:
                fname = bname + '.h'
            if fname not in inserted_pages:
                page = {'url': fname, 'subpages': []}
                inserted_pages[fname] = page
                add_subpage(parent_page, page)
            else:
                print("WARNING: %s has several SECTIONs?" % fname)

    if root is None:
        return

    metadict = {}
    refnamediv = root.find('./refnamediv')
    bookinfo = root.find('./bookinfo')
    md_content = ''
    if bookinfo is not None:
        sect1 = etree.Element('sect1')
        for elem in bookinfo:
            sect1.append(deepcopy(elem))
        root.replace(bookinfo, sect1)
    elif refnamediv is not None:
        root.remove(refnamediv)
        refname = refnamediv.find('./refname')
        if refname is not None:
            metadict['title'] = "".join(
                [x for x in refname.itertext()]).strip()
        refpurpose = refnamediv.find('./refpurpose')
        if refpurpose is not None:
            short = "".join([x for x in refpurpose.itertext()]).strip()
            metadict['short-description'] = short.replace('\n', '')

    if metadict:
        md_content += '---\n'
        md_content += yaml.dump(metadict, default_flow_style=False)
        md_content += '...\n\n'

    collect_xrefs(app, filename, root)
    files_to_render[inserted_pages[new_name]] = (root, md_content)
    parent_page['url'] = inserted_pages[new_name]


class Section(object):
    """
    One entry of a sections file: an output file name, an optional title
    and the ordered set of symbols it lists.
    """

    def __init__(self, node):
        """
        Build the section from a ``SECTION`` element, or an empty section
        when `node` is None.
        """
        self.node = node
        self.comment = None
        self.ofile = None
        self.title = None
        self.symbols = OrderedSet()
        if node is None:
            return

        self.ofile = str(node.find('FILE').text)

        title_node = node.find('TITLE')
        self.title = title_node.text if title_node is not None else None

        # The symbol names are the text following the last child element,
        # one per line.
        trailing_text = node.getchildren()[-1].tail
        self.symbols = OrderedSet(
            [name for name in trailing_text.split("\n") if name])


def parse_section_file(sections_path, section_comments):
    """
    Parse a gtk-doc sections file.

    The file is first scanned for the names listed after a
    ``<SUBSECTION ...Private>`` marker (matched case-insensitively); the
    names themselves are collected with their original case. The file is
    then converted with the ``translate_sections.sh`` script located next
    to this module, and one Section is built per ``SECTION`` element of
    the result, with its comment looked up in `section_comments` by
    output file name.

    The intermediate ``hotdoc-tmp-sections.txt`` file is removed even when
    the conversion or the parsing fails.

    Returns ``(sections, privates)``: a dict mapping output file names to
    Section objects, and the set of private symbol names.
    """
    privates = set()
    here = os.path.dirname(__file__)
    trans_shscript_path = os.path.join(here, 'translate_sections.sh')
    in_private = False
    with open(sections_path) as secfile:
        for raw_line in secfile.read().split('\n'):
            line = raw_line.strip()
            # Only the marker is matched case-insensitively; symbol names
            # are case-sensitive and must be stored untouched.
            lowered = line.lower()
            if lowered.startswith("<subsection") and 'private>' in lowered:
                in_private = True
                continue
            if in_private:
                if line.startswith('<'):
                    in_private = False
                    continue
                if line:
                    privates.add(line)

    tmp_path = 'hotdoc-tmp-sections.txt'
    cmd = [trans_shscript_path, sections_path, tmp_path]
    try:
        subprocess.check_call(cmd)
        sections_root = etree.parse(tmp_path).getroot()
    finally:
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)

    sections = {}
    for section_node in sections_root.findall('.//SECTION'):
        section = Section(section_node)
        section.comment = section_comments.get(section.ofile)
        sections[section.ofile] = section

    return sections, privates


def translate(docstring):
    """
    Translate a gtk-doc formatted `docstring` to markdown.

    ``&``, ``<`` and ``>`` are escaped before the string is handed to the
    gtk-doc formatter; quotes are left as they are. ``html.escape`` is
    used because ``cgi.escape`` was removed in Python 3.8.
    """
    formatter = GtkDocStringFormatter()
    docstring = html.escape(docstring, quote=False)
    return formatter.translate(docstring, None, 'markdown')


def write_symbols(monitor, section):
    """
    Write the markdown page of `section` into MD_OUTPUT_PATH.

    The page consists of a YAML front matter (the symbols of the section
    that exist in the database of ``monitor.app``, plus the title and
    short description when the section has a comment) followed by the
    translated description of the section comment.

    The whole page is computed before the file is opened, so a failure
    during translation does not leave a truncated file behind. The file
    is written as UTF-8.

    Returns the number of symbols of the section that could not be found
    in the database.
    """
    not_found = 0
    contents = ''
    metadict = {'symbols': []}

    comment = section.comment
    if comment:
        # NOTE(review): other code in this file reads
        # ``comment.title.description``; confirm that the title is a plain
        # string at this point.
        if comment.title:
            metadict['title'] = html.escape(comment.title, quote=False)
        elif section.title:
            metadict['title'] = html.escape(section.title, quote=False)
        if comment.short_description:
            short = translate(comment.short_description)
            metadict['short-description'] = short.replace('\n', '')
        if comment.description:
            desc = [line.strip() for line in comment.description.split('\n')]
            contents = '%s\n' % translate('\n'.join(desc))

    for symbol in section.symbols:
        if monitor.app.database.get_symbol(symbol) is None:
            debug("Warning, the symbol %s referenced in the "
                  "section file under the %s output file could not "
                  "be found" % (symbol, section.ofile), 'gtk-doc-port')
            not_found += 1
        else:
            metadict['symbols'].append(symbol)

    opath = os.path.join(MD_OUTPUT_PATH, section.ofile + '.md')
    with open(opath, 'w', encoding='utf-8') as f:
        f.write('---\n')
        f.write(str(yaml.dump(metadict, default_flow_style=False)))
        f.write('...\n\n')
        f.write(contents)

    return not_found


def translate_sections(monitor, sections):
    """
    Write the markdown page of every section and return the list of the
    generated page names (``<ofile>.md``), in iteration order.

    A 'section-symbol-not-found' warning is emitted when at least one
    symbol could not be found.
    """
    missing = 0
    total = 0
    gi_subpages = []
    for section in list(sections.values()):
        missing += write_symbols(monitor, section)
        total += len(section.symbols)
        gi_subpages.append(section.ofile + '.md')

    if missing:
        warn('section-symbol-not-found',
             "%d symbols out of %d could not be found, please verify that "
             "the list of input sources is complete. "
             "Note that some symbols might be hidden "
             "by the preprocessor. "
             "Rerun this tool with HOTDOC_DEBUG=2 to see the full list "
             "of missing symbols." % (missing, total))

    return gi_subpages


def patch_comments(patcher, comments):
    """
    Write every comment of `comments` back to its source file through
    `patcher`, skipping those with a negative line number.

    After a comment with empty text has been patched, the line numbers of
    the comments located after it in the same file are moved up by the
    number of lines that disappeared.
    """
    if not comments:
        return

    print("Patching %d comments" % len(comments))
    for comment in comments:
        if comment.lineno < 0:
            continue

        patcher.patch(comment.filename, comment.lineno - 1,
                      comment.endlineno, comment.raw_comment)
        if comment.raw_comment != '':
            continue

        for other in comments:
            same_file = other.filename == comment.filename
            if not (same_file and other.lineno > comment.endlineno):
                continue
            removed = comment.endlineno - comment.lineno
            other.lineno -= removed
            other.endlineno -= removed


def sections_from_naive_pages(monitor):
    """
    Build a Section for every page of the project whose source file lives
    under the application's output directory.

    Sections are keyed by the page's file name, relative to the generated
    doc folder, without its first four characters and extension; the
    ``gen-gi-extension-index.md`` page is skipped. The title comes from
    the matching entry of ``monitor.section_comments`` when there is one,
    and falls back to the key.
    """
    sections = {}
    generated_pages = monitor.doc_repo.tree.get_pages()
    for nname, npage in list(generated_pages.items()):
        print("Items: %s %s" % (nname, npage))
        is_generated = npage.source_file.startswith(monitor.app.output)
        if not is_generated:
            continue

        section = Section(None)
        relpath = os.path.relpath(
            nname, monitor.doc_repo.get_generated_doc_folder())
        if relpath == 'gen-gi-extension-index.md':
            continue

        dname, gen_name = os.path.split(relpath)
        fname = gen_name[4:]
        stripped, _extension = os.path.splitext(fname)

        section.ofile = os.path.join(dname, fname)
        section.comment = monitor.section_comments.get(stripped)
        if section.comment:
            section.title = section.comment.title
        if not section.title:
            section.title = stripped
        sections[stripped] = section

    print("Returning %s" % sections)
    return sections


def dump_sitemap(page, level=0):
    """
    Append `page` and, recursively, its sub-pages to ``sitemap.txt`` in
    the current directory, one base name per line, indented with one tab
    per nesting `level`. An empty `page` writes nothing.
    """
    if not page:
        return

    with open('sitemap.txt', 'a') as sitemap:
        indent = '\t' * level
        sitemap.write(indent + os.path.basename(page['url']) + '\n')

    for child in page['subpages']:
        dump_sitemap(child, level + 1)


def render_files(files_to_render, app):
    """
    Resolve docbook links and write each translated page as markdown.

    For every entry of `files_to_render`, the `link` and `xref` elements
    (with or without the docbook namespace) that carry a `linkend`
    attribute are looked up through `app.link_resolver`. Resolved elements
    get a `url` attribute and are renamed to `ulink`; unresolved ones are
    left untouched. The tree is then converted with `db_to_md`, appended to
    the markdown prefix and written to the output path as UTF-8.

    Args:
        files_to_render (dict): maps an output markdown path to a
            `(root, md_content)` tuple, where `root` is the parsed docbook
            element and `md_content` the markdown text to emit before the
            converted tree.
        app: object exposing `link_resolver` and `database`.
    """
    # `xref` may appear with or without the docbook namespace; both forms
    # are collected below, so both must get a title when resolved.
    xref_tags = ('xref', '{http://docbook.org/ns/docbook}xref')

    for md_path, (root, md_content) in files_to_render.items():
        links = root.findall('.//link')
        links.extend(root.findall('.//xref'))
        links.extend(root.findall('.//{http://docbook.org/ns/docbook}link'))
        links.extend(root.findall('.//{http://docbook.org/ns/docbook}xref'))

        for link_node in links:
            linkend = link_node.attrib.get('linkend')
            if linkend is None:
                continue

            # Try the identifier with dashes turned into underscores and
            # any ':CAPS' suffix dropped first, then the raw linkend.
            sym_name = linkend.replace('-', '_')
            if sym_name.endswith(':CAPS'):
                sym_name = sym_name[:-5]
            link = app.link_resolver.get_named_link(sym_name)
            if link is None:
                link = app.link_resolver.get_named_link(linkend)
            if not link:
                continue

            # Known symbols are referenced by link id, anything else by
            # the resolved link target.
            if app.database.get_symbol(sym_name):
                link_node.attrib['url'] = link.id_
            else:
                link_node.attrib['url'] = link.get_link(
                    app.link_resolver)[0]

            # Give cross references the title of their target as text,
            # since they are about to become plain `ulink` elements.
            if link_node.tag in xref_tags:
                link_node.text = link.get_title()

            link_node.tag = 'ulink'

        # Removing refmetas as pandoc just outputs their text without making
        # any sense of it.
        for refmeta in root.findall('refmeta'):
            root.remove(refmeta)
        md_content += db_to_md(etree.tostring(root))
        # Explicit encoding: the default depends on the locale, while the
        # other markdown pages of this script are written as UTF-8.
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(md_content)


if __name__ == '__main__':
    # Script entry point: for each hotdoc configuration file given on the
    # command line, build the project, port the section comments and the
    # optional docbook index to markdown under MD_OUTPUT_PATH, then dump
    # the updated configuration to a "tmp.json" copy next to the original.

    # Imported here, under an alias, so that this block carries its own
    # dependency: cgi.escape() was removed in Python 3.8 and
    # html.escape(quote=False) is its direct replacement.
    from html import escape as html_escape

    parser = argparse.ArgumentParser()

    parser.add_argument('--section-file', action='store',
                        dest='section_file', help='The path to the gtk-doc "sections file"')
    parser.add_argument('--docbook-index', action='store',
                        required=False, dest='docbook_index',
                        help='The path to the docbook index')
    parser.add_argument('conf_files', nargs='+',
                        help='The path to a hotdoc.json file configured for '
                        'parsing the C sources of the project')
    parser.add_argument('--extra-dtd-path', action='append',
                        help='Path to an extra DTD if needed, can be specified multiple times',
                        dest='extra_dtd_paths')
    parser.add_argument('-n', '--no-patching-comment', action='store_true',
                        help='Do not patch comment in source code')
    parser.add_argument('--gi-index-title',
                        help='The name of the title to use for the GI index.',
                        default="API Reference")

    args = parser.parse_args()
    # Ensure we build from scratch
    for dirname in os.listdir('.'):
        if dirname.startswith('hotdoc-private'):
            shutil.rmtree(dirname)
    # shutil.rmtree(MD_OUTPUT_PATH, ignore_errors=True)

    os.makedirs(MD_OUTPUT_PATH, exist_ok=True)

    # Check every input path before any processing starts.
    for conf_file in args.conf_files:
        require_path(conf_file)
    if args.docbook_index:
        require_path(args.docbook_index)

    # hdargs = ['run', '--conf-file', args.conf_file, '--gi-smart-index']
    for conf_file in args.conf_files:
        # Work on a copy so that the original configuration is untouched.
        tmpfile = os.path.join(os.path.dirname(
            os.path.abspath(conf_file)), "tmp.json")
        try:
            shutil.copyfile(conf_file, tmpfile)
        except shutil.SameFileError:
            # The given file already is the working copy: skip it.
            continue
        conf_file = tmpfile
        monitor = DocRepoMonitor()
        monitor.build(conf_file, args)

        if args.section_file is not None:
            require_path(args.section_file)
            sections, privates = parse_section_file(args.section_file,
                                                    monitor.section_comments)
            monitor.load_section_comments(sections)
            if not args.no_patching_comment:
                monitor.sort_section_comments(sections, True, privates)

                patcher = Patcher()
                patch_comments(patcher, list(monitor.comments) +
                               list(monitor.section_comments.values()))
        else:
            sections = sections_from_naive_pages(monitor)
            if not args.no_patching_comment:
                monitor.sort_section_comments(sections, True, [])
            for sname, section in list(sections.items()):
                section_comment = monitor.section_comments.get(sname)
                if not section_comment:
                    continue

                full_path = os.path.join(MD_OUTPUT_PATH, section.ofile)
                dname = os.path.dirname(full_path)
                if dname:
                    os.makedirs(dname, exist_ok=True)
                # The page is written whether or not its directory already
                # existed; it used to be skipped when the directory was
                # already there.
                with io.open(full_path, 'w', encoding='utf-8') as f:
                    if section_comment.title:
                        f.write("### %s\n\n" %
                                html_escape(section_comment.title,
                                            quote=False))
                    elif section.title:
                        f.write("### %s\n\n" %
                                html_escape(section.title, quote=False))
                    if section_comment.short_description:
                        f.write('%s\n\n' %
                                translate(section_comment.short_description))
                    if section_comment.description:
                        f.write('%s\n\n' % translate(
                            section_comment.description))

        resolver = DTDResolver(args.extra_dtd_paths)
        inserted_pages = {
            'gi-index': os.path.join(MD_OUTPUT_PATH, 'gi-index.md')}
        inserted_pages['index'] = os.path.join(MD_OUTPUT_PATH, 'index.md')

        files_to_render = {}
        sitemap_root = {}
        if args.docbook_index:
            translate_docbook(args.docbook_index, sections, resolver, inserted_pages, 'index',
                              monitor.app, files_to_render, sitemap_root, None, args.gi_index_title)
        # dump_sitemap() appends to the file, so start from a clean slate.
        try:
            os.unlink('sitemap.txt')
        except FileNotFoundError:
            pass
        dump_sitemap(sitemap_root)

        render_files(files_to_render, monitor.app)

        extra_conf = {
            'gi_index': 'gi-index.md',
            'languages': ['c', 'python', 'javascript'],
            'output': 'built_doc',
            'gtk_doc_escape_html': True,
            'page_parser_escape_html': True,
        }

        ncp = Config(conf_file=conf_file,
                     command_line_args=extra_conf)

        # `conf_file` is the "tmp.json" copy at this point.
        ncp.dump(conf_file)

        # recursive_overwrite(MD_OUTPUT_PATH, monitor.doc_repo.get_base_doc_folder())
        # shutil.rmtree(MD_OUTPUT_PATH)
