Thanks to visit codestin.com
Credit goes to chromium.googlesource.com

blob: 72975cc6206c0ef181eeb4c9cf592d806a756ebd [file] [log] [blame]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
r"""
For documentation and usage instructions, please see
https://rustc-dev-guide.rust-lang.org/rustdoc-internals/rustdoc-test-suite.html
"""
from __future__ import absolute_import, print_function, unicode_literals
import codecs
import io
import sys
import os.path
import re
import shlex
from collections import namedtuple
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser
try:
from xml.etree import cElementTree as ET
except ImportError:
from xml.etree import ElementTree as ET
try:
from html.entities import name2codepoint
except ImportError:
from htmlentitydefs import name2codepoint
# "void elements" (no closing tag) from the HTML Standard section 12.1.2
VOID_ELEMENTS = {
"area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"keygen",
"link",
"menuitem",
"meta",
"param",
"source",
"track",
"wbr",
}
# Python 2 -> 3 compatibility
try:
unichr # noqa: B018 FIXME: py2
except NameError:
unichr = chr
channel = os.environ["DOC_RUST_LANG_ORG_CHANNEL"]
# Initialized in main
rust_test_path = None
bless = None
class CustomHTMLParser(HTMLParser):
"""simplified HTML parser.
this is possible because we are dealing with very regular HTML from
rustdoc; we only have to deal with i) void elements and ii) empty
attributes."""
def __init__(self, target=None):
HTMLParser.__init__(self)
self.__builder = target or ET.TreeBuilder()
def handle_starttag(self, tag, attrs):
attrs = {k: v or "" for k, v in attrs}
self.__builder.start(tag, attrs)
if tag in VOID_ELEMENTS:
self.__builder.end(tag)
def handle_endtag(self, tag):
self.__builder.end(tag)
def handle_startendtag(self, tag, attrs):
attrs = {k: v or "" for k, v in attrs}
self.__builder.start(tag, attrs)
self.__builder.end(tag)
def handle_data(self, data):
self.__builder.data(data)
def handle_entityref(self, name):
self.__builder.data(unichr(name2codepoint[name]))
def handle_charref(self, name):
code = int(name[1:], 16) if name.startswith(("x", "X")) else int(name, 10)
self.__builder.data(unichr(code))
def close(self):
HTMLParser.close(self)
return self.__builder.close()
Command = namedtuple("Command", "negated cmd args lineno context")
class FailedCheck(Exception):
pass
class InvalidCheck(Exception):
pass
def concat_multi_lines(f):
"""returns a generator out of the file object, which
- removes `\\` then `\n` then a shared prefix with the previous line then
optional whitespace;
- keeps a line number (starting from 0) of the first line being
concatenated."""
lastline = None # set to the last line when the last line has a backslash
firstlineno = None
catenated = ""
for lineno, line in enumerate(f):
line = line.rstrip("\r\n")
# strip the common prefix from the current line if needed
if lastline is not None:
common_prefix = os.path.commonprefix([line, lastline])
line = line[len(common_prefix) :].lstrip()
firstlineno = firstlineno or lineno
if line.endswith("\\"):
if lastline is None:
lastline = line[:-1]
catenated += line[:-1]
else:
yield firstlineno, catenated + line
lastline = None
firstlineno = None
catenated = ""
if lastline is not None:
print_err(lineno, line, "Trailing backslash at the end of the file")
LINE_PATTERN = re.compile(
r"""
//@\s+
(?P<negated>!?)(?P<cmd>.+?)
(?:[\s:](?P<args>.*))?$
""",
re.X | re.UNICODE,
)
DEPRECATED_LINE_PATTERN = re.compile(
r"//\s+@",
re.X | re.UNICODE,
)
def get_commands(template):
with io.open(template, encoding="utf-8") as f:
for lineno, line in concat_multi_lines(f):
if DEPRECATED_LINE_PATTERN.search(line):
print_err(
lineno,
line,
"Deprecated command syntax, replace `// @` with `//@ `",
)
continue
m = LINE_PATTERN.search(line)
if not m:
continue
cmd = m.group("cmd")
negated = m.group("negated") == "!"
args = m.group("args") or ""
try:
args = shlex.split(args)
except UnicodeEncodeError:
args = [
arg.decode("utf-8") for arg in shlex.split(args.encode("utf-8"))
]
except Exception as exc:
raise Exception("line {}: {}".format(lineno + 1, exc)) from None
yield Command(
negated=negated, cmd=cmd, args=args, lineno=lineno + 1, context=line
)
def _flatten(node, acc):
if node.text:
acc.append(node.text)
for e in node:
_flatten(e, acc)
if e.tail:
acc.append(e.tail)
def flatten(node):
acc = []
_flatten(node, acc)
return "".join(acc)
def make_xml(text):
xml = ET.XML("<xml>%s</xml>" % text)
return xml
def normalize_xpath(path):
path = path.replace("{{channel}}", channel)
if path.startswith("//"):
return "." + path # avoid warnings
elif path.startswith(".//"):
return path
else:
raise InvalidCheck(
"Non-absolute XPath is not supported due to implementation issues"
)
class CachedFiles(object):
def __init__(self, root):
self.root = root
self.files = {}
self.trees = {}
self.last_path = None
def resolve_path(self, path):
if path != "-":
path = os.path.normpath(path)
self.last_path = path
return path
elif self.last_path is None:
raise InvalidCheck("Tried to use the previous path in the first command")
else:
return self.last_path
def get_absolute_path(self, path):
return os.path.join(self.root, path)
def get_file(self, path):
path = self.resolve_path(path)
if path in self.files:
return self.files[path]
abspath = self.get_absolute_path(path)
if not (os.path.exists(abspath) and os.path.isfile(abspath)):
raise FailedCheck("File does not exist {!r}".format(path))
with io.open(abspath, encoding="utf-8") as f:
data = f.read()
self.files[path] = data
return data
def get_tree(self, path):
path = self.resolve_path(path)
if path in self.trees:
return self.trees[path]
abspath = self.get_absolute_path(path)
if not (os.path.exists(abspath) and os.path.isfile(abspath)):
raise FailedCheck("File does not exist {!r}".format(path))
with io.open(abspath, encoding="utf-8") as f:
try:
tree = ET.fromstringlist(f.readlines(), CustomHTMLParser())
except Exception as e:
raise RuntimeError( # noqa: B904 FIXME: py2
"Cannot parse an HTML file {!r}: {}".format(path, e)
)
self.trees[path] = tree
return self.trees[path]
def get_dir(self, path):
path = self.resolve_path(path)
abspath = self.get_absolute_path(path)
if not (os.path.exists(abspath) and os.path.isdir(abspath)):
raise FailedCheck("Directory does not exist {!r}".format(path))
def check_string(data, pat, regexp):
pat = pat.replace("{{channel}}", channel)
if not pat:
return True # special case a presence testing
elif regexp:
return re.search(pat, data, flags=re.UNICODE) is not None
else:
data = " ".join(data.split())
pat = " ".join(pat.split())
return pat in data
def check_tree_attr(tree, path, attr, pat, regexp):
path = normalize_xpath(path)
ret = False
for e in tree.findall(path):
if attr in e.attrib:
value = e.attrib[attr]
else:
continue
ret = check_string(value, pat, regexp)
if ret:
break
return ret
# Returns the number of occurrences matching the regex (`regexp`) and the text (`pat`).
def check_tree_text(tree, path, pat, regexp, stop_at_first):
path = normalize_xpath(path)
match_count = 0
try:
for e in tree.findall(path):
try:
value = flatten(e)
except KeyError:
continue
else:
if check_string(value, pat, regexp):
match_count += 1
if stop_at_first:
break
except Exception:
print('Failed to get path "{}"'.format(path))
raise
return match_count
def get_tree_count(tree, path):
path = normalize_xpath(path)
return len(tree.findall(path))
def check_snapshot(snapshot_name, actual_tree, normalize_to_text):
assert rust_test_path.endswith(".rs")
snapshot_path = "{}.{}.{}".format(rust_test_path[:-3], snapshot_name, "html")
try:
with open(snapshot_path, "r") as snapshot_file:
expected_str = snapshot_file.read().replace("{{channel}}", channel)
except FileNotFoundError:
if bless:
expected_str = None
else:
raise FailedCheck("No saved snapshot value") # noqa: B904 FIXME: py2
if not normalize_to_text:
actual_str = ET.tostring(actual_tree).decode("utf-8")
else:
actual_str = flatten(actual_tree)
# Conditions:
# 1. Is --bless
# 2. Are actual and expected tree different
# 3. Are actual and expected text different
if (
not expected_str
or (
not normalize_to_text
and not compare_tree(make_xml(actual_str), make_xml(expected_str), stderr)
)
or (normalize_to_text and actual_str != expected_str)
):
if bless:
with open(snapshot_path, "w") as snapshot_file:
actual_str = actual_str.replace(channel, "{{channel}}")
snapshot_file.write(actual_str)
else:
print("--- expected ---\n")
print(expected_str)
print("\n\n--- actual ---\n")
print(actual_str)
print()
raise FailedCheck("Actual snapshot value is different than expected")
# Adapted from https://github.com/formencode/formencode/blob/3a1ba9de2fdd494dd945510a4568a3afeddb0b2e/formencode/doctest_xml_compare.py#L72-L120
def compare_tree(x1, x2, reporter=None):
if x1.tag != x2.tag:
if reporter:
reporter("Tags do not match: %s and %s" % (x1.tag, x2.tag))
return False
for name, value in x1.attrib.items():
if x2.attrib.get(name) != value:
if reporter:
reporter(
"Attributes do not match: %s=%r, %s=%r"
% (name, value, name, x2.attrib.get(name))
)
return False
for name in x2.attrib:
if name not in x1.attrib:
if reporter:
reporter("x2 has an attribute x1 is missing: %s" % name)
return False
if not text_compare(x1.text, x2.text):
if reporter:
reporter("text: %r != %r" % (x1.text, x2.text))
return False
if not text_compare(x1.tail, x2.tail):
if reporter:
reporter("tail: %r != %r" % (x1.tail, x2.tail))
return False
cl1 = list(x1)
cl2 = list(x2)
if len(cl1) != len(cl2):
if reporter:
reporter("children length differs, %i != %i" % (len(cl1), len(cl2)))
return False
i = 0
for c1, c2 in zip(cl1, cl2):
i += 1
if not compare_tree(c1, c2, reporter=reporter):
if reporter:
reporter("children %i do not match: %s" % (i, c1.tag))
return False
return True
def text_compare(t1, t2):
if not t1 and not t2:
return True
if t1 == "*" or t2 == "*":
return True
return (t1 or "").strip() == (t2 or "").strip()
def stderr(*args):
if sys.version_info.major < 3:
file = codecs.getwriter("utf-8")(sys.stderr)
else:
file = sys.stderr
print(*args, file=file)
def print_err(lineno, context, err, message=None):
global ERR_COUNT
ERR_COUNT += 1
stderr("{}: {}".format(lineno, message or err))
if message and err:
stderr("\t{}".format(err))
if context:
stderr("\t{}".format(context))
def get_nb_matching_elements(cache, c, regexp, stop_at_first):
tree = cache.get_tree(c.args[0])
pat, sep, attr = c.args[1].partition("/@")
if sep: # attribute
tree = cache.get_tree(c.args[0])
return check_tree_attr(tree, pat, attr, c.args[2], False)
else: # normalized text
pat = c.args[1]
if pat.endswith("/text()"):
pat = pat[:-7]
return check_tree_text(
cache.get_tree(c.args[0]), pat, c.args[2], regexp, stop_at_first
)
def check_files_in_folder(c, cache, folder, files):
files = files.strip()
if not files.startswith("[") or not files.endswith("]"):
raise InvalidCheck(
"Expected list as second argument of {} (ie '[]')".format(c.cmd)
)
folder = cache.get_absolute_path(folder)
# First we create a set of files to check if there are duplicates.
files = shlex.split(files[1:-1].replace(",", ""))
files_set = set()
for file in files:
if file in files_set:
raise InvalidCheck("Duplicated file `{}` in {}".format(file, c.cmd))
files_set.add(file)
folder_set = set([f for f in os.listdir(folder) if f != "." and f != ".."])
# Then we remove entries from both sets (we clone `folder_set` so we can iterate it while
# removing its elements).
for entry in set(folder_set):
if entry in files_set:
files_set.remove(entry)
folder_set.remove(entry)
error = 0
if len(files_set) != 0:
print_err(
c.lineno,
c.context,
"Entries not found in folder `{}`: `{}`".format(folder, files_set),
)
error += 1
if len(folder_set) != 0:
print_err(
c.lineno,
c.context,
"Extra entries in folder `{}`: `{}`".format(folder, folder_set),
)
error += 1
return error == 0
ERR_COUNT = 0
def check_command(c, cache):
try:
cerr = ""
if c.cmd in ["has", "hasraw", "matches", "matchesraw"]: # string test
regexp = c.cmd.startswith("matches")
# has <path> = file existence
if len(c.args) == 1 and not regexp and "raw" not in c.cmd:
try:
cache.get_file(c.args[0])
ret = True
except FailedCheck as err:
cerr = str(err)
ret = False
# hasraw/matchesraw <path> <pat> = string test
elif len(c.args) == 2 and "raw" in c.cmd:
cerr = "`PATTERN` did not match"
if c.negated:
cerr = "`PATTERN` unexpectedly matched"
ret = check_string(cache.get_file(c.args[0]), c.args[1], regexp)
# has/matches <path> <pat> <match> = XML tree test
elif len(c.args) == 3 and "raw" not in c.cmd:
cerr = "`XPATH PATTERN` did not match"
if c.negated:
cerr = "`XPATH PATTERN` unexpectedly matched"
ret = get_nb_matching_elements(cache, c, regexp, True) != 0
else:
raise InvalidCheck("Invalid number of {} arguments".format(c.cmd))
elif c.cmd == "files": # check files in given folder
if len(c.args) != 2: # files <folder path> <file list>
raise InvalidCheck("Invalid number of {} arguments".format(c.cmd))
elif c.negated:
raise InvalidCheck("{} doesn't support negative check".format(c.cmd))
ret = check_files_in_folder(c, cache, c.args[0], c.args[1])
elif c.cmd == "count": # count test
if len(c.args) == 3: # count <path> <pat> <count> = count test
expected = int(c.args[2])
found = get_tree_count(cache.get_tree(c.args[0]), c.args[1])
cerr = "Expected {} occurrences but found {}".format(expected, found)
ret = expected == found
elif len(c.args) == 4: # count <path> <pat> <text> <count> = count test
expected = int(c.args[3])
found = get_nb_matching_elements(cache, c, False, False)
cerr = "Expected {} occurrences but found {}".format(expected, found)
ret = found == expected
else:
raise InvalidCheck("Invalid number of {} arguments".format(c.cmd))
elif c.cmd == "snapshot": # snapshot test
if len(c.args) == 3: # snapshot <snapshot-name> <html-path> <xpath>
[snapshot_name, html_path, pattern] = c.args
tree = cache.get_tree(html_path)
xpath = normalize_xpath(pattern)
normalize_to_text = False
if xpath.endswith("/text()"):
xpath = xpath[:-7]
normalize_to_text = True
subtrees = tree.findall(xpath)
if len(subtrees) == 1:
[subtree] = subtrees
try:
check_snapshot(snapshot_name, subtree, normalize_to_text)
ret = True
except FailedCheck as err:
cerr = str(err)
ret = False
elif len(subtrees) == 0:
raise FailedCheck("XPATH did not match")
else:
raise FailedCheck(
"Expected 1 match, but found {}".format(len(subtrees))
)
else:
raise InvalidCheck("Invalid number of {} arguments".format(c.cmd))
elif c.cmd == "has-dir": # has-dir test
if len(c.args) == 1: # has-dir <path> = has-dir test
try:
cache.get_dir(c.args[0])
ret = True
except FailedCheck as err:
cerr = str(err)
ret = False
else:
raise InvalidCheck("Invalid number of {} arguments".format(c.cmd))
else:
# Ignore unknown directives as they might be compiletest directives
# since they share the same `//@` prefix by convention. In any case,
# compiletest rejects unknown directives for us.
return
if ret == c.negated:
raise FailedCheck(cerr)
except FailedCheck as err:
message = "{}{} check failed".format("!" if c.negated else "", c.cmd)
print_err(c.lineno, c.context, str(err), message)
except InvalidCheck as err:
print_err(c.lineno, c.context, str(err))
def check(target, commands):
cache = CachedFiles(target)
for c in commands:
check_command(c, cache)
if __name__ == "__main__":
if len(sys.argv) not in [3, 4]:
stderr("Usage: {} <doc dir> <template> [--bless]".format(sys.argv[0]))
raise SystemExit(1)
rust_test_path = sys.argv[2]
if len(sys.argv) > 3 and sys.argv[3] == "--bless":
bless = True
else:
# We only support `--bless` at the end of the arguments.
# This assert is to prevent silent failures.
assert "--bless" not in sys.argv
bless = False
check(sys.argv[1], get_commands(rust_test_path))
if ERR_COUNT:
stderr("\nEncountered {} errors".format(ERR_COUNT))
raise SystemExit(1)