From 67572bf15aa193084887af8be470362004e222f1 Mon Sep 17 00:00:00 2001 From: Jody Klymak Date: Tue, 2 Feb 2021 21:10:19 -0800 Subject: [PATCH 01/18] UTILS: script --- _websiteutils/make_redirects_links.py | 203 ++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 _websiteutils/make_redirects_links.py diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py new file mode 100644 index 00000000000..20653d05126 --- /dev/null +++ b/_websiteutils/make_redirects_links.py @@ -0,0 +1,203 @@ +import argparse +import glob +import logging +import multiprocessing +import os +import pathlib +import re +import subprocess +import sys +import tempfile + + +""" +This script does two things that improve the website organization. + +First, we used to host in the root of the webpage, but have now moved to +``/stable/``. We do not want obsolete links to link to nothing (or that has +been our policy), so we currently just keep the old version at the top level. +Here, instead, we either softlink to the newest version, or replace the file by +an html refresh redirect. + +Second, it changes the canonical link in each html file to the newest version +found of the html file (including stable if its in the latest version.) + +This script takes a while, and is destructive, so should probably be run on a +branch and pushed as a PR so it can easily be reverted. +""" + +_log = logging.getLogger('make_redirect_links') + + +tocheck = ['stable'] + [f'{major}.{minor}.{micro}' + for major in range(6, -1, -1) + for minor in range(6, -1, -1) + for micro in range(6, -1, -1)] + +toignore = tocheck + ['mpl-probscale', 'mpl_examples', 'mpl_toolkits', + '_webpageutils', 'xkcd', 'sitemap.xml', + 'robots.txt', 'CNAME', '.git'] + +logging.basicConfig(level=logging.DEBUG) + + +def findlast(fname, tocheck): + """ + Check the directories listed in ``tocheck`` to see if they have + ``fname`` in them. Return the first one found, or None + """ + p = pathlib.Path(fname) + for t in tocheck: + pnew = pathlib.Path(t, p) + if pnew.exists(): + return t + else: + return None + +html_redirect = """ + + + + + + + + +

+ The page been moved to +

+ + +""" + + +def do_links(root0): + """ + Either soft link a file at the top level to its newest position, + or make an html redirect if it is an html file. + """ + _log.info(f'Doing links on {root0}') + for root, dirs, files in os.walk(root0): + for name in files: + fullname = os.path.join(root, name) + last = findlast(fullname, tocheck) + _log.debug(f'Checking: {fullname} found {last}') + if last is not None: + os.remove(fullname) + if name.endswith(('.htm', '.html')): + # make an html redirect. + _log.info(f'Rewriting HTML: {fullname} in {last}') + with open(fullname, 'w') as fout: + oldname = '/' + os.path.join(last, fullname) + st = html_redirect % (oldname, oldname, oldname) + fout.write(st) + else: + # soft link + # Need to do these relative to where the link is + # so if it is a level down `ln -s ../3.1.1/boo/who boo/who` + last = os.path.join('..', last) + depth = root.count('/') + for i in range(depth): + last = os.path.join('..', last) + oldname = os.path.join(last, fullname) + _log.info(f'Linking {fullname} to {oldname}') + os.symlink(oldname, fullname) + for d in dirs: + do_links(d) + + +def do_canonicals(dname): + """ + For each html file in the versioned docs, make the canonical link point + to the newest version. + """ + _log.debug(f'Walking {dname}') + for root, dirs, files in os.walk(dname): + for name in files: + fullname = os.path.join(root, name) + p = pathlib.Path(fullname) + _log.debug(f'Checking {fullname}') + if name.endswith(('.htm', '.html')): + basename = pathlib.Path(*p.parts[1:]) + last = findlast(basename, tocheck) + if last is not None: + update_canonical(fullname, last) + + for d in dirs: + _log.info(f'DIR: {d}') + do_canonicals(os.path.join(dname,d)) + + +def update_canonical(fullname, last): + """ + Change the canonical link in *fullname* to the same link in the + version given by *last*. We do this with a regexp to prevent + removing any other content on a line that has the canonical link. + + Note that if for some reason there are more than one canonical link + this will change all of them. + """ + p = pathlib.Path(fullname) + pre = 'https://matplotlib.org/' + pnew = pathlib.Path(last, *p.parts[1:]) + newcanon = f'{pre+str(pnew)}' + _log.info(f'{p} to {pre+str(pnew)}') + with tempfile.NamedTemporaryFile(delete=False) as fout: + with open(fullname, 'rb') as fin: + for line in fin: + if b'{ll}') + fout.write(ll) + else: + fout.write(line) + os.rename(fout.name, fullname) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Optional app description') + + parser.add_argument('--np', type=int, help='Number of processors to use') + parser.add_argument('--no_canonicals', help='do not do canonical links', + action="store_true") + parser.add_argument('--no_redirects', help='do not do redirects links', + action="store_true") + + args = parser.parse_args() + if args.np: + np = args.np + else: + np = None + + # html redirect or soft link most things in the top-level directory that + # are not other modules or versioned docs. + if not args.no_redirects: + for entry in os.scandir('./'): + if not (entry.name in toignore): + if entry.is_dir(): + do_links(entry.name) + elif entry.name.endswith(('.htm', '.html')): + fullname = entry.name + last = findlast(fullname, tocheck) + _log.debug(f'Checking: {fullname} found {last}') + if last is not None: + os.remove('./'+fullname) + _log.info(f'Rewriting HTML: {fullname} in {last}') + with open(fullname, 'w') as fout: + oldname = '/' + os.path.join(last, fullname) + st = html_redirect % (oldname, oldname, oldname) + fout.write(st) + _log.info('Done links and redirects') + + # change the canonical url for all html to the newest version in the docs: + if not args.no_canonicals: + if np is not None: + with multiprocessing.Pool(np) as pool: + pool.map(do_canonicals, tocheck[1:]) + else: + for t in tocheck[1:]: + do_canonicals(t) From 35bc91c34675998279751996ab5dee29816662b7 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 00:12:33 -0500 Subject: [PATCH 02/18] MNT: make move portable os.raname does not work across filesystems --- _websiteutils/make_redirects_links.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 20653d05126..adcf135024b 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -8,7 +8,7 @@ import subprocess import sys import tempfile - +import shutil """ This script does two things that improve the website organization. @@ -154,7 +154,7 @@ def update_canonical(fullname, last): fout.write(ll) else: fout.write(line) - os.rename(fout.name, fullname) + shutil.move(fout.name, fullname) if __name__ == "__main__": From 86a6021a7b16aa1602d67df790f243bbcc0a0b63 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 00:13:10 -0500 Subject: [PATCH 03/18] MNT: make re-direct relative This is helpful to people who want to host off-line versions of the docs. --- _websiteutils/make_redirects_links.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index adcf135024b..7ebe74f317c 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -54,17 +54,18 @@ def findlast(fname, tocheck): else: return None + html_redirect = """ - - + +

- The page been moved to + The page been moved to

From 95c3a6a35eaed61a49816524832e2f88d8f54630 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 00:19:46 -0500 Subject: [PATCH 04/18] MNT: pre-filter tocheck --- _websiteutils/make_redirects_links.py | 33 ++++++++++++++++++--------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 7ebe74f317c..7a4b25c05e9 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -26,17 +26,28 @@ branch and pushed as a PR so it can easily be reverted. """ -_log = logging.getLogger('make_redirect_links') - - -tocheck = ['stable'] + [f'{major}.{minor}.{micro}' - for major in range(6, -1, -1) - for minor in range(6, -1, -1) - for micro in range(6, -1, -1)] - -toignore = tocheck + ['mpl-probscale', 'mpl_examples', 'mpl_toolkits', - '_webpageutils', 'xkcd', 'sitemap.xml', - 'robots.txt', 'CNAME', '.git'] +_log = logging.getLogger("make_redirect_links") + + +tocheck = ["stable"] + [ + f"{major}.{minor}.{micro}" + for major in range(6, -1, -1) + for minor in range(6, -1, -1) + for micro in range(6, -1, -1) + if pathlib.Path(f"{major}.{minor}.{micro}").exists() +] + +toignore = tocheck + [ + "mpl-probscale", + "mpl_examples", + "mpl_toolkits", + "_webpageutils", + "xkcd", + "sitemap.xml", + "robots.txt", + "CNAME", + ".git", +] logging.basicConfig(level=logging.DEBUG) From 681f21b52b26a170ad9b4d02021172ca4c821ea8 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 01:20:22 -0500 Subject: [PATCH 05/18] WIP: make the script faster and redirects relative --- _websiteutils/make_redirects_links.py | 96 +++++++++++++++------------ 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 7a4b25c05e9..8188f4dbcc7 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -52,17 +52,22 @@ logging.basicConfig(level=logging.DEBUG) -def findlast(fname, tocheck): +# beware of triksy mutable defaults! +def findlast(fname, tocheck, *, _cache={}): """ Check the directories listed in ``tocheck`` to see if they have ``fname`` in them. Return the first one found, or None """ p = pathlib.Path(fname) + if p in _cache: + return _cache[p] for t in tocheck: pnew = pathlib.Path(t, p) if pnew.exists(): + _cache[p] = t return t else: + _cache[p] = None return None @@ -88,34 +93,37 @@ def do_links(root0): Either soft link a file at the top level to its newest position, or make an html redirect if it is an html file. """ - _log.info(f'Doing links on {root0}') + + _log.info(f"Doing links on {root0}") for root, dirs, files in os.walk(root0): for name in files: fullname = os.path.join(root, name) last = findlast(fullname, tocheck) - _log.debug(f'Checking: {fullname} found {last}') + _log.debug(f"Checking: {fullname} found {last}") + depth = root.count("/") if last is not None: os.remove(fullname) - if name.endswith(('.htm', '.html')): + if name.endswith((".htm", ".html")): # make an html redirect. - _log.info(f'Rewriting HTML: {fullname} in {last}') - with open(fullname, 'w') as fout: - oldname = '/' + os.path.join(last, fullname) - st = html_redirect % (oldname, oldname, oldname) + _log.info(f"Rewriting HTML: {fullname} in {last}") + with open(fullname, "w") as fout: + oldname = os.path.join(last, fullname) + st = html_redirect % ( + "../" * (depth + 1) + oldname, + "/" + oldname, + "../" * (depth + 1) + oldname, + ) fout.write(st) else: # soft link # Need to do these relative to where the link is # so if it is a level down `ln -s ../3.1.1/boo/who boo/who` - last = os.path.join('..', last) - depth = root.count('/') + last = os.path.join("..", last) for i in range(depth): - last = os.path.join('..', last) + last = os.path.join("..", last) oldname = os.path.join(last, fullname) - _log.info(f'Linking {fullname} to {oldname}') + _log.info(f"Linking {fullname} to {oldname}") os.symlink(oldname, fullname) - for d in dirs: - do_links(d) def do_canonicals(dname): @@ -123,21 +131,21 @@ def do_canonicals(dname): For each html file in the versioned docs, make the canonical link point to the newest version. """ - _log.debug(f'Walking {dname}') + _log.debug(f"Walking {dname}") for root, dirs, files in os.walk(dname): for name in files: fullname = os.path.join(root, name) p = pathlib.Path(fullname) - _log.debug(f'Checking {fullname}') - if name.endswith(('.htm', '.html')): + _log.debug(f"Checking {fullname}") + if name.endswith((".htm", ".html")): basename = pathlib.Path(*p.parts[1:]) last = findlast(basename, tocheck) if last is not None: update_canonical(fullname, last) for d in dirs: - _log.info(f'DIR: {d}') - do_canonicals(os.path.join(dname,d)) + _log.info(f"DIR: {d}") + do_canonicals(os.path.join(dname, d)) def update_canonical(fullname, last): @@ -150,19 +158,19 @@ def update_canonical(fullname, last): this will change all of them. """ p = pathlib.Path(fullname) - pre = 'https://matplotlib.org/' + pre = "https://matplotlib.org/" pnew = pathlib.Path(last, *p.parts[1:]) - newcanon = f'{pre+str(pnew)}' - _log.info(f'{p} to {pre+str(pnew)}') + newcanon = f"{pre+str(pnew)}" + _log.info(f"{p} to {pre+str(pnew)}") with tempfile.NamedTemporaryFile(delete=False) as fout: - with open(fullname, 'rb') as fin: + with open(fullname, "rb") as fin: for line in fin: if b'{ll}') + new = bytes( + f'{ll}") fout.write(ll) else: fout.write(line) @@ -171,13 +179,15 @@ def update_canonical(fullname, last): if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Optional app description') + parser = argparse.ArgumentParser(description="Optional app description") - parser.add_argument('--np', type=int, help='Number of processors to use') - parser.add_argument('--no_canonicals', help='do not do canonical links', - action="store_true") - parser.add_argument('--no_redirects', help='do not do redirects links', - action="store_true") + parser.add_argument("--np", type=int, help="Number of processors to use") + parser.add_argument( + "--no_canonicals", help="do not do canonical links", action="store_true" + ) + parser.add_argument( + "--no_redirects", help="do not do redirects links", action="store_true" + ) args = parser.parse_args() if args.np: @@ -188,22 +198,22 @@ def update_canonical(fullname, last): # html redirect or soft link most things in the top-level directory that # are not other modules or versioned docs. if not args.no_redirects: - for entry in os.scandir('./'): + for entry in os.scandir("./"): if not (entry.name in toignore): if entry.is_dir(): do_links(entry.name) - elif entry.name.endswith(('.htm', '.html')): + elif entry.name.endswith((".htm", ".html")): fullname = entry.name last = findlast(fullname, tocheck) - _log.debug(f'Checking: {fullname} found {last}') + _log.debug(f"Checking: {fullname} found {last}") if last is not None: - os.remove('./'+fullname) - _log.info(f'Rewriting HTML: {fullname} in {last}') - with open(fullname, 'w') as fout: - oldname = '/' + os.path.join(last, fullname) - st = html_redirect % (oldname, oldname, oldname) + os.remove("./" + fullname) + _log.info(f"Rewriting HTML: {fullname} in {last}") + with open(fullname, "w") as fout: + oldname = os.path.join(last, fullname) + st = html_redirect % (oldname, "/" + oldname, oldname) fout.write(st) - _log.info('Done links and redirects') + _log.info("Done links and redirects") # change the canonical url for all html to the newest version in the docs: if not args.no_canonicals: From 72bd89683c19705eb52edbb51905c6fe39542a81 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 01:51:33 -0500 Subject: [PATCH 06/18] WIP: make script faster? --- _websiteutils/make_redirects_links.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 8188f4dbcc7..5ebebad2de7 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -162,16 +162,19 @@ def update_canonical(fullname, last): pnew = pathlib.Path(last, *p.parts[1:]) newcanon = f"{pre+str(pnew)}" _log.info(f"{p} to {pre+str(pnew)}") + rec = re.compile(b'{ll}") fout.write(ll) + found = True else: fout.write(line) shutil.move(fout.name, fullname) From 7a9c066f1d0605bffb6837890ee4d893059f839e Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 14:56:23 -0500 Subject: [PATCH 07/18] FIX: redirect url --- _websiteutils/make_redirects_links.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 5ebebad2de7..4b6d2a6dd6b 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -76,8 +76,8 @@ def findlast(fname, tocheck, *, _cache={}): - - + +

From 0fdda704b90958c79ee5f6001a16d9ff1c653268 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 3 Feb 2021 14:59:29 -0500 Subject: [PATCH 08/18] MNT: tweak non-redirected message --- _websiteutils/make_redirects_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 4b6d2a6dd6b..d950376c054 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -81,7 +81,7 @@ def findlast(fname, tocheck, *, _cache={}):

- The page been moved to + The page been moved here!

From 4fd177e54e0075a6f10b67e84d3b07234e95934e Mon Sep 17 00:00:00 2001 From: Jody Klymak Date: Wed, 3 Feb 2021 22:45:36 -0800 Subject: [PATCH 09/18] ENH: add a banner for old docs --- _websiteutils/make_redirects_links.py | 50 +++++++++++++++++++++------ 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index d950376c054..34cbdc11968 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -76,17 +76,28 @@ def findlast(fname, tocheck, *, _cache={}): - - + +

- The page been moved here! + The page been moved to

""" +# note these are all one line so they are easy to search and replace in the +# html files (otherwise we need to close tags) +warn_banner_exists = ('\n') + + +warn_banner_old = ('
You are reading an old ' + 'version of the documentation (v%s). For the latest version see ' + 'https://matplotlib.org/stable/
\n') + def do_links(root0): """ @@ -141,19 +152,17 @@ def do_canonicals(dname): basename = pathlib.Path(*p.parts[1:]) last = findlast(basename, tocheck) if last is not None: - update_canonical(fullname, last) + update_canonical(fullname, last, dname==tocheck[1]) - for d in dirs: - _log.info(f"DIR: {d}") - do_canonicals(os.path.join(dname, d)) - -def update_canonical(fullname, last): +def update_canonical(fullname, last, newest): """ Change the canonical link in *fullname* to the same link in the version given by *last*. We do this with a regexp to prevent removing any other content on a line that has the canonical link. + Also add a banner (div) in the body if an old version of the docs. + Note that if for some reason there are more than one canonical link this will change all of them. """ @@ -169,14 +178,31 @@ def update_canonical(fullname, last): for line in fin: if not found and b'{ll}") fout.write(ll) found = True + elif b'' in line and not newest: + # add a warning right under: + fout.write(line) + line = next(fin) + if last == 'stable': + new = warn_banner_exists % (p.parts[0], newcanon, + newcanon) + else: + new = warn_banner_old % (p.parts[0]) + fout.write(bytes(new, encoding="utf-8")) + if not b'
' in line: + # write the line out if it wasnt' an olddocs-message: + fout.write(line) + + else: fout.write(line) + shutil.move(fout.name, fullname) @@ -198,6 +224,10 @@ def update_canonical(fullname, last): else: np = None + # figure out the newest version and trim tocheck at the same time: + tocheck = [t for t in tocheck if os.path.exists(t)] + print(tocheck) + # html redirect or soft link most things in the top-level directory that # are not other modules or versioned docs. if not args.no_redirects: From 4799a026e222378c640cbdfe8e40e518a1927dfe Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 4 Feb 2021 03:35:15 -0500 Subject: [PATCH 10/18] Clean up flake8 stylistic things. --- _websiteutils/make_redirects_links.py | 52 ++++++++++++--------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 34cbdc11968..e68be9545b5 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -1,12 +1,9 @@ import argparse -import glob import logging import multiprocessing import os import pathlib import re -import subprocess -import sys import tempfile import shutil @@ -89,14 +86,16 @@ def findlast(fname, tocheck, *, _cache={}): # note these are all one line so they are easy to search and replace in the # html files (otherwise we need to close tags) -warn_banner_exists = ('
You are reading an old ' - 'version of the documentation (v%s). For the latest version see ' - '%s
\n') +warn_banner_exists = ( + '
You are reading an old version of the' + 'documentation (v%s). For the latest version see ' + '%s
\n') -warn_banner_old = ('
You are reading an old ' - 'version of the documentation (v%s). For the latest version see ' - 'https://matplotlib.org/stable/
\n') +warn_banner_old = ( + '
You are reading an old version of the' + 'documentation (v%s). For the latest version see ' + 'https://matplotlib.org/stable/
\n') def do_links(root0): @@ -152,7 +151,7 @@ def do_canonicals(dname): basename = pathlib.Path(*p.parts[1:]) last = findlast(basename, tocheck) if last is not None: - update_canonical(fullname, last, dname==tocheck[1]) + update_canonical(fullname, last, dname == tocheck[1]) def update_canonical(fullname, last, newest): @@ -162,26 +161,23 @@ def update_canonical(fullname, last, newest): removing any other content on a line that has the canonical link. Also add a banner (div) in the body if an old version of the docs. - + Note that if for some reason there are more than one canonical link this will change all of them. """ p = pathlib.Path(fullname) pre = "https://matplotlib.org/" pnew = pathlib.Path(last, *p.parts[1:]) - newcanon = f"{pre+str(pnew)}" - _log.info(f"{p} to {pre+str(pnew)}") + newcanon = f"{pre}{str(pnew)}" + _log.info(f"{p} to {pre}{str(pnew)}") rec = re.compile(b'{ll}") fout.write(ll) found = True @@ -194,12 +190,11 @@ def update_canonical(fullname, last, newest): newcanon) else: new = warn_banner_old % (p.parts[0]) - fout.write(bytes(new, encoding="utf-8")) - if not b'
' in line: - # write the line out if it wasnt' an olddocs-message: + fout.write(new.encode("utf-8")) + if b'
' not in line: + # write the line out if it wasn't an olddocs-message: fout.write(line) - else: fout.write(line) @@ -211,12 +206,10 @@ def update_canonical(fullname, last, newest): parser = argparse.ArgumentParser(description="Optional app description") parser.add_argument("--np", type=int, help="Number of processors to use") - parser.add_argument( - "--no_canonicals", help="do not do canonical links", action="store_true" - ) - parser.add_argument( - "--no_redirects", help="do not do redirects links", action="store_true" - ) + parser.add_argument("--no_canonicals", help="do not do canonical links", + action="store_true") + parser.add_argument("--no_redirects", help="do not do redirects links", + action="store_true") args = parser.parse_args() if args.np: @@ -244,7 +237,8 @@ def update_canonical(fullname, last, newest): _log.info(f"Rewriting HTML: {fullname} in {last}") with open(fullname, "w") as fout: oldname = os.path.join(last, fullname) - st = html_redirect % (oldname, "/" + oldname, oldname) + st = html_redirect % (oldname, "/" + oldname, + oldname) fout.write(st) _log.info("Done links and redirects") From 7543df1570738caba4e80bf3fa96bc1c27ae5822 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 4 Feb 2021 03:36:49 -0500 Subject: [PATCH 11/18] Revert some accidental changes. --- _websiteutils/make_redirects_links.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index e68be9545b5..855869659e7 100644 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -73,12 +73,12 @@ def findlast(fname, tocheck, *, _cache={}): - - + +

- The page been moved to + The page been moved here!

From 1e9977f0fa2b7c30e34fe79bd32b51e23c50cd44 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 4 Feb 2021 03:37:00 -0500 Subject: [PATCH 12/18] Make redirect script executable. --- _websiteutils/make_redirects_links.py | 2 ++ 1 file changed, 2 insertions(+) mode change 100644 => 100755 _websiteutils/make_redirects_links.py diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py old mode 100644 new mode 100755 index 855869659e7..f9edecfab87 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import argparse import logging import multiprocessing From 2f144a8e49bf87da7636bbc52769a476593c3d71 Mon Sep 17 00:00:00 2001 From: Jody Klymak Date: Sun, 7 Feb 2021 08:53:10 -0800 Subject: [PATCH 13/18] DOC: add a bit of docs --- _websiteutils/make_redirects_links.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index f9edecfab87..d62298e7f7b 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -10,7 +10,7 @@ import shutil """ -This script does two things that improve the website organization. +This script does three things that improve the website organization. First, we used to host in the root of the webpage, but have now moved to ``/stable/``. We do not want obsolete links to link to nothing (or that has @@ -21,6 +21,9 @@ Second, it changes the canonical link in each html file to the newest version found of the html file (including stable if its in the latest version.) +Third, the script adds a new div to the top of all the old webpages with +tag ``olddocs-message`` to warn users that the page is obsolete. + This script takes a while, and is destructive, so should probably be run on a branch and pushed as a PR so it can easily be reverted. """ From 1ff05eb04be3ef4b8d8859cf53fdd586d244e574 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 9 Feb 2021 02:51:56 -0500 Subject: [PATCH 14/18] Minor stylistic changes to redirects script. --- _websiteutils/make_redirects_links.py | 50 +++++++++++++-------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index d62298e7f7b..81de0dea051 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -22,7 +22,7 @@ found of the html file (including stable if its in the latest version.) Third, the script adds a new div to the top of all the old webpages with -tag ``olddocs-message`` to warn users that the page is obsolete. +tag ``olddocs-message`` to warn users that the page is obsolete. This script takes a while, and is destructive, so should probably be run on a branch and pushed as a PR so it can easily be reverted. @@ -73,17 +73,16 @@ def findlast(fname, tocheck, *, _cache={}): return None -html_redirect = """ - +html_redirect = """ - - + +

- The page been moved here! + The page been moved here!

@@ -92,14 +91,14 @@ def findlast(fname, tocheck, *, _cache={}): # note these are all one line so they are easy to search and replace in the # html files (otherwise we need to close tags) warn_banner_exists = ( - '
You are reading an old version of the' - 'documentation (v%s). For the latest version see ' - '%s
\n') + '
You are reading an old version of the ' + 'documentation (v{version}). For the latest version see ' + '{url}
\n') warn_banner_old = ( - '
You are reading an old version of the' - 'documentation (v%s). For the latest version see ' + '
You are reading an old version of the ' + 'documentation (v{version}). For the latest version see ' 'https://matplotlib.org/stable/
\n') @@ -123,10 +122,9 @@ def do_links(root0): _log.info(f"Rewriting HTML: {fullname} in {last}") with open(fullname, "w") as fout: oldname = os.path.join(last, fullname) - st = html_redirect % ( - "../" * (depth + 1) + oldname, - "/" + oldname, - "../" * (depth + 1) + oldname, + st = html_redirect.format( + newurl="../" * (depth + 1) + oldname, + canonical=oldname, ) fout.write(st) else: @@ -191,10 +189,10 @@ def update_canonical(fullname, last, newest): fout.write(line) line = next(fin) if last == 'stable': - new = warn_banner_exists % (p.parts[0], newcanon, - newcanon) + new = warn_banner_exists.format(version=p.parts[0], + url=newcanon) else: - new = warn_banner_old % (p.parts[0]) + new = warn_banner_old.format(version=p.parts[0]) fout.write(new.encode("utf-8")) if b'
' not in line: # write the line out if it wasn't an olddocs-message: @@ -208,12 +206,12 @@ def update_canonical(fullname, last, newest): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Optional app description") + parser = argparse.ArgumentParser() parser.add_argument("--np", type=int, help="Number of processors to use") - parser.add_argument("--no_canonicals", help="do not do canonical links", + parser.add_argument("--no-canonicals", help="do not do canonical links", action="store_true") - parser.add_argument("--no_redirects", help="do not do redirects links", + parser.add_argument("--no-redirects", help="do not do redirects links", action="store_true") args = parser.parse_args() @@ -229,8 +227,8 @@ def update_canonical(fullname, last, newest): # html redirect or soft link most things in the top-level directory that # are not other modules or versioned docs. if not args.no_redirects: - for entry in os.scandir("./"): - if not (entry.name in toignore): + for entry in os.scandir("."): + if entry.name not in toignore: if entry.is_dir(): do_links(entry.name) elif entry.name.endswith((".htm", ".html")): @@ -238,12 +236,12 @@ def update_canonical(fullname, last, newest): last = findlast(fullname, tocheck) _log.debug(f"Checking: {fullname} found {last}") if last is not None: - os.remove("./" + fullname) + os.remove(fullname) _log.info(f"Rewriting HTML: {fullname} in {last}") with open(fullname, "w") as fout: oldname = os.path.join(last, fullname) - st = html_redirect % (oldname, "/" + oldname, - oldname) + st = html_redirect.format(newurl=oldname, + canonical=oldname) fout.write(st) _log.info("Done links and redirects") From df8ed616dd53bcf4393d3c10b672eb622d0d468d Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 9 Feb 2021 17:32:17 -0500 Subject: [PATCH 15/18] Use pathlib more in redirect script. --- _websiteutils/make_redirects_links.py | 89 ++++++++++++--------------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 81de0dea051..e08257e65cc 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -31,15 +31,14 @@ _log = logging.getLogger("make_redirect_links") -tocheck = ["stable"] + [ - f"{major}.{minor}.{micro}" +tocheck = [pathlib.Path("stable")] + [ + pathlib.Path(f"{major}.{minor}.{micro}") for major in range(6, -1, -1) for minor in range(6, -1, -1) for micro in range(6, -1, -1) - if pathlib.Path(f"{major}.{minor}.{micro}").exists() ] -toignore = tocheck + [ +toignore = tocheck + [pathlib.Path(p) for p in [ "mpl-probscale", "mpl_examples", "mpl_toolkits", @@ -49,7 +48,7 @@ "robots.txt", "CNAME", ".git", -] +]] logging.basicConfig(level=logging.DEBUG) @@ -60,16 +59,15 @@ def findlast(fname, tocheck, *, _cache={}): Check the directories listed in ``tocheck`` to see if they have ``fname`` in them. Return the first one found, or None """ - p = pathlib.Path(fname) - if p in _cache: - return _cache[p] + if fname in _cache: + return _cache[fname] for t in tocheck: - pnew = pathlib.Path(t, p) + pnew = t / fname if pnew.exists(): - _cache[p] = t + _cache[fname] = t return t else: - _cache[p] = None + _cache[fname] = None return None @@ -111,32 +109,29 @@ def do_links(root0): _log.info(f"Doing links on {root0}") for root, dirs, files in os.walk(root0): for name in files: - fullname = os.path.join(root, name) + fullname = pathlib.Path(root, name) last = findlast(fullname, tocheck) _log.debug(f"Checking: {fullname} found {last}") - depth = root.count("/") if last is not None: - os.remove(fullname) + fullname.unlink() + oldname = last / fullname + # Need to do these relative to where the final is, but note + # that `Path.relative_to` does not allow '.' as a common path + # prefix, so we need to use `os.path.relpath` instead. + relpath = os.path.relpath(oldname, start=fullname.parent) if name.endswith((".htm", ".html")): # make an html redirect. _log.info(f"Rewriting HTML: {fullname} in {last}") - with open(fullname, "w") as fout: - oldname = os.path.join(last, fullname) + with fullname.open("w") as fout: st = html_redirect.format( - newurl="../" * (depth + 1) + oldname, + newurl=relpath, canonical=oldname, ) fout.write(st) else: # soft link - # Need to do these relative to where the link is - # so if it is a level down `ln -s ../3.1.1/boo/who boo/who` - last = os.path.join("..", last) - for i in range(depth): - last = os.path.join("..", last) - oldname = os.path.join(last, fullname) _log.info(f"Linking {fullname} to {oldname}") - os.symlink(oldname, fullname) + fullname.symlink_to(relpath) def do_canonicals(dname): @@ -145,16 +140,12 @@ def do_canonicals(dname): to the newest version. """ _log.debug(f"Walking {dname}") - for root, dirs, files in os.walk(dname): - for name in files: - fullname = os.path.join(root, name) - p = pathlib.Path(fullname) - _log.debug(f"Checking {fullname}") - if name.endswith((".htm", ".html")): - basename = pathlib.Path(*p.parts[1:]) - last = findlast(basename, tocheck) - if last is not None: - update_canonical(fullname, last, dname == tocheck[1]) + for fullname in dname.rglob("*.html"): + _log.debug(f"Checking {fullname}") + basename = pathlib.Path(*fullname.parts[1:]) + last = findlast(basename, tocheck) + if last is not None: + update_canonical(fullname, last, dname == tocheck[1]) def update_canonical(fullname, last, newest): @@ -168,15 +159,14 @@ def update_canonical(fullname, last, newest): Note that if for some reason there are more than one canonical link this will change all of them. """ - p = pathlib.Path(fullname) pre = "https://matplotlib.org/" - pnew = pathlib.Path(last, *p.parts[1:]) + pnew = last.joinpath(*fullname.parts[1:]) newcanon = f"{pre}{str(pnew)}" - _log.info(f"{p} to {pre}{str(pnew)}") + _log.info(f"{fullname} to {pre}{str(pnew)}") rec = re.compile(b'' not in line: # write the line out if it wasn't an olddocs-message: @@ -221,25 +212,25 @@ def update_canonical(fullname, last, newest): np = None # figure out the newest version and trim tocheck at the same time: - tocheck = [t for t in tocheck if os.path.exists(t)] + tocheck = tuple(p for p in tocheck if p.exists()) print(tocheck) # html redirect or soft link most things in the top-level directory that # are not other modules or versioned docs. if not args.no_redirects: for entry in os.scandir("."): - if entry.name not in toignore: + fullname = pathlib.Path(entry.name) + if fullname not in toignore: if entry.is_dir(): do_links(entry.name) - elif entry.name.endswith((".htm", ".html")): - fullname = entry.name + elif fullname.suffix == ".html": last = findlast(fullname, tocheck) _log.debug(f"Checking: {fullname} found {last}") if last is not None: - os.remove(fullname) + fullname.unlink() _log.info(f"Rewriting HTML: {fullname} in {last}") - with open(fullname, "w") as fout: - oldname = os.path.join(last, fullname) + with fullname.open("w") as fout: + oldname = last / fullname st = html_redirect.format(newurl=oldname, canonical=oldname) fout.write(st) From a575269ed5a4e591a0e8b660d708a918c974b6f8 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 9 Feb 2021 18:34:15 -0500 Subject: [PATCH 16/18] Correctly ignore new sitemap location. --- _websiteutils/make_redirects_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index e08257e65cc..2cf7cee4e27 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -44,7 +44,7 @@ "mpl_toolkits", "_webpageutils", "xkcd", - "sitemap.xml", + "_sitemap", "robots.txt", "CNAME", ".git", From 6fcc3b744733fb9081825a96731fa5d3b42ce812 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 9 Feb 2021 18:47:48 -0500 Subject: [PATCH 17/18] Use functools.cache in redirects script. --- _websiteutils/make_redirects_links.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 2cf7cee4e27..5b5f19333ff 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +import functools import logging import multiprocessing import os @@ -53,22 +54,17 @@ logging.basicConfig(level=logging.DEBUG) -# beware of triksy mutable defaults! -def findlast(fname, tocheck, *, _cache={}): +@functools.cache +def findlast(fname, tocheck): """ Check the directories listed in ``tocheck`` to see if they have ``fname`` in them. Return the first one found, or None """ - if fname in _cache: - return _cache[fname] for t in tocheck: pnew = t / fname if pnew.exists(): - _cache[fname] = t return t - else: - _cache[fname] = None - return None + return None html_redirect = """ From 55bc6e31f9e3bace9a43dc1910948338deaa6d62 Mon Sep 17 00:00:00 2001 From: Jody Klymak Date: Tue, 9 Feb 2021 19:17:16 -0800 Subject: [PATCH 18/18] FIX: change to unreleased message --- _websiteutils/make_redirects_links.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_websiteutils/make_redirects_links.py b/_websiteutils/make_redirects_links.py index 5b5f19333ff..4299c9c2654 100755 --- a/_websiteutils/make_redirects_links.py +++ b/_websiteutils/make_redirects_links.py @@ -85,13 +85,13 @@ def findlast(fname, tocheck): # note these are all one line so they are easy to search and replace in the # html files (otherwise we need to close tags) warn_banner_exists = ( - '
You are reading an old version of the ' + '
You are reading an old version of the ' 'documentation (v{version}). For the latest version see ' '{url}
\n') warn_banner_old = ( - '
You are reading an old version of the ' + '
You are reading an old version of the ' 'documentation (v{version}). For the latest version see ' 'https://matplotlib.org/stable/
\n')