Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 298b671

Browse files
committed
UTIL: add utility to redirect everything
1 parent 512a813 commit 298b671

File tree

1 file changed

+202
-0
lines changed

1 file changed

+202
-0
lines changed

_websiteutils/make_redirects_links.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
import argparse
2+
import glob
3+
import logging
4+
import multiprocessing
5+
import os
6+
import pathlib
7+
import re
8+
import subprocess
9+
import sys
10+
import tempfile
11+
12+
13+
"""
14+
This script does two things that improve the website organization.
15+
16+
First, we used to host in the root of the webpage, but have now moved to
17+
``/stable/``. We do not want obsolete links to link to nothing (or that has
18+
been our policy), so we currently just keep the old version at the top level.
19+
Here, instead, we either softlink to the newest version, or replace the file by
20+
an html refresh redirect.
21+
22+
Second, it changes the canonical link in each html file to the newest version
23+
found of the html file (including stable if it's in the latest version).
24+
25+
This script takes a while, and is destructive, so should probably be run on a
26+
branch and pushed as a PR so it can easily be reverted.
27+
"""
28+
29+
# Module-level logger shared by every function in this script.
_log = logging.getLogger('make_redirect_links')

# Directories searched for the newest copy of a file, in priority order:
# ``stable`` first, then every ``X.Y.Z`` with each component counting down
# from 6 to 0 ('6.6.6', '6.6.5', ..., '0.0.0').
tocheck = ['stable'] + [f'{ver}.{minver}.{minminver}'
                        for ver in range(6, -1, -1)
                        for minver in range(6, -1, -1)
                        for minminver in range(6, -1, -1)]

# Top-level entries that are never redirected or soft linked: the versioned
# docs themselves plus the names listed here.
# The comma after '_webpageutils' matters: without it Python concatenates the
# adjacent literals into '_webpageutilsplot_directive' and neither name is
# ignored.
# NOTE(review): '_websiteutils' is added because that appears to be the
# directory this script lives in; '_webpageutils' is kept in case it is also
# a real directory -- confirm which spelling is intended.
toignore = tocheck + ['mpl-probscale', 'mpl_examples', 'mpl_toolkits',
                      '_webpageutils', '_websiteutils', 'plot_directive',
                      'xkcd', 'sitemap.xml', 'robots.txt', 'CNAME', '.git']

logging.basicConfig(level=logging.DEBUG)
42+
43+
44+
def findlast(fname, tocheck):
    """
    Look for ``fname`` underneath each directory named in ``tocheck``.

    The directories are tried in the order given and the first one that
    contains ``fname`` is returned.  ``None`` is returned when none of
    them does.
    """
    relative_parts = pathlib.Path(fname).parts
    hits = (candidate for candidate in tocheck
            if os.path.exists(pathlib.Path(candidate, *relative_parts)))
    # The generator is lazy, so the search stops at the first directory
    # that has the file.
    return next(hits, None)
56+
57+
# Template for the stub page that replaces an obsolete html file.
# It takes exactly three ``%s`` substitutions, each the site-relative path of
# the new location (e.g. ``/stable/index.html``): the meta-refresh target,
# the canonical link, and the fallback anchor in the page body.
html_redirect = """
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="refresh" content="0;url=https://matplotlib.org%s" />
<link rel="canonical" href="https://matplotlib.org%s" />
</head>
<body>
<h1>
The page has been moved <a href="https://matplotlib.org%s">here</a>
</h1>
</body>
</html>
"""
72+
73+
74+
def do_links(root0):
    """
    Replace files below ``root0`` by pointers to their newest versioned copy.

    Every file under ``root0`` is looked up with ``findlast`` in the
    directories listed in ``tocheck``.  When a copy is found, the file is
    deleted and replaced by an html refresh redirect (``.htm``/``.html``
    files) or by a relative soft link (everything else).  Files that are not
    found in any of those directories are left untouched.

    Parameters
    ----------
    root0 : str
        Directory to process, given relative to the current working
        directory (the paths built from it are also used as the path inside
        the versioned directory).
    """
    _log.info(f'Doing links on {root0}')
    # os.walk already descends into every subdirectory, so there is no
    # explicit recursion here.  Recursing on the bare names in ``dirs`` would
    # walk same-named directories relative to the working directory instead.
    for root, _dirs, files in os.walk(root0):
        for name in files:
            fullname = os.path.join(root, name)
            last = findlast(fullname, tocheck)
            _log.debug(f'Checking: {fullname} found {last}')
            if last is None:
                continue
            # Delete first: os.symlink fails if the path still exists.
            os.remove(fullname)
            if name.endswith(('.htm', '.html')):
                # make an html redirect.
                _log.info(f'Rewriting HTML: {fullname} in {last}')
                oldname = '/' + os.path.join(last, fullname)
                with open(fullname, 'w') as fout:
                    fout.write(html_redirect % (oldname, oldname, oldname))
            else:
                # soft link
                # The target must be relative to the directory holding the
                # link, e.g. `ln -s ../3.1.1/boo/who boo/who`.
                oldname = os.path.relpath(os.path.join(last, fullname),
                                          start=root)
                _log.info(f'Linking {fullname} to {oldname}')
                os.symlink(oldname, fullname)
107+
108+
109+
def do_canonicals(dname):
    """
    Update the canonical link of every html file found below ``dname``.

    For each ``.htm``/``.html`` file, the first path component is dropped
    and the remainder is looked up with ``findlast`` in the directories
    listed in ``tocheck``.  When a match is found the file is handed to
    ``update_canonical`` together with the matching directory.

    Parameters
    ----------
    dname : str
        Directory to walk, e.g. one of the version directories in
        ``tocheck``.
    """
    _log.debug(f'Walking {dname}')
    # os.walk already visits every subdirectory, so each file is handled
    # exactly once without any explicit recursion.
    for root, dirs, files in os.walk(dname):
        for name in files:
            fullname = os.path.join(root, name)
            _log.debug(f'Checking {fullname}')
            if not name.endswith(('.htm', '.html')):
                continue
            # Strip the leading directory to get the path that is shared
            # between the different versions of the docs.
            basename = pathlib.Path(*pathlib.Path(fullname).parts[1:])
            last = findlast(basename, tocheck)
            if last is not None:
                update_canonical(fullname, last)

        for d in dirs:
            _log.info(f'DIR: {d}')
129+
130+
131+
def update_canonical(fullname, last):
    """
    Change the canonical link in *fullname* to the same page in the
    version given by *last*.

    The new url is ``https://matplotlib.org/`` followed by *last* and the
    path of *fullname* with its first component removed.  The substitution
    is done with a regexp that stops at the closing quote of the ``href``
    value, so any other content on the same line is kept.

    Note that if for some reason there are more than one canonical link
    this will change all of them.

    Parameters
    ----------
    fullname : str
        Path of the html file to rewrite in place.
    last : str
        Directory (e.g. ``stable``) that the canonical link should point
        into.
    """
    p = pathlib.Path(fullname)
    pre = 'https://matplotlib.org/'
    pnew = pathlib.Path(last, *p.parts[1:])
    # as_posix() keeps forward slashes in the url on every platform.
    newcanon = pre + pnew.as_posix()
    _log.info(f'{p} to {newcanon}')

    # [^"]* (rather than .*) stops at the end of the href value instead of
    # swallowing everything up to the last quote on the line.
    pattern = re.compile(r'<link rel="canonical" href="[^"]*"')
    replacement = f'<link rel="canonical" href="{newcanon}"'

    tmpname = None
    try:
        # The temporary file is created next to the target so the final
        # os.replace never has to cross a filesystem boundary.
        with tempfile.NamedTemporaryFile(dir=str(p.parent),
                                         delete=False) as fout:
            tmpname = fout.name
            # surrogateescape round-trips bytes that are not valid utf-8 and
            # newline='' keeps the original line endings.
            with open(fullname, 'r', encoding='utf-8',
                      errors='surrogateescape', newline='') as fin:
                for line in fin:
                    if '<link rel="canonical"' in line:
                        # A callable replacement is inserted literally, with
                        # no backslash-escape processing.
                        ll = pattern.sub(lambda match: replacement, line)
                        _log.debug(f'new {line}->{ll}')
                        line = ll
                    fout.write(line.encode('utf-8', 'surrogateescape'))
        # Temporary files are created private; carry over the permission
        # bits of the file being replaced.
        os.chmod(tmpname, os.stat(fullname).st_mode & 0o7777)
        os.replace(tmpname, fullname)
    except BaseException:
        # Do not leave a stray temporary file behind on failure.
        if tmpname is not None and os.path.exists(tmpname):
            os.remove(tmpname)
        raise
157+
158+
159+
if __name__ == "__main__":
    # Command-line entry point.  Works on the current working directory:
    # first the redirects / soft links for the top-level entries, then the
    # canonical links inside the versioned docs.  Either step can be
    # switched off with a flag.
    parser = argparse.ArgumentParser(
        description='Replace obsolete top-level pages by redirects or soft '
                    'links to the newest versioned docs, and update the '
                    'canonical links in the versioned docs.')

    parser.add_argument('--np', type=int, help='Number of processors to use')
    parser.add_argument('--no_canonicals', help='do not do canonical links',
                        action="store_true")
    parser.add_argument('--no_redirects', help='do not do redirects links',
                        action="store_true")

    args = parser.parse_args()
    # Omitted (or 0) means: run serially.
    np = args.np or None

    # html redirect or soft link most things in the top-level directory that
    # are not other modules or versioned docs.
    if not args.no_redirects:
        # Take a snapshot of the directory first: entries are removed and
        # re-created below, and the listing should not change under the
        # iteration.  The ``with`` also closes the scandir iterator.
        with os.scandir('./') as scan:
            entries = list(scan)
        for entry in entries:
            if entry.name in toignore:
                continue
            if entry.is_dir():
                do_links(entry.name)
            elif entry.name.endswith(('.htm', '.html')):
                fullname = entry.name
                last = findlast(fullname, tocheck)
                _log.debug(f'Checking: {fullname} found {last}')
                if last is not None:
                    os.remove(fullname)
                    _log.info(f'Rewriting HTML: {fullname} in {last}')
                    oldname = '/' + os.path.join(last, fullname)
                    with open(fullname, 'w') as fout:
                        fout.write(
                            html_redirect % (oldname, oldname, oldname))
        _log.info('Done links and redirects')

    # change the canonical url for all html to the newest version in the docs:
    if not args.no_canonicals:
        # tocheck[0] ('stable') is skipped; only the numbered versions are
        # rewritten.
        versions = tocheck[1:]
        if np is not None:
            with multiprocessing.Pool(np) as pool:
                pool.map(do_canonicals, versions)
        else:
            for t in versions:
                do_canonicals(t)

0 commit comments

Comments
 (0)