Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d4f4528

Browse files
committed
UTIL: add utility to redirect everything
1 parent 512a813 commit d4f4528

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

_websiteutils/make_redirects_links.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import glob
2+
import os
3+
import subprocess
4+
import pathlib
5+
import tempfile
6+
import re
7+
import logging
8+
9+
"""
10+
This script does two things that improve the website organization.
11+
12+
First, we used to host in the root of the webpage, but have now moved to
13+
``/stable/``. We do not want obsolete links to link to nothing (or that has
14+
been our policy), so we currently just keep the old version at the top level.
15+
Here, instead, we either softlink to the newest version, or replace the file by
16+
an html refresh redirect.
17+
18+
Second, it changes the canonical link in each html file to the newest version
19+
found of the html file (including stable if it's in the latest version).
20+
21+
This script takes a while, and is destructive, so should probably be run on a
22+
branch and pushed as a PR so it can easily be reverted.
23+
"""
24+
25+
# Logger shared by every function in this script.
_log = logging.getLogger('make_redirect_links')


# Directories searched for the newest copy of a file, in priority order:
# ``stable`` first, then every ``X.Y.Z`` with each component counting down
# from 6 to 0, so that newer versions are tried before older ones.
tocheck = ['stable'] + [f'{ver}.{minver}.{minminver}'
                        for ver in range(6, -1, -1)
                        for minver in range(6, -1, -1)
                        for minminver in range(6, -1, -1)]

# Top-level entries that are never turned into links or redirects: the
# versioned docs themselves plus unrelated projects and repository files.
# NOTE: every name needs its own comma; adjacent string literals are silently
# concatenated by Python, which previously merged '_webpageutils' and
# 'plot_directive' into a single (non-existent) name.
# '_websiteutils' is the directory this script itself lives in.
toignore = tocheck + ['mpl-probscale', 'mpl_examples', 'mpl_toolkits',
                      '_webpageutils', '_websiteutils', 'plot_directive',
                      'xkcd', 'sitemap.xml', 'robots.txt', 'CNAME', '.git']

logging.basicConfig(level=logging.DEBUG)
38+
39+
40+
def findlast(fname, tocheck):
    """
    Look for ``fname`` beneath each directory named in ``tocheck``.

    The directories are tried in the order given.  The first entry of
    ``tocheck`` that contains ``fname`` is returned; if none of them does,
    the result is None.
    """
    relative_parts = pathlib.Path(fname).parts
    hits = (
        candidate for candidate in tocheck
        if os.path.exists(pathlib.Path(candidate, *relative_parts))
    )
    # The generator is lazy, so probing stops at the first match.
    return next(hits, None)
52+
53+
# Template for the stub page that replaces a moved html file.  It takes
# exactly three ``%s`` substitutions, all of them the site-absolute path of
# the new location (e.g. ``/stable/index.html``): the meta-refresh target,
# the canonical link, and the visible fallback link.
html_redirect = """
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="refresh" content="0;url=https://matplotlib.org%s" />
<link rel="canonical" href="https://matplotlib.org%s" />
</head>
<body>
<h1>
The page has been moved to <a href="https://matplotlib.org%s">its new location</a>.
</h1>
</body>
</html>
"""
68+
69+
70+
def do_links(root0):
    """
    Either soft link a file at the top level to its newest position,
    or make an html redirect if it is an html file.

    Every file below *root0* is looked up in the directories listed in
    ``tocheck``.  When a copy exists there, the file is deleted and replaced:
    ``.htm``/``.html`` files become a small redirect page built from
    ``html_redirect``, anything else becomes a relative symlink to the copy.
    Files with no copy in any ``tocheck`` directory are left untouched.

    Parameters
    ----------
    root0 : str
        Directory to process, given relative to the current working
        directory (the lookups in ``tocheck`` are relative to it as well).
    """
    _log.info(f'Doing links on {root0}')
    # os.walk already descends into every subdirectory, so there is no
    # explicit recursion here.
    for root, _dirs, files in os.walk(root0):
        for name in files:
            fullname = os.path.join(root, name)
            last = findlast(fullname, tocheck)
            _log.debug(f'Checking: {fullname} found {last}')
            if last is None:
                continue

            os.remove(fullname)
            if name.endswith(('.htm', '.html')):
                # make an html redirect.
                _log.info(f'Rewriting HTML: {fullname} in {last}')
                # URLs always use forward slashes, whatever os.sep is.
                oldname = '/' + pathlib.PurePath(last, fullname).as_posix()
                with open(fullname, 'w', encoding='utf-8') as fout:
                    fout.write(html_redirect % (oldname, oldname, oldname))
            else:
                # soft link
                # The target must be relative to the directory holding the
                # link, so one level down it is
                # `ln -s ../3.1.1/boo/who boo/who`.
                oldname = os.path.relpath(os.path.join(last, fullname),
                                          start=root)
                _log.info(f'Linking {fullname} to {oldname}')
                os.symlink(oldname, fullname)
103+
104+
105+
def do_canonicals(dname):
    """
    For each html file in the versioned docs, make the canonical link point
    to the newest version.

    Parameters
    ----------
    dname : str
        Top-level versioned directory, relative to the current working
        directory.  Its first path component is stripped from each file path
        before that path is looked up in the ``tocheck`` directories.
    """
    _log.debug(f'Walking {dname}')
    # os.walk visits every nested directory exactly once.  Recursing on top
    # of it would reprocess each subtree once per ancestor level.
    for root, dirs, files in os.walk(dname):
        for name in files:
            fullname = os.path.join(root, name)
            _log.debug(f'Checking {fullname}')
            if not name.endswith(('.htm', '.html')):
                continue
            # Drop the leading version directory to get the version-neutral
            # path of the page.
            basename = pathlib.Path(*pathlib.Path(fullname).parts[1:])
            last = findlast(basename, tocheck)
            if last is not None:
                update_canonical(fullname, last)

        for d in dirs:
            _log.info(f'DIR: {d}')
125+
126+
127+
def update_canonical(fullname, last):
    """
    Change the canonical link in *fullname* to the same link in the
    version given by *last*.  We do this with a regexp to prevent
    removing any other content on a line that has the canonical link.

    Note that if for some reason there are more than one canonical link
    this will change all of them.

    Parameters
    ----------
    fullname : str or path-like
        Path of the html file to rewrite; its first component (the version
        directory) is replaced by *last* to build the new canonical URL.
    last : str
        Name of the directory holding the newest copy of the page.

    The file is processed as bytes, so its encoding and line endings are
    preserved, and it is replaced atomically once the new content is
    complete.
    """
    p = pathlib.Path(fullname)
    pre = 'https://matplotlib.org/'
    pnew = pathlib.Path(last, *p.parts[1:])
    # as_posix keeps forward slashes in the URL whatever os.sep is.
    newcanon = pre + pnew.as_posix()
    _log.info(f'{p} to {newcanon}')

    # ``[^"]*`` stops at the closing quote of the href; a greedy ``.*`` would
    # run on to the last quote of the line and eat unrelated attributes/tags.
    pattern = re.compile(rb'<link rel="canonical" href="[^"]*"')
    replacement = f'<link rel="canonical" href="{newcanon}"'.encode('utf-8')

    # Create the temporary file next to the target so the final replace never
    # crosses a filesystem boundary (which a plain rename cannot do).
    tmp = tempfile.NamedTemporaryFile(dir=p.parent, delete=False)
    try:
        with tmp, open(p, 'rb') as fin:
            for line in fin:
                if b'<link rel="canonical"' in line:
                    # A callable replacement is inserted verbatim, with no
                    # backslash-escape processing.
                    ll = pattern.sub(lambda match: replacement, line)
                    _log.debug('new %s->%s',
                               line.decode('utf-8', 'replace'),
                               ll.decode('utf-8', 'replace'))
                    tmp.write(ll)
                else:
                    tmp.write(line)
        # Temporary files are created owner-only; keep the original mode.
        os.chmod(tmp.name, os.stat(p).st_mode & 0o7777)
        os.replace(tmp.name, p)
    except BaseException:
        # Do not leave a stray temporary file behind in the docs tree.
        if os.path.exists(tmp.name):
            os.unlink(tmp.name)
        raise
153+
154+
155+
def main():
    """
    Run both passes over the current working directory.

    Pass 1 calls ``do_links`` on every top-level directory whose name is not
    listed in ``toignore``.  Pass 2 calls ``do_canonicals`` on every
    top-level directory whose name is in ``tocheck`` apart from its first
    entry.
    """
    # html redirect or soft link most things in the top-level directory that
    # are not other modules or versioned docs.
    for top in os.scandir('./'):
        if not top.is_dir():
            continue
        if top.name in toignore:
            continue
        do_links(top.name)
    _log.info('Done links and redirects')

    # change the canonical url for all html to the newest version in the docs:
    for top in os.scandir('./'):
        is_versioned_dir = top.is_dir() and top.name in tocheck[1:]
        if is_versioned_dir:
            _log.info(f'TOP DIR: {top.name}')
            do_canonicals(top.name)
169+
170+
# Run the two passes only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)