diff --git a/.github/workflows/full-release.yml b/.github/workflows/full-release.yml
new file mode 100644
index 0000000..e2e3bdd
--- /dev/null
+++ b/.github/workflows/full-release.yml
@@ -0,0 +1,37 @@
+name: Zipped Release Complete
+
+on:
+ workflow_dispatch:
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+
+ - name: Install wkhtmltopdf
+ run: |
+ sudo apt-get update -y && sudo apt-get install -y xfonts-base xfonts-75dpi
+ wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+ sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+ - name: Install Liberation Fonts
+ run: |
+ sudo apt-get update -y
+ sudo apt-get install -y fonts-liberation
+
+ - name: Run .py script for Doc Generation
+ run: |
+ cmake . -Bbuild
+ cmake --build build
+
+ - name: Upload zipped doc file to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: build/*.zip
+ tag: Docs-Release
+ overwrite: true
+ file_glob: true
+ body: "This is a release containing both the zip files (one for the Docs Generated, another containing the Generated PDFs) using Github Actions."
\ No newline at end of file
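This workflow only runs on manual dispatch. Assuming the GitHub CLI is installed and authenticated for the repository, a run can be started from a terminal:

    gh workflow run full-release.yml
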
diff --git a/.github/workflows/pdf.yml b/.github/workflows/pdf.yml
new file mode 100644
index 0000000..34dc1dd
--- /dev/null
+++ b/.github/workflows/pdf.yml
@@ -0,0 +1,37 @@
+name: pdf
+
+on:
+ workflow_dispatch:
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+
+ - name: Install wkhtmltopdf
+ run: |
+ sudo apt-get update -y && sudo apt-get install -y xfonts-base xfonts-75dpi
+ wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+ sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+ - name: Install Liberation Fonts
+ run: |
+ sudo apt-get update -y
+ sudo apt-get install -y fonts-liberation
+
+ - name: Run .py script for Doc Generation
+ run: |
+ cmake . -Bbuild
+ cmake --build build
+
+ - name: Upload zipped doc file to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: build/PDF-Offline-Manual.zip
+ asset_name: PDF-Offline-Manual.zip
+ tag: Docs-Release
+ overwrite: true
+ body: "This is a release containing the PDF version of the Docs Generated using Github Actions."
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1e27df3..7caaab1 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -12,6 +12,17 @@ jobs:
- name: Checkout
uses: actions/checkout@v2
+ - name: Install wkhtmltopdf
+ run: |
+ sudo apt-get update -y && sudo apt-get install -y xfonts-base xfonts-75dpi
+ wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+ sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+ - name: Install Liberation Fonts
+ run: |
+ sudo apt-get update -y
+ sudo apt-get install -y fonts-liberation
+
- name: Run .py script for Doc Generation
run: |
cmake . -Bbuild
@@ -21,7 +32,7 @@ jobs:
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
- file: build/docs.zip
+ file: build/Generated-Offline-Manual.zip
asset_name: Generated-Offline-Manual.zip
tag: Docs-Release
overwrite: true
diff --git a/scripts/config.yml b/scripts/config.yml
index 6f409fe..94db412 100644
--- a/scripts/config.yml
+++ b/scripts/config.yml
@@ -1,9 +1,17 @@
---
-url : 'https://en.wikibooks.org/wiki/OpenSCAD_User_Manual'
-url_css : 'https://en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*'
-url_wiki : 'https://en.wikibooks.org'
-url_api : 'https://en.wikibooks.org/w/api.php?action=parse&format=xml&prop=text&page='
+url : https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
+url_css : https://en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*
+url_wiki : https://en.wikibooks.org
+url_api : https://en.wikibooks.org/w/api.php?action=parse&format=xml&prop=text&page=
-pages_for_exclusion : ['https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Example/Strandbeest']
+pages_for_exclusion : [https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Example/Strandbeest]
-user_agent_val : 'Generator-for-Offline-Documentation (https://github.com/abshk-jr ; https://github.com/opencax/GSoC/issues/6 ; https://summerofcode.withgoogle.com/projects/#6746958066089984) urllib/3.9.0 [BeautifulSoup/4.9.0]'
+user_agent_val : Generator-for-Offline-Documentation (https://github.com/abshk-jr ; https://github.com/opencax/GSoC/issues/6 ; https://summerofcode.withgoogle.com/projects/#6746958066089984) urllib/3.9.0 [BeautifulSoup/4.9.0]
+
+url_print : [https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Print_version, https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/The_OpenSCAD_Language]
+
+cheatsheet_url : https://openscad.org/cheatsheet/
+
+options :
+ enable-local-file-access: null
+ keep-relative-links: ''
\ No newline at end of file
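The new options mapping is handed straight to pdfkit at the bottom of the generator script (pdfkit.from_file(..., options=options)). A minimal standalone sketch of how those keys reach wkhtmltopdf, assuming pdfkit's documented normalization (bare keys get a leading '--', and null/empty values turn the option into an argument-less flag); the page file names here are placeholders:

    import pdfkit
    import yaml

    # load the same mapping the generator reads from config.yml
    with open('scripts/config.yml') as fh:
        cfg = yaml.safe_load(fh)

    # both keys end up on the wkhtmltopdf command line as
    # --enable-local-file-access and --keep-relative-links
    pdfkit.from_file('page.html', 'page.pdf', options=cfg['options'])
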
diff --git a/scripts/offline-doc-generator.py b/scripts/offline-doc-generator.py
index 23c91bc..3023deb 100644
--- a/scripts/offline-doc-generator.py
+++ b/scripts/offline-doc-generator.py
@@ -1,287 +1,356 @@
'''
-This is the program for Generator for offline documentation
-more about which can be found out at https://github.com/opencax/GSoC/issues/6
-and the GSOC project details for the same are present at
+This is the code for the `Generator for offline documentation`; more
+about the project can be read at https://github.com/opencax/GSoC/issues/6
+and the GSoC project details are available at
https://summerofcode.withgoogle.com/projects/#6746958066089984
'''
-import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, os, yaml
+import urllib.request, urllib.parse, os, yaml
from bs4 import BeautifulSoup as bs,Comment, Doctype
import shutil
+import pdfkit
+import platform
+if platform.system() == 'Linux': import cairosvg #only needed on Linux, where math SVGs are converted to PNGs for the PDF build
with open(os.path.join( os.path.dirname(__file__),'config.yml'),'r') as file:
- config = yaml.safe_load(file)
+ config = yaml.safe_load(file)
+#Update the global variables with the data from config.yml
globals().update(config)
-'''
-The line above does the same work as the code given below
----------------------------------------------------------
-url = config['url']
-url_css = config['url_css']
-url_wiki = config['url_wiki']
-url_api = config['url_api']
-pages_for_exclusion = config['pages_for_exclusion']
-user_agent_val = config['user_agent_val']
----------------------------------------------------------
-'''
-
dir_docs = 'openscad_docs'
dir_imgs = os.path.join( dir_docs, 'imgs')
dir_maths = os.path.join( dir_docs, 'imgs','maths')
dir_styles = os.path.join( dir_docs, 'styles')
-#Create the directories to save the doc if they don't exist
+#Create the directories to save the documentation
if not os.path.exists(dir_docs): os.makedirs(dir_docs)
if not os.path.exists(dir_imgs): os.makedirs(dir_imgs)
if not os.path.exists(dir_maths): os.makedirs(dir_maths)
if not os.path.exists(dir_styles): os.makedirs(dir_styles)
+dir_pdfs = 'openscad_docs_pdf'
+if not os.path.exists(dir_pdfs): os.makedirs(dir_pdfs)
+dir_docpdfs = 'docs_pdf'
+if not os.path.exists(dir_docpdfs): os.makedirs(dir_docpdfs)
+dir_pdfimgs = os.path.join( dir_pdfs, 'imgs')
+if not os.path.exists(dir_pdfimgs): os.makedirs(dir_pdfimgs)
+dir_pdfmaths = os.path.join( dir_pdfs, 'imgs', 'maths')
+if not os.path.exists(dir_pdfmaths): os.makedirs(dir_pdfmaths)
+
pages =[]
pages += pages_for_exclusion
imgs =[]
maths =[]
-def getUrl(url):
- '''
- This function generates the complete url after getting urls form src
- /wiki/OpenSCAD_User_Manual get converted to https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
-
- '''
- if url.startswith('//'):
- url = 'https:'+url
- elif not url.startswith( url_wiki ):
- url = urllib.parse.urljoin( url_wiki, url[0]=="/" and url[1:] or url)
- return url
-
-def getTags(soup):
- '''
- This function handles the different tags present in the HTML document
- for example the image tags
-
- '''
- for a in soup.find_all('a'):
- href= a.get('href')
- if href:
- if href[0] != '#':
- href = getUrl(href)
- if (href.startswith('/wiki/OpenSCAD_User_Manual') or href.startswith(url_wiki + '/wiki/OpenSCAD_User_Manual')):
- newhref = (href.replace('#', '.html#') if '#' in href else href+'.html').split('/')[-1]
-
- if 'Print_version.html' not in newhref:
- getPages(url=href)
- a['href']= newhref
-
- if a.img :
- getImages( a )
-
-def getMaths(soup):
- '''
- This function generates the image version of the math formulas
- to be displayed in various HTML files, for example
- https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Mathematical_Operators
- and saves them to the directory /openscad_docs/imgs/maths
-
- '''
- for img in soup.find_all('img'):
- try:
- for cls in img['class']:
- if('math' in cls):
- mathname = img['src'].split("/")[-1].split("\\")[-1] + '.svg'
- savepath = os.path.join( dir_maths, mathname)
- if (not mathname in maths):
- opener = urllib.request.build_opener()
- opener.addheaders = [('User-Agent',user_agent_val)]
- urllib.request.install_opener(opener)
- urllib.request.urlretrieve( img['src'] , savepath )
- maths.append( mathname )
- linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/')
- img['src'] = linkurl
-
- except:
- pass
-
-def getImages(tag):
- '''
- This function generates the images present the in HTML documents
- and saves them to the directory /openscad_docs/imgs
-
- '''
- src = getUrl( tag.img['src'] )
- imgname = src.split("/")[-1]
- imgname = imgname.replace('%','_')
- imgpath = os.path.join( dir_imgs, imgname)
-
- #The following is to download the image if it hasn't alrady been downloaded
- if not imgpath in imgs:
- opener = urllib.request.build_opener()
- opener.addheaders = [('User-Agent',user_agent_val)]
- urllib.request.install_opener(opener)
- urllib.request.urlretrieve(src , imgpath)
- imgs.append(imgpath)
-
- del tag.img['srcset']
- imgpath = os.path.join('.', 'imgs', imgname).replace('\\','/')
- tag.img['src'] = imgpath
- tag['href']= imgpath
-
-def cleanSoup(soup):
- '''
- This function cleans the soup by removing the redundant HTML tags
- and the parts that are unrelated to the User Manual
- '''
-
- #The following deletes the Tags which aren't required in the User Manual
- red_div_cls = ["printfooter","catlinks","noprint","magnify"]
- red_table_cls= ['noprint','ambox']
- red_input_cls= ['toctogglecheckbox']
- for cls in red_div_cls:
- for tag in soup.findAll('div',{'class':cls}):
- tag.decompose()
- for cls in red_table_cls:
- for tag in soup.findAll('table',{'class':cls}):
- tag.decompose()
- for cls in red_input_cls:
- for tag in soup.findAll('input',{'class':cls}):
- tag.decompose()
- for tag in soup.findAll('style'):
- tag.decompose()
-
- #The following removes the comments present in the HTML document
- comments = soup.findAll(text=lambda text: isinstance(text, Comment))
- [comment.extract() for comment in comments]
-
- #The following replaces the redundant div Tags with the content present in inside of them
- rep_div_cls = ["mw-highlight"]
- for kls in rep_div_cls:
- for tag in soup.findAll('div',kls):
- tag.replaceWithChildren()
-
- #The following is for the cleaning of some redundant li tags
- for _ in range(0,7):
- for tag in soup.findAll('li',{'class':f'toclevel-{_}'}):
- del tag['class']
-
- #The following is for the cleaning/removal of some redundant span tags
- for tag in soup.findAll('span'):
- try:
- if(len(tag.text)==0):
- tag.decompose()
- for cls in tag['class']:
- if(len(cls) <= 2):
- tag.replaceWithChildren()
- if('mathml' in cls):
- tag.decompose()
- if cls in ['toctext']:
- tag.replaceWithChildren()
- if cls in ['mw-headline']:
- del tag['class']
- if cls in ['mw-editsection','toctogglespan','noprint']:
- tag.decompose()
-
-
- except:
- pass
-
- for tag in soup.findAll('ul'):
- tag['style'] = 'list-style-image:none'
+def getParsedUrl(url):
+ '''
+ This function returns a parsed url after normalizing a url taken from an href or src attribute
+ e.g. /wiki/OpenSCAD_User_Manual gets converted to https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
+
+ '''
+ if url.startswith('//'):
+ url = 'https:'+url
+ elif not url.startswith( url_wiki ):
+ url = urllib.parse.urljoin( url_wiki, url[1:] if url[0]=="/" else url)
+ return urllib.parse.urlparse(url)
+
+def getTags(soup,pdf,cs=False):
+ '''
+ This function handles the different tags present in the HTML document
+ e.g. updating the <a> tags with the new links, or handling the <img> tags
+
+ '''
+ for a in soup.find_all('a'):
+ href= a.get('href')
+ if href:
+ if href[0] != '#':
+ if cs:
+ href = href.replace('w/index.php?title=','wiki/')
+ hrefparse = getParsedUrl(href)
+ hrefurl=hrefparse.geturl()
+ if pdf:
+ a['href']= hrefurl
+ elif hrefparse.path.startswith('/wiki/OpenSCAD_User_Manual'):
+ newhref = (hrefurl.replace('#', '.html#') if '#' in hrefurl else hrefurl+'.html').split('/')[-1]
+
+ if 'Print_version.html' not in newhref:
+ if not cs:
+ getPages(url=hrefurl)
+ a['href']= newhref
+
+ if a.img :
+ getImages( a,pdf )
+
+def getMaths(soup,pdf):
+ '''
+ This function downloads the SVG files for the Math Formulas
+ that are being used on various pages, for example at
+ https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Mathematical_Operators
+ and saves them to the directory /openscad_docs/imgs/maths
+
+ '''
+ for img in soup.find_all('img'):
+ try:
+ for cls in img['class']:
+ if('math' in cls):
+ mathname = img['src'].split("/")[-1].split("\\")[-1] + '.svg'
+ savepath = os.path.join( dir_maths, mathname) if not pdf else os.path.join( dir_pdfmaths, mathname)
+ savepath_png = savepath.replace('.svg','.png')
+ if (not mathname in maths) or pdf:
+ opener = urllib.request.build_opener()
+ opener.addheaders = [('User-Agent',user_agent_val)]
+ urllib.request.install_opener(opener)
+ urllib.request.urlretrieve( img['src'] , savepath )
+ if pdf and platform.system() == 'Linux':
+ '''
+ This part of the code converts the SVGs to PNGs if the program is being run on Linux,
+ to overcome the issue where the WebKit engine renders the SVG images at an incorrect size
+ '''
+ cairosvg.svg2png(url=savepath, write_to=savepath_png)
+ os.remove(savepath)
+ maths.append( mathname )
+ if pdf and platform.system() == 'Linux':
+ linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/').replace('.svg','.png')
+ else:
+ linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/')
+ img['src'] = linkurl
+
+ except Exception:
+ pass
+
+def getImages(tag,pdf):
+ '''
+ This function downloads the images present in the HTML files
+ and saves them to the directory /openscad_docs/imgs
+
+ '''
+ srcparse = getParsedUrl( tag.img['src'] )
+ imgname = srcparse.path.split("/")[-1]
+ imgname = imgname.replace('%','_')
+ imgpath = os.path.join( dir_imgs, imgname) if not pdf else os.path.join( dir_pdfimgs, imgname)
+
+ #The following is to download the image if it hasn't already been downloaded
+ if not imgpath in imgs:
+ opener = urllib.request.build_opener()
+ opener.addheaders = [('User-Agent',user_agent_val)]
+ urllib.request.install_opener(opener)
+ urllib.request.urlretrieve(srcparse.geturl() , imgpath)
+ imgs.append(imgpath)
+
+ del tag.img['srcset']
+ imgpath = os.path.join('.', 'imgs', imgname).replace('\\','/')
+ tag.img['src'] = imgpath
+ tag['href']= imgpath
+
+def cleanSoup(soup,pdf):
+ '''
+ This function cleans the HTML by removing the redundant tags
+ and the sections which are not necessary for the User Manual
+ '''
+
+ #The following deletes the Tags which aren't required in the User Manual
+ red_dict = {'div' : ["printfooter","catlinks","noprint","magnify"], 'table' : ['noprint'], 'input' : ['toctogglecheckbox']}
+ for tag_name,cls_list in red_dict.items():
+ for cls in cls_list:
+ for tag in soup.findAll(tag_name,{'class':cls}):
+ tag.decompose()
+
+ for tag in soup.findAll('table',{'class':'ambox'}):
+ tag.decompose()
+
+ for tag in soup.findAll('style'):
+ tag.decompose()
+
+ #The following removes the comments present in the HTML document
+ comments = soup.findAll(text=lambda text: isinstance(text, Comment))
+ for comment in comments: comment.extract()
+
+ #The following replaces the redundant div Tags with the content present inside of them
+ rep_div_cls = ["mw-highlight"]
+ for kls in rep_div_cls:
+ for tag in soup.findAll('div',kls):
+ tag.replaceWithChildren()
+
+ #The following removes the non-contributing classes in li tags
+ for level in range(0,7):
+ for tag in soup.findAll('li',{'class':f'toclevel-{level}'}):
+ del tag['class']
+
+ #The following is for the removal/cleaning of some redundant span tags
+ for tag in soup.findAll('span'):
+ try:
+ if(len(tag.text)==0):
+ tag.decompose()
+ for cls in tag['class']:
+ if(len(cls) <= 2):
+ tag.replaceWithChildren()
+ elif cls in ['toctext'] or (pdf and cls in ['tocnumber']):
+ tag.replaceWithChildren()
+ elif cls in ['mw-headline']:
+ del tag['class']
+ elif 'mathml' in cls or cls in ['mw-editsection','toctogglespan','noprint']:
+ tag.decompose()
+
+ except Exception:
+ pass
+
+ #The following is to replace the tabs in the code blocks with spaces
+ for txt in soup.findAll('pre'):
+ txt.string = txt.text.replace('\t',' ')
+ if pdf and platform.system() == 'Linux':
+ #use Liberation Mono so wkhtmltopdf renders the code blocks consistently
+ for pre in soup.findAll('pre'):
+ pre['style']="font-family:'Liberation Mono'"
+
+ #The following unwraps the tables in the pdfs for better formatting
+ if pdf:
+ for table in soup.findAll('table'):
+ for row in table.findAll('tr'):
+ for col in row.findAll('td'):
+ col.unwrap()
+ row.unwrap()
+ table.unwrap()
+
+ for tag in soup.findAll('ul'):
+ tag['style'] = 'list-style-image:none'
def getFooter( url, name ):
- '''
- This function generates the Footer with the license attribution for all the pages
+ '''
+ This function generates the Footer containing the necessary license attribution
- '''
- footer = (f'''''')
+ '''
+ footer = (f'''''')
- return bs(footer,'html.parser')
+ return bs(footer,'html.parser')
def getStyled(soup,title):
- tag = Doctype('html')
- soup.insert(0, tag)
- soup.html['lang']='en'
- meta_tag = soup.new_tag('meta')
- meta_tag['charset'] = 'UTF-8'
- soup.head.insert(0,meta_tag)
- css_tag = bs('<link rel="stylesheet" href="./styles/style.css">','html.parser')
- soup.head.append(css_tag)
- soup.body['class'] = 'mw-body'
- soup.body['style']=['height:auto']
- del soup.body.div['class']
- soup.body.div['id']='bodyContent'
- h1_tag = bs(f'<h1>{title}</h1>','html.parser')
- soup.body.insert(0,h1_tag)
-
-def getPages( url=url,folder=dir_docs ):
- '''
- This is the main function of the program
- which generates the HTML document from the given url
- and calls different functions to generate the Offline
- version of the page and save it under the directory /openscad_docs
-
- '''
- url = getUrl(url)
- if url.split("#")[0] not in pages:
- pages.append( url.split("#")[0] ) #add the url to the `pages` list so that they don't get downloaded again
- wiki_url = url
- url = url.replace(url_wiki+'/wiki/', "")
- url = url_api + url
-
- request = urllib.request.Request(url)
- request.add_header('User-Agent',user_agent_val)
- response = urllib.request.urlopen(request)
- xml = response.read()
- soup = bs(xml, 'lxml')
- soup = soup.text
- soup = bs(soup,'html5lib')
-
- name = url.split("=")[-1]
- name = name.split("/")[-1].split('#')[0] #to convert OpenSCAD_User_Manual/String_Functions#str to String_Functions
-
- title = soup.new_tag("title") #to add title to the pages
- title.string = name.replace("_" , " ")
- soup.html.head.append(title)
-
- name = name + ".html"
- filepath = os.path.join( folder, name)
-
- print("Saving: ", filepath)
-
- getStyled(soup,title.string)
- cleanSoup(soup)
- getMaths(soup)
- getTags(soup)
-
- soup.body.append( getFooter( wiki_url, title.text ))
-
- open(filepath, "w", encoding="utf-8").write( str(soup) )
-
-
-def getCSS():
- '''
- This function runs once after the HTML files have been downloaded
- and downloads the CSS given at https://www.mediawiki.org/wiki/API:Styling_content
- and saves it to openscad_docs/styles
-
- '''
- request = urllib.request.Request(url_css)
- request.add_header('User-Agent',user_agent_val)
- response = urllib.request.urlopen(request)
- css_soup = response.read()
- css = bs(css_soup, 'html5lib')
- csspath = os.path.join( dir_styles, 'style.css')
- open( csspath, "w" , encoding="utf-8").write(css.body.text)
-
-
-
+ tag = Doctype('html')
+ soup.insert(0, tag)
+ soup.html['lang']='en'
+ meta_tag = soup.new_tag('meta')
+ meta_tag['charset'] = 'UTF-8'
+ soup.head.insert(0,meta_tag)
+ css_tag = bs('<link rel="stylesheet" href="./styles/style.css">','html.parser')
+ soup.head.append(css_tag)
+ soup.body['class'] = 'mw-body'
+ soup.body['style']=['height:auto;background-color:#ffffff']
+ del soup.body.div['class']
+ soup.body.div['id']='bodyContent'
+ h1_tag = bs(f'<h1>{title}</h1>','html.parser')
+ soup.body.insert(0,h1_tag)
+
+def getPages( url=url,folder=dir_docs,pdf=False ):
+ '''
+ This is the main function of the program
+ which downloads the webpage at the given url
+ and calls different functions to generate the Offline
+ version of the page and save it under the directory /openscad_docs
+
+ '''
+ if url.split("#")[0] not in pages or pdf:
+ pages.append( url.split("#")[0] ) #adds the url to the `pages` list so that the page doesn't get downloaded again
+ wiki_url = url
+ url = url.replace(url_wiki+'/wiki/', "")
+ url = url_api + url
+
+ request = urllib.request.Request(url)
+ request.add_header('User-Agent',user_agent_val)
+ response = urllib.request.urlopen(request)
+ xml = response.read()
+ #the API returns XML whose text payload is the page HTML,
+ #so the response is parsed twice
+ soup = bs(xml, 'html.parser')
+ soup = bs(soup.text,'html5lib')
+
+ name = url.split("=")[-1]
+ name = name.split("/")[-1].split('#')[0] #converts OpenSCAD_User_Manual/String_Functions#str to String_Functions
+
+ if pdf and name == 'Print_version': name = 'OpenSCAD_User_Manual'
+
+ title = soup.new_tag("title") #adds title to the pages
+ title.string = name.replace("_" , " ")
+ soup.html.head.append(title)
+
+ name = name + ".html"
+ filepath = os.path.join( folder, name)
+
+ print("Saving: ", filepath)
+
+ getStyled(soup,title.string)
+ cleanSoup(soup,pdf)
+ getMaths(soup,pdf)
+ getTags(soup,pdf,False)
+
+ soup.body.append( getFooter( wiki_url, title.text ))
+
+ open(filepath, "w", encoding="utf-8").write( str(soup) )
+
+
+def getCSS(url = url_css, css_name = 'style.css'):
+ '''
+ This function runs once after the HTML files have been downloaded
+ and downloads the CSS given at https://www.mediawiki.org/wiki/API:Styling_content
+ and saves it to openscad_docs/styles
+
+ '''
+ request = urllib.request.Request(url)
+ request.add_header('User-Agent',user_agent_val)
+ response = urllib.request.urlopen(request)
+ css_soup = response.read()
+ css = bs(css_soup, 'html5lib')
+ csspath = os.path.join( dir_styles, css_name)
+ open( csspath, "w" , encoding="utf-8").write(css.body.text)
+
+def getPdf():
+ for link in url_print:
+ getPages(link,folder=dir_pdfs,pdf=True)
+ styles_dst = os.path.join( os.getcwd(), dir_pdfs, 'styles')
+ if os.path.exists(styles_dst): shutil.rmtree(styles_dst)
+ shutil.copytree(os.path.join( os.getcwd(), dir_docs, 'styles'), styles_dst)
+
+def cheatSheet():
+ '''
+ This function runs once to download the Cheat Sheet from
+ https://openscad.org/cheatsheet/ ; the WikiBooks links in it
+ are rewritten to point to the offline copy of the Manual
+
+ '''
+ request = urllib.request.Request(cheatsheet_url)
+ response = urllib.request.urlopen(request)
+ soup = response.read()
+ soup = bs(soup,'lxml')
+ for css in soup.find_all("link",href=True):
+ css_name = css.attrs.get("href")
+ url_css = urllib.parse.urljoin(cheatsheet_url, css_name)
+ if '.css' in url_css:
+ getCSS(url_css,css_name.split('/')[-1])
+ css['href'] = css['href'].replace('css/','styles/')
+ getTags(soup,False,True)
+ filepath = os.path.join( dir_docs , 'CheatSheet.html')
+ open(filepath, "w", encoding="utf-8").write( str(soup) )
+
+
+
if(__name__ == '__main__'):
- print(f'Started Offline Generator.py\nNow downloading the User-Manual from {url}')
- getPages(url)
- getCSS()
- print("Total number of pages generated is \t:\t", len(pages)-len(pages_for_exclusion))
- print("Total number of images generated is \t:\t", len(imgs))
- print("Total number of math-images generated is:\t", len(maths))
- shutil.make_archive('docs', 'zip', dir_docs)
+ print(f'Started offline-doc-generator.py\nNow downloading the User-Manual from {url}')
+ getPages(url)
+ getCSS()
+ print("Total number of pages generated is \t:\t", len(pages)-len(pages_for_exclusion))
+ print("Total number of images generated is \t:\t", len(imgs))
+ print("Total number of math-images generated is:\t", len(maths))
+ cheatSheet()
+ shutil.make_archive('Generated-Offline-Manual', 'zip', dir_docs)
+
+ getPdf()
+ files = os.listdir(os.path.join( os.getcwd(), dir_pdfs))
+ for file in files:
+ if file.endswith(".html"):
+ file_pdf = file.replace('.html','.pdf')
+ pdfkit.from_file(os.path.join( os.getcwd(), dir_pdfs, file), os.path.join( os.getcwd(), dir_docpdfs, file_pdf), options=options)
+
+ shutil.make_archive('PDF-Offline-Manual', 'zip', dir_docpdfs)
+ shutil.rmtree(dir_pdfs)
+ shutil.rmtree(dir_docpdfs)
\ No newline at end of file
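For reference, the link rewriting performed by getParsedUrl and getTags boils down to the following stdlib-only transformation (a sketch; the sample URL is illustrative):

    import urllib.parse

    url_wiki = 'https://en.wikibooks.org'

    def to_local(href):
        # protocol-relative //host/path -> https://host/path
        if href.startswith('//'):
            href = 'https:' + href
        # site-relative /wiki/Page -> https://en.wikibooks.org/wiki/Page
        elif not href.startswith(url_wiki):
            href = urllib.parse.urljoin(url_wiki, href.lstrip('/'))
        # manual pages become local files, keeping any fragment
        if urllib.parse.urlparse(href).path.startswith('/wiki/OpenSCAD_User_Manual'):
            href = (href.replace('#', '.html#') if '#' in href else href + '.html').split('/')[-1]
        return href

    print(to_local('/wiki/OpenSCAD_User_Manual/String_Functions#str'))
    # -> String_Functions.html#str
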
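The two-stage parse in getPages (XML envelope first, HTML payload second) can also be exercised on its own; a minimal sketch, with the page name and User-Agent string as placeholders:

    import urllib.request
    from bs4 import BeautifulSoup as bs

    url_api = 'https://en.wikibooks.org/w/api.php?action=parse&format=xml&prop=text&page='
    req = urllib.request.Request(url_api + 'OpenSCAD_User_Manual/String_Functions')
    req.add_header('User-Agent', 'offline-doc-generator-example')
    xml = urllib.request.urlopen(req).read()

    # the API wraps the rendered page HTML inside an XML envelope, so the
    # text of the first parse is itself parsed again, this time as HTML
    payload = bs(xml, 'html.parser').text
    soup = bs(payload, 'html5lib')
    print(payload[:200])
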
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 71e0fe0..58f6738 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,4 +1,6 @@
beautifulsoup4==4.9.3
PyYAML==5.4.1
lxml==4.6.3
-html5lib==1.1
\ No newline at end of file
+html5lib==1.1
+pdfkit==0.6.1
+CairoSVG==2.5.2
\ No newline at end of file
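pip only covers the Python side of the new dependencies; wkhtmltopdf itself is a system package, which the workflows above install via dpkg. A typical local run, assuming Python 3 and wkhtmltopdf are already on the PATH:

    pip install -r scripts/requirements.txt
    python scripts/offline-doc-generator.py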