diff --git a/.github/workflows/full-release.yml b/.github/workflows/full-release.yml
new file mode 100644
index 0000000..e2e3bdd
--- /dev/null
+++ b/.github/workflows/full-release.yml
@@ -0,0 +1,37 @@
+name: Zipped Release Complete
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install wkhtmltopdf
+        run: |
+          sudo apt-get install -y xfonts-base xfonts-75dpi
+          wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+          sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+      - name: Install Liberation Fonts
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y fonts-liberation
+
+      - name: Run .py script for Doc Generation
+        run: |
+          cmake . -Bbuild
+          cmake --build build
+
+      - name: Upload zipped doc file to release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: build/*.zip
+          tag: Docs-Release
+          overwrite: true
+          file_glob: true
+          body: "This release contains both zip files (one with the generated docs, the other with the generated PDFs), built using GitHub Actions."
\ No newline at end of file
diff --git a/.github/workflows/pdf.yml b/.github/workflows/pdf.yml
new file mode 100644
index 0000000..34dc1dd
--- /dev/null
+++ b/.github/workflows/pdf.yml
@@ -0,0 +1,37 @@
+name: pdf
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Install wkhtmltopdf
+        run: |
+          sudo apt-get install -y xfonts-base xfonts-75dpi
+          wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+          sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+      - name: Install Liberation Fonts
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y fonts-liberation
+
+      - name: Run .py script for Doc Generation
+        run: |
+          cmake . -Bbuild
+          cmake --build build
+
+      - name: Upload zipped doc file to release
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: build/PDF-Offline-Manual.zip
+          asset_name: PDF-Offline-Manual.zip
+          tag: Docs-Release
+          overwrite: true
+          body: "This release contains the PDF version of the docs, generated using GitHub Actions."
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1e27df3..7caaab1 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -12,6 +12,17 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2
 
+      - name: Install wkhtmltopdf
+        run: |
+          sudo apt-get install -y xfonts-base xfonts-75dpi
+          wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
+          sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
+
+      - name: Install Liberation Fonts
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y fonts-liberation
+
       - name: Run .py script for Doc Generation
         run: |
           cmake . -Bbuild
@@ -21,7 +32,7 @@ jobs:
         uses: svenstaro/upload-release-action@v2
         with:
           repo_token: ${{ secrets.GITHUB_TOKEN }}
-          file: build/docs.zip
+          file: build/Generated-Offline-Manual.zip
           asset_name: Generated-Offline-Manual.zip
           tag: Docs-Release
           overwrite: true
diff --git a/scripts/config.yml b/scripts/config.yml
index 6f409fe..94db412 100644
--- a/scripts/config.yml
+++ b/scripts/config.yml
@@ -1,9 +1,17 @@
 ---
-url : 'https://en.wikibooks.org/wiki/OpenSCAD_User_Manual'
-url_css : 'https://en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*'
-url_wiki : 'https://en.wikibooks.org'
-url_api : 'https://en.wikibooks.org/w/api.php?action=parse&format=xml&prop=text&page='
+url : https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
+url_css : https://en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*
+url_wiki : https://en.wikibooks.org
+url_api : https://en.wikibooks.org/w/api.php?action=parse&format=xml&prop=text&page=
 
-pages_for_exclusion : ['https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Example/Strandbeest']
+pages_for_exclusion : [https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Example/Strandbeest]
 
-user_agent_val : 'Generator-for-Offline-Documentation (https://github.com/abshk-jr ; https://github.com/opencax/GSoC/issues/6 ; https://summerofcode.withgoogle.com/projects/#6746958066089984) urllib/3.9.0 [BeautifulSoup/4.9.0]'
+user_agent_val : Generator-for-Offline-Documentation (https://github.com/abshk-jr ; https://github.com/opencax/GSoC/issues/6 ; https://summerofcode.withgoogle.com/projects/#6746958066089984) urllib/3.9.0 [BeautifulSoup/4.9.0]
+
+url_print : [https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Print_version,https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/The_OpenSCAD_Language]
+
+cheatsheet_url : https://openscad.org/cheatsheet/
+
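+# The options below are handed straight to wkhtmltopdf by pdfkit:
+# enable-local-file-access lets it read the downloaded images/CSS from disk,
+# and keep-relative-links preserves the relative links between the saved pages.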
+options :
+  enable-local-file-access: null
+  --keep-relative-links: ''
\ No newline at end of file
diff --git a/scripts/offline-doc-generator.py b/scripts/offline-doc-generator.py
index 23c91bc..3023deb 100644
--- a/scripts/offline-doc-generator.py
+++ b/scripts/offline-doc-generator.py
@@ -1,287 +1,356 @@
 '''
-This is the program for Generator for offline documentation
-more about which can be found out at https://github.com/opencax/GSoC/issues/6
-and the GSOC project details for the same are present at
+This is the code for `Generator for offline documentation`, more
+about which can be read at https://github.com/opencax/GSoC/issues/6
+and the GSoC project details for the same can be checked out at
 https://summerofcode.withgoogle.com/projects/#6746958066089984
 '''
-import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, os, yaml
+import urllib.request, urllib.parse, os, yaml
 from bs4 import BeautifulSoup as bs,Comment, Doctype
 import shutil
+import pdfkit
+import platform
+if platform.system() == 'Linux': import cairosvg
 
 with open(os.path.join( os.path.dirname(__file__),'config.yml'),'r') as file:
-    config = yaml.safe_load(file)
+    config = yaml.safe_load(file)
 
+#Update the global variables with the data from config.yml
 globals().update(config)
 
-'''
-The line above does the same work as the code given below
----------------------------------------------------------
-url = config['url']
-url_css = config['url_css']
-url_wiki = config['url_wiki']
-url_api = config['url_api']
-pages_for_exclusion = config['pages_for_exclusion']
-user_agent_val = config['user_agent_val']
----------------------------------------------------------
-'''
-
 dir_docs = 'openscad_docs'
 dir_imgs = os.path.join( dir_docs, 'imgs')
 dir_maths = os.path.join( dir_docs, 'imgs','maths')
 dir_styles = os.path.join( dir_docs, 'styles')
 
-#Create the directories to save the doc if they don't exist
+#Create the directories to save the documentation
 if not os.path.exists(dir_docs): os.makedirs(dir_docs)
 if not os.path.exists(dir_imgs): os.makedirs(dir_imgs)
 if not os.path.exists(dir_maths): os.makedirs(dir_maths)
 if not os.path.exists(dir_styles): os.makedirs(dir_styles)
 
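+# Staging directories for the PDF build: dir_pdfs holds the intermediate HTML
+# (plus images) that wkhtmltopdf consumes, and dir_docpdfs collects the
+# finished PDFs; both are zipped and then deleted again at the end of main.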
+dir_pdfs = 'openscad_docs_pdf'
+if not os.path.exists(dir_pdfs): os.makedirs(dir_pdfs)
+dir_docpdfs = 'docs_pdf'
+if not os.path.exists(dir_docpdfs): os.makedirs(dir_docpdfs)
+dir_pdfimgs = os.path.join( dir_pdfs, 'imgs')
+if not os.path.exists(dir_pdfimgs): os.makedirs(dir_pdfimgs)
+dir_pdfmaths = os.path.join( dir_pdfs, 'imgs', 'maths')
+if not os.path.exists(dir_pdfmaths): os.makedirs(dir_pdfmaths)
+
 pages =[]
 pages += pages_for_exclusion
 imgs =[]
 maths =[]
 
-def getUrl(url):
-    '''
-    This function generates the complete url after getting urls form src
-    /wiki/OpenSCAD_User_Manual get converted to https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
-
-    '''
-    if url.startswith('//'):
-        url = 'https:'+url
-    elif not url.startswith( url_wiki ):
-        url = urllib.parse.urljoin( url_wiki, url[0]=="/" and url[1:] or url)
-    return url
-
-def getTags(soup):
-    '''
-    This function handles the different tags present in the HTML document
-    for example the image tags
-
-    '''
-    for a in soup.find_all('a'):
-        href= a.get('href')
-        if href:
-            if href[0] != '#':
-                href = getUrl(href)
-                if (href.startswith('/wiki/OpenSCAD_User_Manual') or href.startswith(url_wiki + '/wiki/OpenSCAD_User_Manual')):
-                    newhref = (href.replace('#', '.html#') if '#' in href else href+'.html').split('/')[-1]
-
-                    if 'Print_version.html' not in newhref:
-                        getPages(url=href)
-                    a['href']= newhref
-
-        if a.img :
-            getImages( a )
-
-def getMaths(soup):
-    '''
-    This function generates the image version of the math formulas
-    to be displayed in various HTML files, for example
-    https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Mathematical_Operators
-    and saves them to the directory /openscad_docs/imgs/maths
-
-    '''
-    for img in soup.find_all('img'):
-        try:
-            for cls in img['class']:
-                if('math' in cls):
-                    mathname = img['src'].split("/")[-1].split("\\")[-1] + '.svg'
-                    savepath = os.path.join( dir_maths, mathname)
-                    if (not mathname in maths):
-                        opener = urllib.request.build_opener()
-                        opener.addheaders = [('User-Agent',user_agent_val)]
-                        urllib.request.install_opener(opener)
-                        urllib.request.urlretrieve( img['src'] , savepath )
-                        maths.append( mathname )
-                    linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/')
-                    img['src'] = linkurl
-
-        except:
-            pass
-
-def getImages(tag):
-    '''
-    This function generates the images present the in HTML documents
-    and saves them to the directory /openscad_docs/imgs
-
-    '''
-    src = getUrl( tag.img['src'] )
-    imgname = src.split("/")[-1]
-    imgname = imgname.replace('%','_')
-    imgpath = os.path.join( dir_imgs, imgname)
-
-    #The following is to download the image if it hasn't alrady been downloaded
-    if not imgpath in imgs:
-        opener = urllib.request.build_opener()
-        opener.addheaders = [('User-Agent',user_agent_val)]
-        urllib.request.install_opener(opener)
-        urllib.request.urlretrieve(src , imgpath)
-        imgs.append(imgpath)
-
-    del tag.img['srcset']
-    imgpath = os.path.join('.', 'imgs', imgname).replace('\\','/')
-    tag.img['src'] = imgpath
-    tag['href']= imgpath
-
-def cleanSoup(soup):
-    '''
-    This function cleans the soup by removing the redundant HTML tags
-    and the parts that are unrelated to the User Manual
-    '''
-
-    #The following deletes the Tags which aren't required in the User Manual
-    red_div_cls = ["printfooter","catlinks","noprint","magnify"]
-    red_table_cls= ['noprint','ambox']
-    red_input_cls= ['toctogglecheckbox']
-    for cls in red_div_cls:
-        for tag in soup.findAll('div',{'class':cls}):
-            tag.decompose()
-    for cls in red_table_cls:
-        for tag in soup.findAll('table',{'class':cls}):
-            tag.decompose()
-    for cls in red_input_cls:
-        for tag in soup.findAll('input',{'class':cls}):
-            tag.decompose()
-    for tag in soup.findAll('style'):
-        tag.decompose()
-
-    #The following removes the comments present in the HTML document
-    comments = soup.findAll(text=lambda text: isinstance(text, Comment))
-    [comment.extract() for comment in comments]
-
-    #The following replaces the redundant div Tags with the content present in inside of them
-    rep_div_cls = ["mw-highlight"]
-    for kls in rep_div_cls:
-        for tag in soup.findAll('div',kls):
-            tag.replaceWithChildren()
-
-    #The following is for the cleaning of some redundant li tags
-    for _ in range(0,7):
-        for tag in soup.findAll('li',{'class':f'toclevel-{_}'}):
-            del tag['class']
-
-    #The following is for the cleaning/removal of some redundant span tags
-    for tag in soup.findAll('span'):
-        try:
-            if(len(tag.text)==0):
-                tag.decompose()
-            for cls in tag['class']:
-                if(len(cls) <= 2):
-                    tag.replaceWithChildren()
-                if('mathml' in cls):
-                    tag.decompose()
-                if cls in ['toctext']:
-                    tag.replaceWithChildren()
-                if cls in ['mw-headline']:
-                    del tag['class']
-                if cls in ['mw-editsection','toctogglespan','noprint']:
-                    tag.decompose()
-
-        except:
-            pass
-
-    for tag in soup.findAll('ul'):
-        tag['style'] = 'list-style-image:none'
-
+def getParsedUrl(url):
+    '''
+    This function generates a parsed url after accepting the url from the src inside the tags
+    e.g. /wiki/OpenSCAD_User_Manual gets converted to https://en.wikibooks.org/wiki/OpenSCAD_User_Manual
+
+    '''
+    if url.startswith('//'):
+        url = 'https:'+url
+    elif not url.startswith( url_wiki ):
+        url = urllib.parse.urljoin( url_wiki, url[0]=="/" and url[1:] or url)
+    return urllib.parse.urlparse(url)
+
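+# For example, getParsedUrl('/wiki/OpenSCAD_User_Manual') returns a
+# ParseResult whose .geturl() is 'https://en.wikibooks.org/wiki/OpenSCAD_User_Manual'.
+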
+def getTags(soup,pdf,cs=False):
+    '''
+    This function handles the different tags present in the HTML document
+    e.g. updating the <a> tags with the new links, or handling the <img> tags
+
+    '''
+    for a in soup.find_all('a'):
+        href= a.get('href')
+        if href:
+            if href[0] != '#':
+                if cs:
+                    href = href.replace('w/index.php?title=','wiki/')
+                hrefparse = getParsedUrl(href)
+                hrefurl=hrefparse.geturl()
+                if pdf:
+                    a['href']= hrefurl
+                elif hrefparse.path.startswith('/wiki/OpenSCAD_User_Manual'):
+                    newhref = (hrefurl.replace('#', '.html#') if '#' in hrefurl else hrefurl+'.html').split('/')[-1]
+
+                    if 'Print_version.html' not in newhref:
+                        if not cs:
+                            getPages(url=hrefurl)
+                        a['href']= newhref
+
+        if a.img :
+            getImages( a,pdf )
+
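+# Note: with pdf=True the links are kept as absolute wiki URLs (the PDF build
+# flattens everything into one document, so there are no local pages to point
+# at), while cs=True is the cheat-sheet mode, which first normalises
+# /w/index.php?title=Foo links to /wiki/Foo before rewriting them to the
+# locally saved .html files.
+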
+def getMaths(soup,pdf):
+    '''
+    This function downloads the SVG files for the Math Formulas
+    that are being used on various pages, for example at
+    https://en.wikibooks.org/wiki/OpenSCAD_User_Manual/Mathematical_Operators
+    and saves them to the directory /openscad_docs/imgs/maths
+
+    '''
+    for img in soup.find_all('img'):
+        try:
+            for cls in img['class']:
+                if('math' in cls):
+                    mathname = img['src'].split("/")[-1].split("\\")[-1] + '.svg'
+                    savepath = os.path.join( dir_maths, mathname) if not pdf else os.path.join( dir_pdfmaths, mathname)
+                    savepath_png = savepath.replace('.svg','.png')
+                    if (not mathname in maths) or pdf:
+                        opener = urllib.request.build_opener()
+                        opener.addheaders = [('User-Agent',user_agent_val)]
+                        urllib.request.install_opener(opener)
+                        urllib.request.urlretrieve( img['src'] , savepath )
+                        if pdf and platform.system() == 'Linux':
+                            '''
+                            This part of the code converts the SVGs to PNGs if the program is being run on Linux,
+                            to overcome the issue where the WebKit engine renders the SVG images at incorrect sizes
+                            '''
+                            cairosvg.svg2png(url=savepath, write_to=savepath_png)
+                            os.remove(savepath)
+                        maths.append( mathname )
+                    if pdf and platform.system() == 'Linux':
+                        linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/').replace('.svg','.png')
+                    else:
+                        linkurl = os.path.join('.','imgs/maths',mathname).replace('\\','/')
+                    img['src'] = linkurl
+
+        except:
+            pass
+
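+# getImages rewrites both the <img> src and the enclosing <a> href to the
+# local copy under ./imgs, and drops the srcset attribute so a renderer
+# cannot pick a remote variant of the image instead.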
+def getImages(tag,pdf):
+    '''
+    This function downloads the images present in the HTML files
+    and saves them to the directory - /openscad_docs/imgs
+
+    '''
+    srcparse = getParsedUrl( tag.img['src'] )
+    imgname = srcparse.path.split("/")[-1]
+    imgname = imgname.replace('%','_')
+    imgpath = os.path.join( dir_imgs, imgname) if not pdf else os.path.join( dir_pdfimgs, imgname)
+
+    #The following is to download the image if it hasn't already been downloaded
+    if not imgpath in imgs:
+        opener = urllib.request.build_opener()
+        opener.addheaders = [('User-Agent',user_agent_val)]
+        urllib.request.install_opener(opener)
+        urllib.request.urlretrieve(srcparse.geturl() , imgpath)
+        imgs.append(imgpath)
+
+    del tag.img['srcset']
+    imgpath = os.path.join('.', 'imgs', imgname).replace('\\','/')
+    tag.img['src'] = imgpath
+    tag['href']= imgpath
+
+def cleanSoup(soup,pdf):
+    '''
+    This function cleans the HTML by removing the redundant tags
+    and the sections which are not necessary for the User Manual
+    '''
+
+    #The following deletes the Tags which aren't required in the User Manual
+    red_dict = {'div' : ["printfooter","catlinks","noprint","magnify"], 'table' : ['noprint'], 'input' : ['toctogglecheckbox']}
+    for tag_name,cls_list in red_dict.items():
+        for cls in cls_list:
+            for tag in soup.findAll(tag_name,{'class':cls}):
+                tag.decompose()
+
+    for tag in soup.findAll('table',{'class':'ambox'}):
+        tag.decompose()
+
+    for tag in soup.findAll('style'):
+        tag.decompose()
+
+    #The following removes the comments present in the HTML document
+    comments = soup.findAll(text=lambda text: isinstance(text, Comment))
+    [comment.extract() for comment in comments]
+
+    #The following replaces the redundant div Tags with the content present inside of them
+    rep_div_cls = ["mw-highlight"]
+    for kls in rep_div_cls:
+        for tag in soup.findAll('div',kls):
+            tag.replaceWithChildren()
+
+    #The following removes the non-contributing classes in li tags
+    for _ in range(0,7):
+        for tag in soup.findAll('li',{'class':f'toclevel-{_}'}):
+            del tag['class']
+
+    #The following is for the removal/cleaning of some redundant span tags
+    for tag in soup.findAll('span'):
+        try:
+            if(len(tag.text)==0):
+                tag.decompose()
+            for cls in tag['class']:
+                if(len(cls) <= 2):
+                    tag.replaceWithChildren()
+                elif cls in ['toctext'] or (pdf and cls in ['tocnumber']):
+                    tag.replaceWithChildren()
+                elif cls in ['mw-headline']:
+                    del tag['class']
+                elif 'mathml' in cls or cls in ['mw-editsection','toctogglespan','noprint']:
+                    tag.decompose()
+
+        except:
+            pass
+
+    #The following is to replace the tabs in the code blocks with spaces
+    for txt in soup.findAll('pre'):
+        txt.string = txt.text.replace('\t','    ')
+    if pdf:
+        if platform.system() == 'Linux':
+            for _ in soup.findAll('pre'):
+                _['style']="font-family:'Liberation Mono'"
+
+    #The following unwraps the tables in the pdfs for a better formatting
+    if pdf:
+        for table in soup.findAll('table'):
+            for row in table.findAll('tr'):
+                for col in row.findAll('td'):
+                    col.unwrap()
+                row.unwrap()
+            table.unwrap()
+
+    for tag in soup.findAll('ul'):
+        tag['style'] = 'list-style-image:none'
+
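+# Pinning the <pre> blocks to 'Liberation Mono' on Linux matches the
+# fonts-liberation package installed by the CI workflows; without a fixed
+# monospace font wkhtmltopdf may substitute a proportional one in the PDFs.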

 def getFooter( url, name ):
-    '''
-    This function generates the Footer with the license attribution for all the pages
+    '''
+    This function generates the Footer containing the necessary license attribution
 
-    '''
-    footer = (f'''''')
+    '''
+    footer = (f'''''')
 
-    return bs(footer,'html.parser')
+    return bs(footer,'html.parser')
 
 def getStyled(soup,title):
-    tag = Doctype('html')
-    soup.insert(0, tag)
-    soup.html['lang']='en'
-    meta_tag = soup.new_tag('meta')
-    meta_tag['charset'] = 'UTF-8'
-    soup.head.insert(0,meta_tag)
-    css_tag = bs('<link rel="stylesheet" href="./styles/style.css">','html.parser')
-    soup.head.append(css_tag)
-    soup.body['class'] = 'mw-body'
-    soup.body['style']=['height:auto']
-    del soup.body.div['class']
-    soup.body.div['id']='bodyContent'
-    h1_tag = bs(f'<h1>{title}</h1>','html.parser')
-    soup.body.insert(0,h1_tag)
-
-def getPages( url=url,folder=dir_docs ):
-    '''
-    This is the main function of the program
-    which generates the HTML document from the given url
-    and calls different functions to generate the Offline
-    version of the page and save it under the directory /openscad_docs
-
-    '''
-    url = getUrl(url)
-    if url.split("#")[0] not in pages:
-        pages.append( url.split("#")[0] )    #add the url to the `pages` list so that they don't get downloaded again
-        wiki_url = url
-        url = url.replace(url_wiki+'/wiki/', "")
-        url = url_api + url
-
-        request = urllib.request.Request(url)
-        request.add_header('User-Agent',user_agent_val)
-        response = urllib.request.urlopen(request)
-        xml = response.read()
-        soup = bs(xml, 'lxml')
-        soup = soup.text
-        soup = bs(soup,'html5lib')
-
-        name = url.split("=")[-1]
-        name = name.split("/")[-1].split('#')[0]    #to convert OpenSCAD_User_Manual/String_Functions#str to String_Functions
-
-        title = soup.new_tag("title")    #to add title to the pages
-        title.string = name.replace("_" , " ")
-        soup.html.head.append(title)
-
-        name = name + ".html"
-        filepath = os.path.join( folder, name)
-
-        print("Saving: ", filepath)
-
-        getStyled(soup,title.string)
-        cleanSoup(soup)
-        getMaths(soup)
-        getTags(soup)
-
-        soup.body.append( getFooter( wiki_url, title.text ))
-
-        open(filepath, "w", encoding="utf-8").write( str(soup) )
-
-
-def getCSS():
-    '''
-    This function runs once after the HTML files have been downloaded
-    and downloads the CSS given at https://www.mediawiki.org/wiki/API:Styling_content
-    and saves it to openscad_docs/styles
-
-    '''
-    request = urllib.request.Request(url_css)
-    request.add_header('User-Agent',user_agent_val)
-    response = urllib.request.urlopen(request)
-    css_soup = response.read()
-    css = bs(css_soup, 'html5lib')
-    csspath = os.path.join( dir_styles, 'style.css')
-    open( csspath, "w" , encoding="utf-8").write(css.body.text)
-
-
-
+    tag = Doctype('html')
+    soup.insert(0, tag)
+    soup.html['lang']='en'
+    meta_tag = soup.new_tag('meta')
+    meta_tag['charset'] = 'UTF-8'
+    soup.head.insert(0,meta_tag)
+    css_tag = bs('<link rel="stylesheet" href="./styles/style.css">','html.parser')
+    soup.head.append(css_tag)
+    soup.body['class'] = 'mw-body'
+    soup.body['style']=['height:auto;background-color:#ffffff']
+    del soup.body.div['class']
+    soup.body.div['id']='bodyContent'
+    h1_tag = bs(f'<h1>{title}</h1>','html.parser')
+    soup.body.insert(0,h1_tag)
+
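+# getPages() and getTags() recurse into each other: every page fetched here is
+# scanned by getTags(), and any manual page it links to that is not in `pages`
+# yet is downloaded through another getPages() call.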

+def getPages( url=url,folder=dir_docs,pdf=False ):
+    '''
+    This is the main function of the program
+    which downloads the webpage at the given url
+    and calls different functions to generate the Offline
+    version of the page and save it under the directory /openscad_docs
+
+    '''
+    if url.split("#")[0] not in pages or pdf:
+        pages.append( url.split("#")[0] )    #adds the url to the `pages` list so that the page doesn't get downloaded again
+        wiki_url = url
+        url = url.replace(url_wiki+'/wiki/', "")
+        url = url_api + url
+
+        request = urllib.request.Request(url)
+        request.add_header('User-Agent',user_agent_val)
+        response = urllib.request.urlopen(request)
+        xml = response.read()
+        soup = bs(xml, 'html.parser')
+        soup = soup.text
+        soup = bs(soup,'html5lib')
+
+        name = url.split("=")[-1]
+        name = name.split("/")[-1].split('#')[0]    #converts OpenSCAD_User_Manual/String_Functions#str to String_Functions
+
+        if pdf==True: name = 'OpenSCAD_User_Manual' if (name == 'Print_version') else name
+
+        title = soup.new_tag("title")    #adds title to the pages
+        title.string = name.replace("_" , " ")
+        soup.html.head.append(title)
+
+        name = name + ".html"
+        filepath = os.path.join( folder, name)
+
+        print("Saving: ", filepath)
+
+        getStyled(soup,title.string)
+        cleanSoup(soup,pdf)
+        getMaths(soup,pdf)
+        getTags(soup,pdf,False)
+
+        soup.body.append( getFooter( wiki_url, title.text ))
+
+        open(filepath, "w", encoding="utf-8").write( str(soup) )
+
+
+def getCSS(url = url_css, css_name = 'style.css'):
+    '''
+    This function runs once after the HTML files have been downloaded
+    and downloads the CSS given at https://www.mediawiki.org/wiki/API:Styling_content
+    and saves it to openscad_docs/styles
+
+    '''
+    request = urllib.request.Request(url)
+    request.add_header('User-Agent',user_agent_val)
+    response = urllib.request.urlopen(request)
+    css_soup = response.read()
+    css = bs(css_soup, 'html5lib')
+    csspath = os.path.join( dir_styles, css_name)
+    open( csspath, "w" , encoding="utf-8").write(css.body.text)
+
+def getPdf():
+    for link in url_print:
+        getPages(link,folder=dir_pdfs,pdf=True)
+    if os.path.exists(f'{os.path.join( os.getcwd(), dir_pdfs)}/styles'):shutil.rmtree(f'{os.path.join( os.getcwd(), dir_pdfs)}/styles')
+    shutil.copytree(f'{os.path.join( os.getcwd(), dir_docs)}/styles', f'{os.path.join( os.getcwd(), dir_pdfs)}/styles')
+
+def cheatSheet():
+    '''
+    This function is run once to download the Cheat Sheet from
+    https://openscad.org/cheatsheet/ and the WikiBooks links
+    are changed to now redirect to the Manual saved offline
+
+    '''
+    request = urllib.request.Request(cheatsheet_url)
+    response = urllib.request.urlopen(request)
+    soup = response.read()
+    soup = bs(soup,'lxml')
+    for css in soup.find_all("link",href=True):
+        css_name = css.attrs.get("href")
+        url_css = urllib.parse.urljoin(cheatsheet_url, css_name)
+        if '.css' in url_css:
+            getCSS(url_css,css_name.split('/')[-1])
+            css['href'] = css['href'].replace('css/','styles/')
+    getTags(soup,False,True)
+    filepath = os.path.join( dir_docs , 'CheatSheet.html')
+    open(filepath, "w", encoding="utf-8").write( str(soup) )
+
+
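+# The build runs in two passes: getPages()/getCSS()/cheatSheet() produce the
+# browsable HTML tree and its zip, then getPdf() re-downloads the print
+# version into the staging directory and pdfkit/wkhtmltopdf turns each HTML
+# file there into a PDF before the second zip is made.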
 if(__name__ == '__main__'):
-    print(f'Started Offline Generator.py\nNow downloading the User-Manual from {url}')
-    getPages(url)
-    getCSS()
-    print("Total number of pages generated is \t:\t", len(pages)-len(pages_for_exclusion))
-    print("Total number of images generated is \t:\t", len(imgs))
-    print("Total number of math-images generated is:\t", len(maths))
-    shutil.make_archive('docs', 'zip', dir_docs)
+    print(f'Started Offline Generator.py\nNow downloading the User-Manual from {url}')
+    getPages(url)
+    getCSS()
+    print("Total number of pages generated is \t:\t", len(pages)-len(pages_for_exclusion))
+    print("Total number of images generated is \t:\t", len(imgs))
+    print("Total number of math-images generated is:\t", len(maths))
+    cheatSheet()
+    shutil.make_archive('Generated-Offline-Manual', 'zip', dir_docs)
+
+    getPdf()
+    files=os.listdir(os.path.join( os.getcwd(), dir_pdfs))
+    for file in files:
+        if ".html" in file:
+            file_pdf = file.replace('.html','.pdf')
+            pdfkit.from_file(f'{os.path.join( os.getcwd(), dir_pdfs)}/{file}', f'{os.path.join( os.getcwd(), dir_docpdfs)}/{file_pdf}' , options=options)
+
+    shutil.make_archive('PDF-Offline-Manual', 'zip', dir_docpdfs)
+    shutil.rmtree(dir_pdfs)
+    shutil.rmtree(dir_docpdfs)
\ No newline at end of file
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 71e0fe0..58f6738 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,4 +1,6 @@
 beautifulsoup4==4.9.3
 PyYAML==5.4.1
 lxml==4.6.3
-html5lib==1.1
\ No newline at end of file
+html5lib==1.1
+pdfkit==0.6.1
+CairoSVG==2.5.2
\ No newline at end of file