Thanks to visit codestin.com
Credit goes to github.com

Skip to content

gstsgy/html2pdf

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

html2pdf

html转pdf 以 开源世界旅行手册 这本书为例

1.第一步 从该网站爬取所有html网页

public class DownBook {

	public static void main(String[] args) {
		// TODO Auto-generated method stub

		String dirPath = "./html";
        String item = "index.html";
        while(item!=null&&item!=""){
            try {
             String html=   Util.downloadPage("https://i.linuxtoy.org/docs/guide/" + item, dirPath, item);
             item = getUrl(html);
            } catch (Exception ex) {

            }
        }
	}

	 static String getUrl(String text) {

	        Pattern pattern = Pattern.compile("<a accesskey=\"n\" href=\"([-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|])\">下一页</a>");
	        Matcher m = pattern.matcher(text);
	        String str = "";
	        if (m.find()) {
	            str = m.group(1);
	        }
	        return str;

	    }
}

2.第二步,把html转换成pdf 这里用的是wkhtmltopdf软件,百度该软件下载安装

public class ToPDF {
	public static void main(String[] args) {
		File rootPath = new File("./html");
		String pdfPath = "./pdf//";

		File[] files = rootPath.listFiles();
		Arrays.sort(files, new Comparator<File>() {
			public int compare(File f1, File f2) {
				long diff = f1.lastModified() - f2.lastModified();
				if (diff > 0)
					return 1;
				else if (diff == 0)
					return 0;
				else
					return -1;// 如果 if 中修改为 返回-1 同时此处修改为返回 1 排序就会是递减
			}

			public boolean equals(Object obj) {
				return true;
			}

		});

		for (File f : files) {
			if (f.isFile() && f.getName().indexOf("html") != -1) {
				String fileName = f.getName();
				String pdfName = fileName.replace("html", "pdf");
				Util.convert("./html//" + fileName, pdfPath + pdfName);
				// Thread.sleep(500);
			}

		}

	}
}

3.将多个pdf合并成一个pdf文件

public class PDFCombine {

	public static void main(String[] args) {
		// TODO Auto-generated method stub

		File rootPath = new File("./pdf");
        File[] files = rootPath.listFiles();
        Arrays.sort(files, new Comparator<File>() {
            public int compare(File f1, File f2) {
                long diff = f1.lastModified() - f2.lastModified();
                if (diff > 0)
                    return 1;
                else if (diff == 0)
                    return 0;
                else
                    return -1;//如果 if 中修改为 返回-1 同时此处修改为返回 1  排序就会是递减
            }

            public boolean equals(Object obj) {
                return true;
            }

        });
        
        
        try{
            File f =Util. mulFile2One(Arrays.asList(files), "./开源世界旅行手册.pdf");
            System.out.println(f.length());
        }
        catch (Exception ex){

        }
	}

}

所用到的Util类

public class Util {
	public static String downloadPage(String u, String mkd,String title1) throws UnsupportedEncodingException, IOException {
        URL url = new URL(u);
        String uri = url.getHost();
        System.out.println("URI:" + uri);
        Document doc = Jsoup.connect(u).get();
        Elements ele = doc.getElementsByTag("title");
        String title = ele.text();
        System.out.println("url:" + u);
        System.out.println("网页标题:" + title);
        File mk = new File(mkd);
        if (mk.exists()) {
            System.out.println("目录" + mk + "已经存在");
        } else {
            mk.mkdir();
            System.out.println("创建目录" + mk + "成功");
        }
        System.out.println("MK:" + mk);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream(), "utf8"));
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(mk + "/" + title1)), "utf8"));
            String line;
            while ((line = br.readLine()) != null) {
                bw.write(line);
            }
            br.close();
            bw.close();
            System.out.println("下载完毕!");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
       return doc.outerHtml();
    }

    private static final String toPdfTool = "D:\\Program Files\\Wkhtmltopdf\\bin\\wkhtmltopdf.exe";

    /**
     * html转pdf
     * @param srcPath html路径,可以是硬盘上的路径,也可以是网络路径
     * @param destPath pdf保存路径
     * @return 转换成功返回true
     */
    public static boolean convert(String srcPath, String destPath){
        File file = new File(destPath);
        File parent = file.getParentFile();
        //如果pdf保存路径不存在,则创建路径
        if(!parent.exists()){
            parent.mkdirs();
        }

        StringBuilder cmd = new StringBuilder();
        cmd.append(toPdfTool);
        cmd.append(" ");
        cmd.append("  --header-line");//页眉下面的线
        cmd.append("  --header-center 这里是页眉这里是页眉这里是页眉这里是页眉 ");//页眉中间内容
        //cmd.append("  --margin-top 30mm ");//设置页面上边距 (default 10mm)
        cmd.append(" --header-spacing 10 ");//    (设置页眉和内容的距离,默认0)
        cmd.append(srcPath);
        cmd.append(" ");
        cmd.append(destPath);

        boolean result = true;
        try{
            Process proc = Runtime.getRuntime().exec(cmd.toString());
            HtmlToPdfInterceptor error = new Util().new HtmlToPdfInterceptor(proc.getErrorStream());
            HtmlToPdfInterceptor output =new Util(). new HtmlToPdfInterceptor(proc.getInputStream());
            error.start();
            output.start();
            proc.waitFor();
        }catch(Exception e){
            result = false;
            e.printStackTrace();
        }

        return result;
    }


    public static File mulFile2One(List<File> files, String targetPath) throws IOException{
        // pdf合并工具类
         PDFMergerUtility mergePdf = new PDFMergerUtility();
         for (File f : files) {
             if(f.exists() && f.isFile()){
                 // 循环添加要合并的pdf
                 mergePdf.addSource(f);
             }
         }        // 设置合并生成pdf文件名称
        mergePdf.setDestinationFileName(targetPath);        // 合并pdf
         mergePdf.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly());
         return new File(targetPath);    
         }

    
    
    public class HtmlToPdfInterceptor extends Thread {
        private InputStream is;

        public HtmlToPdfInterceptor(InputStream is){
            this.is = is;
        }

        public void run(){
            try{
                InputStreamReader isr = new InputStreamReader(is, "utf-8");
                BufferedReader br = new BufferedReader(isr);
                String line = null;
                while ((line = br.readLine()) != null) {
                    System.out.println(line.toString()); //输出内容
                }
            }catch (IOException e){
                e.printStackTrace();
            }
        }
    }
}

About

html转pdf

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages