PDFBox是Apache旗下的一个操作pdf的类库。它既提供了命令行工具,也提供了可供java调用的类库。
下载地址:https://pdfbox.apache.org/
下面的实验基于JDK8+pdfbox-2.0.13.jar+pdfbox-app-2.0.13.jar(命令行工具库)
1.命令行使用
文档参考:https://pdfbox.apache.org/2.0/commandline.html
命令行工具可以提取pdf中的图片、文本,合并pdf与拆分pdf,pdf转换为图片等操作。
1.提取图片
java -jar pdfbox-app-2.0.13.jar ExtractImages ./1.pdf
会在同文件夹下提取出pdf中的图片。
2.提取文字
java -jar pdfbox-app-2.0.13.jar ExtractText ./1.pdf ./text.txt
当然还可以指定起始页号等参数。
3.pdf转换为图片
java -jar pdfbox-app-2.0.13.jar PDFToImage ./1.pdf
结果每页生成一张图片。当然可以指定起始页号、生成图片的名称前缀等信息。默认生成的图片命名如下:(pdfname+page)
还有许多命令行操作可以参考官网的文档,其中对参数都有详细的解释。这种方式可以封装为工具类,通过Runtime在多线程中执行命令行来操作pdf。
2.Java中作为类库使用
感觉这个工具还不是太方便,写入pdf表格等直接操作pdf的可以考虑用itext代替。这个就当做命令行工具实现pdf转换图片、提取文字等操作就可以了。关于itext的使用参考:https://www.cnblogs.com/qlqwjy/p/8213989.html
依赖的jar包:
commons-logging-1.0.4.jar、pdfbox-2.0.13.jar、fontbox-2.0.13.jar
packagecn.qlq; importjava.io.File; importjava.io.IOException; importjava.util.List; importorg.apache.pdfbox.pdmodel.PDDocument; importorg.apache.pdfbox.pdmodel.PDPage; importorg.apache.pdfbox.pdmodel.PDPageContentStream; importorg.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; importorg.apache.pdfbox.pdmodel.encryption.AccessPermission; importorg.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; importorg.apache.pdfbox.pdmodel.font.PDFont; importorg.apache.pdfbox.pdmodel.font.PDType1Font; importorg.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; importorg.apache.pdfbox.pdmodel.interactive.action.PDAction; importorg.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript; importorg.apache.pdfbox.pdmodel.interactive.action.PDActionURI; importorg.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; importorg.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink; importorg.apache.pdfbox.text.PDFTextStripper; public classPDFBoxTest { public static void main(String[] args) throwsIOException { ReplaceURLs(); } /** * 替换pdf中的连接的url跳转地址,但是显示的不会变,只是修改点击跳转的地址 * * @throwsInvalidPasswordException * @throwsIOException */ public static void ReplaceURLs() throwsInvalidPasswordException, IOException { PDDocument doc = null; try{ doc = PDDocument.load(new File("G:/qjl.pdf")); int pageNum = 0; for(PDPage page : doc.getPages()) { pageNum++; List<PDAnnotation> annotations =page.getAnnotations(); for(PDAnnotation annotation : annotations) { PDAnnotation annot =annotation; if (annot instanceofPDAnnotationLink) { PDAnnotationLink link =(PDAnnotationLink) annot; PDAction action =link.getAction(); if (action instanceofPDActionURI) { PDActionURI uri =(PDActionURI) action; String oldURI =uri.getURI(); String newURI = "http://pdfbox.apache.org"; System.out.println("Page " + pageNum + ": Replacing " + oldURI + " with " +newURI); uri.setURI(newURI); } } } doc.save("G:/qjl_1.pdf"); } } finally{ if (doc != null) { doc.close(); } } } /** * 删除PDF里面的指定页 * * 
@throwsIOException * @throwsInvalidPasswordException */ public static void removePdfPage() throwsIOException, InvalidPasswordException { try (PDDocument document = PDDocument.load(new File("G:/1.pdf"))) { if(document.isEncrypted()) { throw new IOException("Encrypted documents are not supported for this example"); } if (document.getNumberOfPages() <= 1) { throw newIOException( "Error: A PDF document must have at least one page, " + "cannot remove the last page!"); } document.removePage(1);//删除第2页 System.out.println("ssssssssssssss"); document.save("G:/1_1.pdf"); } catch(Exception e) { e.printStackTrace(); } } /** * PDF中增加图片 * * @throwsIOException * @throwsInvalidPasswordException */ public static void addImg2PDF() throwsIOException, InvalidPasswordException { try (PDDocument doc = PDDocument.load(new File("G:/blank.pdf"))) { PDPage page = doc.getPage(0); PDImageXObject pdImage = PDImageXObject.createFromFile("G:/0101.jpg", doc); try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true)) { //contentStream.drawImage(ximage, 20, 20 ); //better method inspired by // http://stackoverflow.com/a/22318681/535646 //reduce this value if the image is too large float scale =1f; contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() *scale); } doc.save("G:/blank.pdf"); } } /** * PDF文件中增加JS脚本 * * @throwsIOException * @throwsInvalidPasswordException */ public static void addJavaScript2PDF() throwsIOException, InvalidPasswordException { try (PDDocument document = PDDocument.load(new File("G:/blank.pdf"))) { PDActionJavaScript javascript = newPDActionJavaScript( "app.alert( {cMsg: 'PDFBox rocks!', nIcon: 3, nType: 0, cTitle: 'PDFBox Javascript example' } );"); document.getDocumentCatalog().setOpenAction(javascript); if(document.isEncrypted()) { throw new IOException("Encrypted documents are not supported for this example"); } document.save("G:/blank.pdf"); } } /** * 
阅读PDF文件的内容,支持阅读中文(如果需要阅读指定页面的PDF自己改写此方法) * * @throwsIOException * @throwsInvalidPasswordException */ public static void readPDFText() throwsIOException, InvalidPasswordException { try (PDDocument document = PDDocument.load(new File("G:/jl.pdf"))) { AccessPermission ap =document.getCurrentAccessPermission(); if (!ap.canExtractContent()) { throw new IOException("You do not have permission to extract text"); } PDFTextStripper stripper = newPDFTextStripper(); stripper.setSortByPosition(true); for (int p = 1; p <= document.getNumberOfPages(); ++p) { stripper.setStartPage(p); stripper.setEndPage(p); String text =stripper.getText(document); String pageStr = String.format("page %d:", p); System.out.println(pageStr); for (int i = 0; i < pageStr.length(); ++i) { System.out.print("-"); } System.out.println(); System.out.println(text.trim()); System.out.println(); } } } /** * 创建空的PDF文件并且添加一个空白页,多页可以插入多个PDPage * * @throwsIOException */ public static void createBlankPDF() throwsIOException { String filename = "G:/blank.pdf"; try (PDDocument doc = newPDDocument()) { //a valid PDF document requires at least one page PDPage blankPage = newPDPage(); doc.addPage(blankPage); doc.save(filename); } } /** * 图片转换为PDF文件 * * @throwsIOException */ public static void img2PDF() throwsIOException { String imagePath = "G:/0101.jpg"; String pdfPath = "G:/0101.pdf"; try (PDDocument doc = newPDDocument()) { PDPage page = newPDPage(); doc.addPage(page); PDImageXObject pdImage =PDImageXObject.createFromFile(imagePath, doc); try (PDPageContentStream contents = newPDPageContentStream(doc, page)) { contents.drawImage(pdImage, 20, 20); } doc.save(pdfPath); } } /** * 创建PDF文件并写入内容(暂时不支持写入中文) * * @throwsIOException */ public static void createPDF() throwsIOException { String filename = "G:/Test.pdf"; String message = "pdf";//字体不能有中文,暂时不支持中文 try (PDDocument doc = newPDDocument()) { PDPage page = newPDPage(); doc.addPage(page); PDFont font =PDType1Font.HELVETICA_BOLD; try (PDPageContentStream contents = 
newPDPageContentStream(doc, page)) { contents.beginText(); contents.setFont(font, 12); contents.newLineAtOffset(100, 700); contents.showText(message); contents.endText(); } doc.save(filename); } } }