JAVA pdf转图片 图片ocr文字

  • Post author:
  • Post category:java


PDF文件 转图片

之前用过很多其他的库，都不好用，很多内容根本读不到。

pom.xml

 		<!-- ICEpdf core: provides the org.icepdf.core classes used to render PDF pages to images. -->
 		<!-- NOTE(review): jai_core is excluded, presumably because it cannot be resolved from public Maven repositories; confirm. -->
 		<dependency>
 			<groupId>org.icepdf.os</groupId>
 			<artifactId>icepdf-core</artifactId>
 			<version>6.2.2</version>
 			<exclusions>
 				<exclusion>
 					<groupId>javax.media</groupId>
 					<artifactId>jai_core</artifactId>
 				</exclusion>
 			</exclusions>
 		</dependency>
	/**
	 * Renders every page of a PDF to a PNG file and submits each PNG to the
	 * Baidu OCR {@code accurate_basic} endpoint, printing the raw OCR response.
	 *
	 * <p>Images are written to {@code E:/backimg/backimg_<page>.png}, with pages
	 * numbered from 1. An {@link IOException} while writing, reading or
	 * submitting a single page is reported and the remaining pages are still
	 * processed. The PDF document is always disposed, even when processing
	 * fails part-way.
	 *
	 * <p>The helper classes {@code FileUtil}, {@code Base64Util} (and the other
	 * Baidu sample utilities) must be downloaded from:
	 * <ul>
	 *   <li>https://ai.baidu.com/file/658A35ABAB2D404FBF903F64D47C1F72</li>
	 *   <li>https://ai.baidu.com/file/C8D81F3301E24D2892968F09AE1AD6E2</li>
	 *   <li>https://ai.baidu.com/file/544D677F5D4E4F17B4122FBD60DB82B3</li>
	 *   <li>https://ai.baidu.com/file/470B3ACCA3FE43788B5A963BF0B625F3</li>
	 * </ul>
	 *
	 * @param filePath path of the PDF file to convert
	 * @throws IllegalArgumentException if {@code filePath} is null or empty
	 * @throws IllegalStateException if the token response has no access token
	 * @throws IOException if the output directory cannot be created
	 * @throws Exception if the token request, opening the PDF, or rendering a
	 *         page fails
	 */
	public static void pdfToImageFile(String filePath) throws Exception {
		if (filePath == null || filePath.isEmpty()) {
			throw new IllegalArgumentException("filePath must not be null or empty");
		}

		// Baidu OCR API key and secret key.
		String appkey = "mS";
		String secret = "mM";
		JSONObject returndata = RequestUtil.getRequest(
				"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id="
						+ appkey + "&client_secret=" + secret);
		String token = returndata.getString("access_token");
		if (token == null || token.isEmpty()) {
			throw new IllegalStateException("Baidu token response contained no access_token");
		}
		// The access token is a credential, so it is deliberately not printed.

		String name = "backimg";
		float scale = 2.5f;// zoom factor
		float rotation = 0f;// rotation angle
		System.out.println("正在转换...");

		// Create the output directory (and any missing parents) up front and
		// fail loudly instead of letting every page write fail later.
		File dirFile = new File("E:/" + name);
		if (!dirFile.isDirectory() && !dirFile.mkdirs()) {
			throw new IOException("Cannot create output directory: " + dirFile);
		}

		Document document = new Document();
		try {
			document.setFile(filePath);
			int pageCount = document.getNumberOfPages();
			for (int i = 0; i < pageCount; i++) {
				// Single source of truth for the image location; used both
				// for writing the PNG and for reading it back for OCR.
				String imagePath = "E:/" + name + "/" + name + "_" + (i + 1) + ".png";
				BufferedImage image = (BufferedImage) document.getPageImage(i,
						GraphicsRenderingHints.SCREEN,
						org.icepdf.core.pobjects.Page.BOUNDARY_CROPBOX, rotation,
						scale);
				try {
					// Save the rendered page into the output directory.
					ImageIO.write(image, "png", new File(imagePath));

					// Run OCR on the saved image. The request format below
					// follows the Baidu OCR API documentation.
					byte[] imgData = FileUtil.readFileByBytes(imagePath);
					String imgStr = Base64Util.encode(imgData);
					String imgParam = URLEncoder.encode(imgStr, "UTF-8");
					String param = "image=" + imgParam;
					String result = RequestUtil.post(
							"https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic", token, param);
					System.out.println(result);
				} catch (IOException e) {
					// Best effort: report the failing page and continue.
					System.err.println("Failed to process page " + (i + 1) + " (" + imagePath + ")");
					e.printStackTrace();
				} finally {
					// Release the image's resources whatever happened above.
					image.flush();
				}
			}
		} finally {
			// Always release the PDF document, even when a page fails.
			document.dispose();
		}
		System.out.println("finish");
    }



版权声明:本文为weixin_43829933原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。