tesseract-ocr

  01 May 2018


tesseract-ocr
# download
https://github.com/tesseract-ocr/tesseract/wiki/Downloads
或
yum install tesseract

# 依赖
yum install libpng12.so.0
ln -s /usr/lib/libpng12.so.0 /usr/lib64/libpng12.so.0
sudo ldconfig

# 语言包,解压至安装路径tessdata目录
# linux路径在:/usr/share/tesseract/tessdata  或 /usr/local/share/tesseract/tessdata
https://github.com/tesseract-ocr/tessdata

环境变量
# Windows:将安装目录加入用户环境变量 PATH 和系统变量 Path
# 安装地址:C:\Program Files (x86)\Tesseract-OCR
# 添加系统变量:TESSDATA_PREFIX
TESSDATA_PREFIX=C:\Program Files (x86)\Tesseract-OCR

# linux
# 注意:Tesseract 4.0 及以上版本 TESSDATA_PREFIX 指向 tessdata 目录本身;3.x 版本应指向其父目录(如 /usr/share/tesseract)
export TESSDATA_PREFIX=/usr/share/tesseract/tessdata

// 识别
/**
 * Recognizes the text in an image by running the external {@code tesseract}
 * command-line tool with the simplified-Chinese and English language data.
 *
 * <p>The image is first saved as a PNG under {@code /tmp/ocr/}; tesseract is then
 * run in that directory, writing its result to {@code <image name>.txt}. Both
 * temporary files are removed before this method returns, whether it succeeds
 * or fails.
 *
 * @param bufferedImage the image to recognize
 * @return the recognized text, each line followed by {@code EOL} (a constant
 *         defined outside this method), or {@code null} if the image could not
 *         be saved to the temporary file
 * @throws RuntimeException if tesseract exits with a non-zero status
 * @throws Exception if the process cannot be started, the wait is interrupted,
 *         or the result file cannot be read
 */
public static String recognizeText(BufferedImage bufferedImage) throws Exception {

	// Temporary location of the image handed to tesseract.
	String path = "/tmp/ocr/" + UUID.randomUUID() + ".png";

	File imageFile = ImageFileUtil.saveImage(bufferedImage, path);
	if (imageFile == null) {
		return null;
	}
	// tesseract appends ".txt" to the output base name passed on the command line.
	File txtFile = new File(imageFile.getPath() + ".txt");

	try {
		List<String> cmd = new ArrayList<String>();
		cmd.add("tesseract");
		cmd.add(imageFile.getName()); // input image, relative to the working directory
		cmd.add(imageFile.getName()); // output base name -> "<name>.txt"
		cmd.add("-l");
		// Multiple languages must be joined with '+'; a separate "eng" argument
		// would be interpreted by tesseract as a config file name.
		cmd.add("chi_sim+eng");

		ProcessBuilder pb = new ProcessBuilder(cmd);
		pb.directory(imageFile.getParentFile());
		pb.redirectErrorStream(true);
		Process process = pb.start();

		// Drain the merged stdout/stderr so the child cannot block on a full pipe
		// buffer while we are waiting for it to exit.
		BufferedReader processOutput = new BufferedReader(
				new InputStreamReader(process.getInputStream(), "UTF-8"));
		try {
			while (processOutput.readLine() != null) {
				// output is intentionally discarded
			}
		} finally {
			processOutput.close();
		}

		int ret = process.waitFor();
		if (ret != 0) {
			throw new RuntimeException("处理code异常:" + ret);
		}

		StringBuilder text = new StringBuilder();
		BufferedReader in = new BufferedReader(
				new InputStreamReader(new FileInputStream(txtFile), "UTF-8"));
		try {
			String str;
			while ((str = in.readLine()) != null) {
				text.append(str).append(EOL);
			}
		} finally {
			in.close();
		}
		return text.toString();
	} finally {
		// Best-effort cleanup of the temporary files on every exit path.
		txtFile.delete();
		imageFile.delete();
	}
}
// ImageFileUtil
/**
 * Writes an image to the given path in the {@code DEFAULT_CON} format
 * (a constant defined outside this method).
 *
 * <p>Missing parent directories of {@code path} are created first.
 *
 * @param bufferedImage the image to write
 * @param path          destination file path
 * @return the written file, or {@code null} if the parent directory could not
 *         be created, no image writer accepted the image/format, or an
 *         {@link IOException} occurred while writing
 */
public static File saveImage(BufferedImage bufferedImage, String path){
        File file = new File(path);

        // Make sure the target directory exists; ImageIO.write does not create it.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
            // The isDirectory() re-check tolerates another thread/process creating it concurrently.
            return null;
        }

        try {
            // ImageIO.write returns false (without throwing) when no appropriate writer is found.
            if (!ImageIO.write(bufferedImage, DEFAULT_CON, file)) {
                return null;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return file;
}