# download
https://github.com/tesseract-ocr/tesseract/wiki/Downloads
或
yum install tesseract
# 依赖
yum install libpng12.so.0
ln -s /usr/lib/libpng12.so.0 /usr/lib64/libpng12.so.0
sudo ldconfig
# 语言包:下载后解压至安装路径下的 tessdata 目录
# linux路径在:/usr/share/tesseract/tessdata 或 /usr/local/share/tesseract/tessdata
https://github.com/tesseract-ocr/tessdata
# Windows:配置环境变量,将下面的安装地址加入 PATH(系统变量 Path)
# 安装地址:C:\Program Files (x86)\Tesseract-OCR
# 添加系统变量:TESSDATA_PREFIX
TESSDATA_PREFIX=C:\Program Files (x86)\Tesseract-OCR
# linux
export TESSDATA_PREFIX=/usr/share/tesseract/tessdata
// 识别
/**
 * Recognizes the text in an image by running the external {@code tesseract}
 * command with the simplified-Chinese and English language packs.
 *
 * <p>The image is first written to a uniquely named temporary PNG path under
 * {@code /tmp/ocr/}; tesseract is executed in that directory and its text
 * output file is read back as UTF-8. Both temporary files are removed before
 * this method returns, whether it succeeds or fails.
 *
 * @param bufferedImage the image to recognize
 * @return the recognized text, each line terminated by {@code EOL}, or
 *         {@code null} if the image could not be saved to the temporary path
 * @throws RuntimeException if tesseract exits with a non-zero code
 * @throws Exception if the process cannot be started, the wait is
 *         interrupted, or the result file cannot be read
 */
public static String recognizeText(BufferedImage bufferedImage) throws Exception {
    String path = "/tmp/ocr/" + UUID.randomUUID() + ".png"; // temporary image path
    File imageFile = ImageFileUtil.saveImage(bufferedImage, path);
    if (imageFile == null) {
        return null;
    }
    // The image name is also passed as the output base, and tesseract appends
    // ".txt" to the output base, so the result lands next to the image.
    File txtFile = new File(imageFile.getPath() + ".txt");
    try {
        List<String> cmd = new ArrayList<String>();
        cmd.add("tesseract");
        cmd.add(imageFile.getName()); // input image (relative to the working directory)
        cmd.add(imageFile.getName()); // output base name
        cmd.add("-l");
        // Multiple languages must be one argument joined with '+'; a separate
        // "eng" argument would be parsed by tesseract as a config file name.
        cmd.add("chi_sim+eng");

        ProcessBuilder pb = new ProcessBuilder();
        pb.directory(imageFile.getParentFile());
        pb.command(cmd);
        pb.redirectErrorStream(true);
        Process process = pb.start();

        // Drain the merged stdout/stderr: if nobody reads the pipe, the child
        // blocks once the pipe buffer is full and waitFor() never returns.
        BufferedReader processOutput = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            while (processOutput.readLine() != null) {
                // output is intentionally discarded
            }
        } finally {
            processOutput.close();
        }

        int ret = process.waitFor();
        if (ret != 0) {
            throw new RuntimeException("处理code异常:" + ret);
        }

        StringBuilder text = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(txtFile), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(line).append(EOL);
            }
        } finally {
            in.close();
        }
        return text.toString();
    } finally {
        // Always remove the temporary files, including on failure paths.
        txtFile.delete();
        imageFile.delete();
    }
}
// ImageFileUtil
/**
 * Writes an image to the given path using the format named by
 * {@code DEFAULT_CON}, creating the parent directory if it does not exist.
 *
 * @param bufferedImage the image to write
 * @param path destination file path
 * @return the written file, or {@code null} if the parent directory could not
 *         be created, no image writer is available for the format, or an I/O
 *         error occurred
 */
public static File saveImage(BufferedImage bufferedImage, String path){
    File file = new File(path);
    File parent = file.getParentFile();
    // mkdirs() returns false when the directory already exists (or was created
    // concurrently), so re-check with isDirectory() before giving up.
    if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
        System.err.println("Cannot create directory: " + parent);
        return null;
    }
    try {
        // ImageIO.write returns false (without throwing) when no writer
        // supports the requested format; in that case nothing was written.
        if (!ImageIO.write(bufferedImage, DEFAULT_CON, file)) {
            System.err.println("No image writer available for format: " + DEFAULT_CON);
            return null;
        }
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
    return file;
}