zxing+java 发票二维码识别(包含区块链发票二维码识别)
依赖引入
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.0</version>
</dependency>
<!-- 二维码识别 zxing库-->
<dependency>
<groupId>com.google.zxing</groupId>
<artifactId>core</artifactId>
<version>3.5.2</version>
</dependency>
<dependency>
<groupId>com.google.zxing</groupId>
<artifactId>javase</artifactId>
<version>3.5.2</version>
</dependency>
结果处理类
1、结果处理类HandleResult
/**
 * Carries the outcome of handling one decoded QR-code payload.
 */
public class HandleResult {

    /** Information parsed out of the QR code. */
    private String qcodeInfo;

    /** Textual processing result. */
    private String handleResult;

    /** Whether handling succeeded; a fresh instance starts out as successful. */
    private boolean handleSuccess = true;

    /** Creates an empty result that is flagged as successful. */
    public HandleResult() {
    }

    /**
     * Creates a successful result with both text fields populated.
     *
     * @param qcodeInfo    information parsed out of the QR code
     * @param handleResult textual processing result
     */
    public HandleResult(String qcodeInfo, String handleResult) {
        this.handleResult = handleResult;
        this.qcodeInfo = qcodeInfo;
    }

    /** @return the information parsed out of the QR code */
    public String getQcodeInfo() {
        return this.qcodeInfo;
    }

    /** @param qcodeInfo the information parsed out of the QR code */
    public void setQcodeInfo(String qcodeInfo) {
        this.qcodeInfo = qcodeInfo;
    }

    /** @return the textual processing result */
    public String getHandleResult() {
        return this.handleResult;
    }

    /** @param handleResult the textual processing result */
    public void setHandleResult(String handleResult) {
        this.handleResult = handleResult;
    }

    /** @return {@code true} unless handling was marked as failed */
    public boolean isHandleSuccess() {
        return this.handleSuccess;
    }

    /** @param handleSuccess {@code false} to mark the handling as failed */
    public void setHandleSuccess(boolean handleSuccess) {
        this.handleSuccess = handleSuccess;
    }
}
2、ResultMsgTemplate 发票处理结果类
// Invoice type (rendered by toString under the "发票类型" key)
private String invoiceType;
// Invoice code (rendered under the "发票编码" key)
private String invoiceCode;
// Invoice number (rendered under the "发票号" key)
private String invoiceNo;
// Amount excluding tax, kept as text (rendered under the "不含税金额" key)
private String amount;
// Issue date, kept as text (rendered under the "开票日期" key)
private String createDate;
// Additional key/value pairs that toString appends after the fixed fields
private Map<String,String> extraMsg = new HashMap<>(0);
// Plain accessors: each getter returns the backing field as-is and each
// setter overwrites it without validation or copying.

// Invoice type.
public String getInvoiceType() {
return invoiceType;
}
public void setInvoiceType(String invoiceType) {
this.invoiceType = invoiceType;
}
// Invoice code.
public String getInvoiceCode() {
return invoiceCode;
}
public void setInvoiceCode(String invoiceCode) {
this.invoiceCode = invoiceCode;
}
// Invoice number.
public String getInvoiceNo() {
return invoiceNo;
}
public void setInvoiceNo(String invoiceNo) {
this.invoiceNo = invoiceNo;
}
// Amount (stored as text).
public String getAmount() {
return amount;
}
public void setAmount(String amount) {
this.amount = amount;
}
// Issue date (stored as text).
public String getCreateDate() {
return createDate;
}
public void setCreateDate(String createDate) {
this.createDate = createDate;
}
// Extra key/value pairs; the getter exposes the live map, so callers can
// add entries through it.
public Map<String, String> getExtraMsg() {
return extraMsg;
}
public void setExtraMsg(Map<String, String> extraMsg) {
this.extraMsg = extraMsg;
}
/**
 * Renders the invoice as a JSON-like object string: the five fixed fields
 * first, followed by every entry of the extra-message map.
 *
 * <p>Extra keys are wrapped in double quotes exactly like the fixed keys
 * (they used to be emitted bare), and a {@code null} extra map is treated
 * as empty instead of raising a {@link NullPointerException}. Values are
 * not escaped, so the output is only valid JSON when no value contains a
 * quote or backslash.
 *
 * @return the textual representation of this invoice
 */
@Override
public String toString() {
    StringBuilder extras = new StringBuilder();
    if (extraMsg != null) {
        for (Map.Entry<String, String> entry : extraMsg.entrySet()) {
            extras.append(",\"").append(entry.getKey()).append("\":\"")
                    .append(entry.getValue()).append("\"");
        }
    }
    return "{" +
            "\"发票类型\":\"" + invoiceType + "\"" +
            ",\"发票编码\":\"" + invoiceCode + "\"" +
            ", \"发票号\":\"" + invoiceNo + "\"" +
            ", \"不含税金额\":\"" + amount + "\"" +
            ", \"开票日期\":\"" + createDate + "\"" +
            extras +
            "}";
}
发票处理接口
1、InfoHandler接口
// Strategy for turning one decoded QR-code text into a HandleResult.
public interface InfoHandler {
// Reports whether this handler is able to process the given text.
boolean canHandle(String info);
// Processes the given text and returns the outcome.
HandleResult handleInfo(String info);
}
2、普通增值税发票解析类 NormalInvoiceInfoHandler
此处需要对解析出的发票字段数组(二维码内容按逗号拆分后的各项值)有一定的理解,如果有疑问,可以查阅此篇文章
增值税发票解析说明
public class NormalInvoiceInfoHandler implements InfoHandler{
@Override
public boolean canHandle(String info) {
boolean flag = false;
String[] sequences= info.split(",");
//发票序列应该大于7小于10前两位长度都是2
if(sequences.length>=7&&sequences.length<=10){
flag = sequences[0].equals("01")&&"01,04,10,32".contains(sequences[1]);
}
return flag;
}
@Override
public HandleResult handleInfo(String info) {
ResultMsgTemplate resultMsgTemplate = new ResultMsgTemplate();
HandleResult handleResult = new HandleResult();
String[] sequences= info.split(",");
switch (sequences[1]){
case "01":
resultMsgTemplate.setInvoiceType("增值税专用发票");break;
case "04":
resultMsgTemplate.setInvoiceType("增值税普通发票");break;
case "10":
resultMsgTemplate.setInvoiceType("增值税电子普通发票");break;
case "32":
resultMsgTemplate.setInvoiceType("电子普通发票");break;
}
resultMsgTemplate.setInvoiceCode(sequences[2]);
resultMsgTemplate.setInvoiceNo(sequences[3]);
resultMsgTemplate.setAmount(sequences[4]);
resultMsgTemplate.setCreateDate( formatDateStr(sequences[5]));
handleResult.setQcodeInfo(info);
handleResult.setHandleResult(resultMsgTemplate.toString());
return handleResult ;
}
private String formatDateStr(String str){
if(str.length() >=8){
return str.substring(0,4)+"-"+str.substring(4,6)+"-"+str.substring(6,8);
}else if (str.length() == 6){
return str.substring(0,4)+"-"+str.substring(4,6);
}else {
return str;
}
}
3、车票处理类TrainInvoiceInfoHandler
/**
 * Handles QR payloads that consist solely of decimal digits (train tickets
 * and similar), reporting the digits back verbatim.
 */
public class TrainInvoiceInfoHandler implements InfoHandler {

    /**
     * One or more digits, matched against the whole input.
     *
     * <p>The previous pattern was written JavaScript-style as
     * {@code "/^\\d+$/"}. In Java the slashes are literal characters, so it
     * could never match and this handler never accepted anything.
     */
    private final static Pattern pattern = Pattern.compile("\\d+");

    /**
     * @param info decoded QR text, may be {@code null}
     * @return {@code true} when the text is non-empty and all digits
     */
    @Override
    public boolean canHandle(String info) {
        // An empty string cannot satisfy \d+, so only null needs a guard.
        return info != null && pattern.matcher(info).matches();
    }

    /**
     * @param info decoded QR text
     * @return a result carrying the raw text and a note that it is numeric
     */
    @Override
    public HandleResult handleInfo(String info) {
        HandleResult handleResult = new HandleResult();
        handleResult.setQcodeInfo(info);
        handleResult.setHandleResult("解析为纯数字" + info);
        return handleResult;
    }
}
4、区块链发票处理类UrlInvoiceInfoHandler
通常以深圳发票为例,深圳发票可能解析为两种情况
/**
 * Handles QR payloads that are URLs pointing at the Shenzhen blockchain
 * invoice service: the URL's query parameters are posted as JSON to the
 * service's detail endpoint and the returned {@code bill_record} is mapped
 * onto a {@code ResultMsgTemplate}.
 */
public class UrlInvoiceInfoHandler implements InfoHandler {
    private final static Logger logger = LoggerFactory.getLogger(UrlInvoiceInfoHandler.class);
    private final static String INVOICE_HOST = "bcfp.shenzhen.chinatax.gov.cn";
    private final static String QUERY_STR = "/dzswj/bers_ep_web/query_bill_detail";

    /**
     * @param info decoded QR text, may be {@code null}
     * @return {@code true} when the text mentions the invoice service host
     */
    @Override
    public boolean canHandle(String info) {
        return info != null && info.contains(INVOICE_HOST);
    }

    /**
     * Queries the invoice service for the bill referenced by the URL.
     *
     * <p>The request is only sent when the URL's host is exactly the invoice
     * service host. {@link #canHandle(String)} is a substring test, so without
     * this check a crafted QR code such as
     * {@code http://attacker/?x=bcfp.shenzhen.chinatax.gov.cn} would make the
     * server post to an arbitrary host.
     *
     * @param info decoded QR text (a URL)
     * @return the handling result; {@code handleSuccess} is {@code false} on
     *         any failure
     */
    @Override
    public HandleResult handleInfo(String info) {
        HandleResult handleResult = new HandleResult();
        try {
            URI uri = new URI(info);
            if (!INVOICE_HOST.equalsIgnoreCase(uri.getHost())) {
                logger.error("二维码Url主机不受信任: {}", uri.getHost());
                handleResult.setHandleSuccess(false);
                return handleResult;
            }

            RestTemplate restTemplate = new RestTemplate();
            // NOTE(review): assumes slot 1 of the default converter list is the
            // string converter - confirm for the Spring version in use.
            restTemplate.getMessageConverters().set(1, new StringHttpMessageConverter(StandardCharsets.UTF_8));

            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.APPLICATION_JSON);

            Map<String, String> params = UriParamsInfoHandler.parseUrlParameters(uri.getQuery());
            String url = uri.getScheme() + "://" + uri.getHost() + QUERY_STR;
            JSONObject jsonObject = new JSONObject();
            jsonObject.putAll(params);
            // The endpoint expects the hash under "tx_hash"; the QR URL carries it as "hash".
            jsonObject.put("tx_hash", jsonObject.get("hash"));
            logger.debug("区块链发票查询 url={} body={}", url, jsonObject);

            HttpEntity<String> httpEntity = new HttpEntity<>(jsonObject.toString(), headers);
            ResponseEntity<String> responseEntity = restTemplate.exchange(url, HttpMethod.POST, httpEntity, String.class);
            Map<?, ?> resJson = JSONObject.fromObject(responseEntity.getBody());

            Object retCode = resJson.get("retcode");
            boolean ok = retCode != null
                    && "0".equals(retCode.toString())
                    && "success".equalsIgnoreCase((String) resJson.get("retmsg"))
                    && (resJson.get("bill_record") instanceof Map);
            if (ok) {
                Map<?, ?> billRecord = (Map<?, ?>) resJson.get("bill_record");
                handleResult.setHandleResult(createResultMsgTemlateByJson(billRecord).toString());
                handleResult.setQcodeInfo((String) billRecord.get("tx_hash"));
            } else {
                handleResult.setHandleSuccess(false);
            }
        } catch (URISyntaxException e) {
            handleResult.setHandleSuccess(false);
            logger.error("二维码Url处理失败", e);
        } catch (Exception e) {
            // Boundary catch: network, JSON and cast failures all end up as a failed result.
            handleResult.setHandleSuccess(false);
            logger.error("请求数据失败", e);
        }
        return handleResult;
    }

    /**
     * Maps a {@code bill_record} object onto a result template.
     *
     * @param resJson the {@code bill_record} map from the service response
     * @return the populated template
     */
    private ResultMsgTemplate createResultMsgTemlateByJson(Map<?, ?> resJson) {
        ResultMsgTemplate resultMsgTemplate = new ResultMsgTemplate();
        resultMsgTemplate.setInvoiceType("区块链电子发票");

        // The amount arrives in cents (fen); convert to yuan with exact decimal
        // arithmetic and a locale-independent two-digit rendering.
        Object amountValue = resJson.get("amount");
        java.math.BigDecimal yuan = amountValue == null
                ? java.math.BigDecimal.ZERO
                : new java.math.BigDecimal(amountValue.toString().trim()).movePointLeft(2);
        resultMsgTemplate.setAmount(yuan.setScale(2, java.math.RoundingMode.HALF_UP).toPlainString());

        resultMsgTemplate.setInvoiceCode((String) resJson.get("bill_code"));
        resultMsgTemplate.setInvoiceNo((String) resJson.get("bill_num"));
        try {
            // NOTE(review): this interprets "bill_num" (the invoice number) as epoch
            // milliseconds, which looks like a copy/paste slip - the response
            // presumably carries a dedicated time field. Confirm against the
            // service's response schema before changing the key.
            long time = Long.parseLong((String) resJson.get("bill_num"));
            DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
            resultMsgTemplate.setCreateDate(format.format(new Date(time)));
        } catch (Exception e) {
            logger.error("发票信息日期格式转换错误", e);
        }
        resultMsgTemplate.getExtraMsg().put("买方", (String) resJson.get("buyer_name"));
        resultMsgTemplate.getExtraMsg().put("出售方", (String) resJson.get("seller_name"));
        return resultMsgTemplate;
    }
}
5、浙江通用发票处理类ZhejiangNormalInvoiceInfoHandler
/**
 * Handles comma-separated Zhejiang general invoice payloads, read by
 * position: index 0 is the invoice code, 1 the invoice number, 2 the date
 * and 3 the amount.
 */
public class ZhejiangNormalInvoiceInfoHandler implements InfoHandler {
    private final static Pattern pattern = Pattern.compile("([0-9]+,?){3}[0-9]+\\.?([0-9]+,?){1,2}");
    private final static String infoSeparator = ",";
    private final static String invoiceType = "浙江通用发票";
    /** Number of leading fields that {@link #handleInfo(String)} reads. */
    private final static int REQUIRED_FIELDS = 4;

    /**
     * Accepts text that matches the payload pattern and splits into at least
     * four fields.
     *
     * <p>The commas in the pattern are optional, so a plain digit string
     * matches it too. The field-count check keeps such input from being
     * accepted and then overrunning the array in {@link #handleInfo(String)}.
     *
     * @param info decoded QR text, may be {@code null}
     * @return {@code true} when this handler can process the payload
     */
    @Override
    public boolean canHandle(String info) {
        if (info == null || !pattern.matcher(info).matches()) {
            return false;
        }
        return info.split(infoSeparator).length >= REQUIRED_FIELDS;
    }

    /**
     * @param info decoded QR text that passed {@link #canHandle(String)}
     * @return the handling result with the rendered template and raw payload
     */
    @Override
    public HandleResult handleInfo(String info) {
        String[] infos = info.split(infoSeparator);

        ResultMsgTemplate resultMsgTemplate = new ResultMsgTemplate();
        resultMsgTemplate.setCreateDate(formatDateStr(infos[2]));
        resultMsgTemplate.setInvoiceNo(infos[1]);
        resultMsgTemplate.setInvoiceType(invoiceType);
        resultMsgTemplate.setInvoiceCode(infos[0]);
        resultMsgTemplate.setAmount(infos[3]);

        HandleResult handleResult = new HandleResult();
        handleResult.setHandleResult(resultMsgTemplate.toString());
        handleResult.setQcodeInfo(info);
        return handleResult;
    }

    /**
     * Inserts dashes into a compact date: 8 or more characters become
     * {@code yyyy-MM-dd} (extra characters dropped), exactly 6 become
     * {@code yyyy-MM}, anything else is returned unchanged.
     */
    private String formatDateStr(String str) {
        if (str.length() >= 8) {
            return str.substring(0, 4) + "-" + str.substring(4, 6) + "-" + str.substring(6, 8);
        } else if (str.length() == 6) {
            return str.substring(0, 4) + "-" + str.substring(4, 6);
        } else {
            return str;
        }
    }
}
6、特殊url二维码处理类UriParamsInfoHandler
一般这种情况常见于广东发票验证,发票解析后出现参数key
/**
 * Handles QR payloads that are URLs of known tax-service hosts whose format
 * is not otherwise recognised: the query-parameter values are joined with
 * commas and reported as the QR information.
 */
public class UriParamsInfoHandler implements InfoHandler {
    private final static Logger logger = LoggerFactory.getLogger(UriParamsInfoHandler.class);

    /** Host fragments this handler reacts to. */
    private final Set<String> addresses;

    /** Creates a handler that knows only the built-in hosts. */
    public UriParamsInfoHandler() {
        this(new HashSet<String>());
    }

    /**
     * Creates a handler for the given hosts plus the built-in ones.
     *
     * <p>The set is copied, so the caller's collection is neither modified
     * nor required to be mutable.
     *
     * @param addresses additional host fragments, may be {@code null}
     */
    public UriParamsInfoHandler(Set<String> addresses) {
        this.addresses = addresses == null ? new HashSet<String>() : new HashSet<>(addresses);
        this.addresses.add("bdyw.etax-gd.gov.cn");
        this.addresses.add("invoice.etax-gd.gov.cn");
    }

    /**
     * @param info decoded QR text, may be {@code null}
     * @return {@code true} when the text contains one of the known hosts
     */
    @Override
    public boolean canHandle(String info) {
        if (info == null) {
            return false;
        }
        for (String address : this.addresses) {
            if (info.contains(address)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Joins the URL's query-parameter values, in the order they appear in
     * the URL, into a comma-separated string.
     *
     * @param info decoded QR text (a URL)
     * @return the handling result; {@code handleSuccess} is {@code false}
     *         when the text is not a valid URI
     */
    @Override
    public HandleResult handleInfo(String info) {
        HandleResult handleResult = new HandleResult();
        try {
            URI uri = new URI(info);
            Map<String, String> params = parseUrlParameters(uri.getQuery());
            handleResult.setQcodeInfo(String.join(",", params.values()));
            handleResult.setHandleResult("特殊发票,格式无法识别[" + info + "]");
        } catch (URISyntaxException e) {
            logger.debug("二维码解析信息处理失败!" + info, e);
            handleResult.setHandleSuccess(false);
        }
        return handleResult;
    }

    /**
     * Parses a URL query string into a name/value map that keeps the
     * parameters in their original order.
     *
     * <p>A query without any {@code '='} is returned whole under the key
     * {@code "empty"}. A {@code null} query (URL without a query part)
     * yields an empty map instead of a {@link NullPointerException}.
     *
     * @param queryString the query part of a URL, may be {@code null}
     * @return a mutable map of parameter names to values, never {@code null}
     */
    public static Map<String, String> parseUrlParameters(String queryString) {
        Map<String, String> params = new java.util.LinkedHashMap<>();
        if (queryString == null) {
            return params;
        }
        if (!queryString.contains("=")) {
            params.put("empty", queryString);
            return params;
        }
        for (NameValuePair nameValuePair : URLEncodedUtils.parse(queryString, StandardCharsets.UTF_8)) {
            params.put(nameValuePair.getName(), nameValuePair.getValue());
        }
        return params;
    }
}
7、非增值税的二维码数字字母处理NumericLetterInvoiceInfoHandler
/**
 * Handles non-VAT QR payloads made up purely of ASCII digits and letters,
 * reporting the text back verbatim.
 */
public class NumericLetterInvoiceInfoHandler implements InfoHandler {

    /** The whole payload must be 8 to 63 digits and/or letters. */
    private final static Pattern LETTER_NUMBER_PATTERN = Pattern.compile("[0-9a-zA-Z]{8,63}");

    /**
     * @param info decoded QR text
     * @return {@code true} when the entire text matches the alphanumeric pattern
     */
    @Override
    public boolean canHandle(String info) {
        return LETTER_NUMBER_PATTERN.matcher(info).matches();
    }

    /**
     * @param info decoded QR text
     * @return a result carrying the raw text and a note describing it
     */
    @Override
    public HandleResult handleInfo(String info) {
        final String description = "解析为数字为特殊发票或凭证等" + info;
        HandleResult outcome = new HandleResult();
        outcome.setQcodeInfo(info);
        outcome.setHandleResult(description);
        return outcome;
    }
}
好了,准备了这么多了,准备开始解析图片中的二维码了!!!
注:中间有一部分代码之前是做图形优化的,现在有QRCodeReader类处理的更好
图片直接解析ImageQcodeDecoder
/**
 * Decodes a QR code from a raster image, either an uploaded image file or a
 * {@link BufferedImage} supplied by another decoder.
 *
 * <p>An instance keeps the image it loaded in a field while decoding, so one
 * instance must not be used for several uploads at the same time.
 */
public class ImageQcodeDecoder implements IQcodeDecoder {
    private final static Logger logger = Logger.getLogger(ImageQcodeDecoder.class);
    private BufferedImage imageBuffer;
    private List<BarcodeFormat> possibleTwoDimensionFormats;
    // Whether the input is expected to be a bare QR code
    private boolean isPureQrcode = false;
    // Maximum image width; wider uploads are scaled down to this width
    private final static double MAX_IMAGE_WIDTH_THRESHOULD = 2500.0;
    private final static double locatorRate = 9.0 / 30;
    private final static double locatorCenterDistanceRate = 30.0 / 45;

    /** Creates a decoder for images that are not assumed to be a bare QR code. */
    public ImageQcodeDecoder() {
        this(false);
    }

    /**
     * @param isPureQrcode whether the input is expected to be a bare QR code
     */
    public ImageQcodeDecoder(boolean isPureQrcode) {
        this.isPureQrcode = isPureQrcode;
        // Register every two-dimensional format; both constructors now do this,
        // previously this one left the list null.
        possibleTwoDimensionFormats = new ArrayList<>();
        possibleTwoDimensionFormats.add(BarcodeFormat.AZTEC);
        possibleTwoDimensionFormats.add(BarcodeFormat.DATA_MATRIX);
        possibleTwoDimensionFormats.add(BarcodeFormat.MAXICODE);
        possibleTwoDimensionFormats.add(BarcodeFormat.QR_CODE);
    }

    /**
     * Loads the upload into {@link #imageBuffer}, scales it down when it is
     * wider than the threshold and converts it to grayscale.
     *
     * @throws IOException if the upload cannot be read or is not an image
     */
    private void initDecoder(MultipartFile multipartFile) throws IOException {
        // ImageIO.read does not close the stream it is given.
        try (InputStream inputStream = multipartFile.getInputStream()) {
            this.imageBuffer = ImageIO.read(inputStream);
        }
        if (this.imageBuffer == null) {
            throw new IOException("无法将上传文件解析为图片");
        }
        // Avoid OOM on huge images: anything wider than the threshold is scaled to it.
        double quality = 1;
        if (this.imageBuffer.getWidth() > MAX_IMAGE_WIDTH_THRESHOULD) {
            quality = MAX_IMAGE_WIDTH_THRESHOULD / this.imageBuffer.getWidth();
        }
        this.imageBuffer = ImageUtils.compressImage(this.imageBuffer, quality);
        // Convert to grayscale
        this.imageBuffer = ImageUtils.grayImage(this.imageBuffer);
    }

    private BinaryBitmap getGlobalHistogramBinarizerBitMap(LuminanceSource luminanceSource) {
        Binarizer binarizer = new GlobalHistogramBinarizer(luminanceSource);
        return new BinaryBitmap(binarizer);
    }

    private BinaryBitmap getHybridBinarizerBitMap(LuminanceSource luminanceSource) {
        Binarizer binarizer = new HybridBinarizer(luminanceSource);
        return new BinaryBitmap(binarizer);
    }

    /**
     * @param multipartFile the upload, may be {@code null}
     * @return {@code true} when the upload can be read as an image
     */
    @Override
    public boolean canDecode(MultipartFile multipartFile) {
        if (multipartFile == null) {
            return false;
        }
        try (InputStream inputStream = multipartFile.getInputStream()) {
            return ImageIO.read(inputStream) != null;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Decodes the QR code contained in an uploaded image.
     *
     * @param multipartFile the uploaded image
     * @return the decoded results, empty when nothing was found
     * @throws IOException if the upload cannot be read as an image
     */
    @Override
    public List<Result> decode(MultipartFile multipartFile) throws IOException {
        initDecoder(multipartFile);
        return this.decode(this.imageBuffer);
    }

    /**
     * Decodes the QR code contained in the given image.
     *
     * <p>The image held by this instance (if any) is released when the
     * method exits, including on early return and when an exception
     * propagates.
     *
     * @param image the image to scan
     * @return a list with the decoded result, or an empty list when no QR
     *         code was located or decoded
     */
    public List<Result> decode(BufferedImage image) {
        List<Result> results = new ArrayList<>();
        try {
            Map<DecodeHintType, Object> hints = new HashMap<>();
            LuminanceSource luminanceSource = new BufferedImageLuminanceSource(image);
            // Text encoding
            hints.put(DecodeHintType.CHARACTER_SET, "UTF-8");
            // Spend more time for better accuracy
            hints.put(DecodeHintType.TRY_HARDER, Boolean.TRUE);

            // Skip decoding entirely when no finder patterns are present.
            FinderPatternInfo[] infos = getPatternInfos(luminanceSource, hints);
            if (infos == null || infos.length == 0) {
                return results;
            }

            Result imageDecodeResult = decodeWithFallbacks(luminanceSource, hints);
            if (imageDecodeResult != null) {
                results.add(imageDecodeResult);
            }
            logger.info(imageDecodeResult);
            return results;
        } finally {
            disposeImageBuffer();
        }
    }

    /**
     * Tries the decoders from cheapest to most thorough: multi-format reader
     * on a global-histogram bitmap, then the QR reader on a hybrid bitmap,
     * then the QR reader in pure-barcode mode.
     *
     * @return the decoded result, or {@code null} when every attempt failed
     *         to find a code
     */
    private Result decodeWithFallbacks(LuminanceSource luminanceSource, Map<DecodeHintType, Object> hints) {
        Reader imageDecodeReader = new MultiFormatReader();
        QRCodeReader reader = new QRCodeReader();
        Result imageDecodeResult = null;
        try {
            try {
                // Cheap path: works on low-end CPUs and uses little memory.
                imageDecodeResult = imageDecodeReader.decode(getGlobalHistogramBinarizerBitMap(luminanceSource), hints);
            } catch (FormatException | NotFoundException e) {
                // High-resolution input needs the more elaborate binarizer.
                imageDecodeResult = reader.decode(getHybridBinarizerBitMap(luminanceSource), hints);
            }
        } catch (NotFoundException e1) {
            // Still nothing: last attempt in pure-barcode mode.
            try {
                hints.put(DecodeHintType.PURE_BARCODE, Boolean.TRUE);
                imageDecodeResult = reader.decode(getHybridBinarizerBitMap(luminanceSource), hints);
            } catch (Exception e) {
                logger.error("无法找到二维码" + e.getMessage());
            }
        } catch (ChecksumException | FormatException e1) {
            logger.error("二维码格式错误或无法解析", e1);
            throw new QCodeDecodingException("二维码格式错误或无法解析");
        }
        return imageDecodeResult;
    }

    /** Runs the multi-code finder; returns {@code null} when nothing is found. */
    private FinderPatternInfo[] findQrcode(BinaryBitmap binaryBitmap, Map<DecodeHintType, Object> hints) {
        try {
            MultiFinderPatternFinder finderPatternFinder =
                    new MultiFinderPatternFinder(binaryBitmap.getBlackMatrix(), logger::debug);
            return finderPatternFinder.findMulti(hints);
        } catch (NotFoundException e) {
            return null;
        }
    }

    /** Looks for finder patterns, retrying with the hybrid binarizer when the global one finds none. */
    private FinderPatternInfo[] getPatternInfos(LuminanceSource luminanceSource, Map<DecodeHintType, Object> hints) {
        BinaryBitmap binaryBitmap = new BinaryBitmap(new GlobalHistogramBinarizer(luminanceSource));
        FinderPatternInfo[] infos = findQrcode(binaryBitmap, hints);
        if (infos == null) {
            binaryBitmap = new BinaryBitmap(new HybridBinarizer(luminanceSource));
            infos = findQrcode(binaryBitmap, hints);
        }
        return infos;
    }

    /** Releases the image held by this instance, if any. */
    private void disposeImageBuffer() {
        if (this.imageBuffer != null) {
            this.imageBuffer.flush();
            this.imageBuffer = null;
        }
    }

    /**
     * Computes the bounding box of the first three result points.
     *
     * @return the bounding box, or {@code null} when fewer than three points are given
     */
    private CropLocation calculateCrop(ResultPoint[] resultPoints) {
        if (resultPoints.length >= 3) {
            ResultPoint p1 = resultPoints[0];
            ResultPoint p2 = resultPoints[1];
            ResultPoint p3 = resultPoints[2];
            float startX = Math.min(Math.min(p1.getX(), p2.getX()), p3.getX());
            float startY = Math.min(Math.min(p1.getY(), p2.getY()), p3.getY());
            float endY = Math.max(Math.max(p1.getY(), p2.getY()), p3.getY());
            float endX = Math.max(Math.max(p1.getX(), p2.getX()), p3.getX());
            return new CropLocation((int) Math.floor(startX), (int) Math.floor(startY), (int) Math.ceil(endX - startX), (int) Math.ceil(endY - startY));
        }
        return null;
    }

    /**
     * Grows a crop box using the locator ratios and clips it to the image
     * bounds.
     */
    private CropLocation getContainQrcodeEntireSquare(CropLocation cropLocation, int imageWidth, int imageHeight) {
        double startX = cropLocation.getX() - cropLocation.getWidth() * locatorRate / 2;
        double startY = cropLocation.getY() - cropLocation.getHeight() * locatorRate / 2;
        startX = keepNotNegative(startX);
        startY = keepNotNegative(startY);
        double width = cropLocation.getWidth() / locatorCenterDistanceRate;
        double height = cropLocation.getHeight() / locatorCenterDistanceRate;
        width = startX + width > imageWidth ? imageWidth - startX : width;
        height = startY + height > imageHeight ? imageHeight - startY : height;
        return new CropLocation(
                (int) Math.floor(startX),
                (int) Math.floor(startY),
                (int) Math.ceil(width),
                (int) Math.ceil(height)
        );
    }

    /** Returns {@code num} when it is positive, otherwise 1. */
    private double keepNotNegative(double num) {
        return num > 0 ? num : 1;
    }

    /** A rectangular region of an image: top-left corner plus size, in pixels. */
    class CropLocation {
        private int x;
        private int y;
        private int width;
        private int height;

        public CropLocation() {
        }

        public CropLocation(int x, int y, int width, int height) {
            this.x = x;
            this.y = y;
            this.width = width;
            this.height = height;
        }

        public int getX() {
            return x;
        }

        public void setX(int x) {
            this.x = x;
        }

        public int getY() {
            return y;
        }

        public void setY(int y) {
            this.y = y;
        }

        public int getWidth() {
            return width;
        }

        public void setWidth(int width) {
            this.width = width;
        }

        public int getHeight() {
            return height;
        }

        public void setHeight(int height) {
            this.height = height;
        }
    }
}
等等,图片??? 他们报销大多数的都是pdf咋整!!! 寄!!
这时候就到pdfbox发挥作用了!!
代码如下:
// Load the PDF from the temporary file. PDDocument is Closeable, so close it
// once rendering is finished.
PDDocument pdfDocument = Loader.loadPDF(tempFile);
PDFRenderer renderer = new PDFRenderer(pdfDocument);
// Render page index 0 (the first page) at 300 DPI as an RGB image.
BufferedImage image = renderer.renderImageWithDPI(0, 300, ImageType.RGB);
记得要把缓存在内存中的文件删除哦!!!要不然容易oom!!!
简单pdf转图片,图片中二维码识别(采用了线程池,工具类,配置类放在最后)
/**
 * Decodes QR codes from a PDF by rendering its first pages to images and
 * scanning each image on a worker thread.
 */
public class PDFQcodeDecoder implements IQcodeDecoder {
    private final static Logger logger = Logger.getLogger(PDFQcodeDecoder.class);
    // Maximum number of pages that are scanned
    private final static int MAXPAGE = 4;
    // Render resolution; 300 is usually enough. Higher values increase memory
    // use and can lead to OOM.
    private int scanDpi;

    /**
     * @param scanDpi render resolution in DPI
     */
    public PDFQcodeDecoder(int scanDpi) {
        this.scanDpi = scanDpi;
    }

    /** Creates a decoder that renders at 300 DPI. */
    public PDFQcodeDecoder() {
        this.scanDpi = 300;
    }

    /**
     * @param multipartFile the upload, may be {@code null}
     * @return {@code true} when the upload's file name has the extension {@code pdf}
     */
    @Override
    public boolean canDecode(MultipartFile multipartFile) {
        if (multipartFile == null) {
            return false;
        }
        String fileName = multipartFile.getOriginalFilename();
        return StringUtil.isNotEmpty(fileName) && "pdf".equalsIgnoreCase(FileUtils.getExtend(fileName));
    }

    /**
     * Renders up to {@link #MAXPAGE} pages and decodes the QR codes on them.
     *
     * <p>The document is closed and the temporary file deleted on every exit
     * path, including failures.
     *
     * @param multipartFile the uploaded PDF
     * @return the decoded results of all scanned pages, in completion order
     * @throws IOException if the upload cannot be stored or parsed as a PDF
     */
    @Override
    public List<Result> decode(MultipartFile multipartFile) throws IOException {
        File file = ImageUtils.transferToFile(multipartFile);
        try (PDDocument document = Loader.loadPDF(file)) {
            int pageCount = Math.min(document.getNumberOfPages(), MAXPAGE);
            return decodePages(document, pageCount);
        } finally {
            if (file.exists()) {
                logger.info(file.getName() + " 文件是否正常删除:" + file.delete());
            }
        }
    }

    /** Schedules one task per page and waits for all of them. */
    private List<Result> decodePages(final PDDocument document, int pageCount) {
        // NOTE(review): if createDefaultExecutor() builds a new pool per call, that
        // pool is never shut down here - confirm whether it returns a shared instance.
        ThreadPoolTaskExecutorMy defaultExecutor = ThreadPoolExecutorConfig.createDefaultExecutor();
        final long start = System.currentTimeMillis();
        // Successful results, appended from worker threads
        final List<Result> successList = new Vector<>();
        // Failures, keyed by page index
        final Map<String, String> errorMap = new ConcurrentHashMap<>();

        List<CompletableFuture<List<Result>>> futures = new ArrayList<>();
        for (int i = 0; i < pageCount; i++) {
            final int pageIndex = i;
            futures.add(CompletableFuture
                    .supplyAsync(() -> decodePage(document, pageIndex), defaultExecutor)
                    .handle((results, throwable) -> {
                        if (throwable != null) {
                            // ConcurrentHashMap rejects null values and a throwable's
                            // message may be null, so store its string form instead.
                            errorMap.put(String.valueOf(pageIndex), String.valueOf(throwable));
                            logger.error("任务" + pageIndex + "异常! e=" + throwable + ", " + new Date(), throwable);
                        } else {
                            successList.addAll(results);
                        }
                        return results;
                    }));
        }

        CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
                .whenComplete((v, th) -> logger.info(StrUtil.format(
                        "所有任务执行完成触发\n resultList={}\n errorMap={}\n耗时={}ms",
                        successList, errorMap, (System.currentTimeMillis() - start))))
                .join();
        return successList;
    }

    /**
     * Renders one page and scans the image for a QR code. Failures are logged
     * and produce an empty list.
     */
    private List<Result> decodePage(PDDocument document, int pageIndex) {
        StopWatch stopWatch = DateUtil.createStopWatch();
        stopWatch.start("转化开始:decodePage");
        List<Result> resultList = new ArrayList<>();
        BufferedImage image = null;
        try {
            // A single PDDocument must not be accessed by several threads at once,
            // so rendering is serialised; the QR scan below still runs in parallel.
            synchronized (document) {
                PDFRenderer renderer = new PDFRenderer(document);
                image = renderer.renderImageWithDPI(pageIndex, scanDpi, ImageType.RGB);
            }
            ImageQcodeDecoder qCodeDecoder = new ImageQcodeDecoder();
            resultList.addAll(qCodeDecoder.decode(image));
        } catch (Exception e) {
            logger.error("\n解析文件失败或二维码解析失败" + e.getMessage() + "\n", e);
        } finally {
            if (image != null) {
                image.flush();
            }
        }
        stopWatch.stop();
        logger.info("PDFQcodeDecoder转化已完成,花费时间:" + stopWatch.getTotalTimeSeconds() + "s");
        return resultList;
    }
}
如果上面的方式还不能解决问题,还可以使用下面的方式,解决复杂二维码的识别问题
复杂pdf转图片中二维码解析 PDFResourceQRcodeDecoder
/**
 * Decodes QR codes from a PDF by pulling the embedded image objects out of
 * its first pages and scanning each of them, instead of rendering whole
 * pages.
 */
public class PDFResourceQRcodeDecoder implements IQcodeDecoder {
    private final static Logger logger = Logger.getLogger(PDFResourceQRcodeDecoder.class);
    // Maximum number of pages that are scanned
    private final static int MAXPAGE = 4;
    // Render resolution; kept for API compatibility, not used when extracting images
    private int scanDpi;

    /**
     * @param scanDpi render resolution in DPI
     */
    public PDFResourceQRcodeDecoder(int scanDpi) {
        this.scanDpi = scanDpi;
    }

    /** Creates a decoder configured for 300 DPI. */
    public PDFResourceQRcodeDecoder() {
        this.scanDpi = 300;
    }

    /**
     * @param multipartFile the upload, may be {@code null}
     * @return {@code true} when the upload's file name has the extension {@code pdf}
     */
    @Override
    public boolean canDecode(MultipartFile multipartFile) {
        if (multipartFile == null) {
            return false;
        }
        String fileName = multipartFile.getOriginalFilename();
        return StringUtil.isNotEmpty(fileName) && "pdf".equalsIgnoreCase(FileUtils.getExtend(fileName));
    }

    /**
     * Scans the embedded images of up to {@link #MAXPAGE} pages.
     *
     * <p>The document is closed and the temporary file deleted on every exit
     * path, including failures.
     *
     * @param multipartFile the uploaded PDF
     * @return the decoded results, page by page
     * @throws IOException if the upload cannot be stored or parsed as a PDF
     */
    @Override
    public List<Result> decode(MultipartFile multipartFile) throws IOException {
        File file = ImageUtils.transferToFile(multipartFile);
        try (PDDocument document = Loader.loadPDF(file)) {
            int pageCount = Math.min(document.getNumberOfPages(), MAXPAGE);
            return decodePages(document, pageCount);
        } finally {
            if (file.exists()) {
                logger.info(file.getName() + " 文件是否正常删除:" + file.delete());
            }
        }
    }

    /** Schedules one task per page, waits for all and concatenates their results in page order. */
    private List<Result> decodePages(final PDDocument document, int pageCount) {
        // NOTE(review): if createDefaultExecutor() builds a new pool per call, that
        // pool is never shut down here - confirm whether it returns a shared instance.
        ThreadPoolTaskExecutorMy defaultExecutor = ThreadPoolExecutorConfig.createDefaultExecutor();

        List<CompletableFuture<List<Result>>> futures = new ArrayList<>();
        for (int i = 0; i < pageCount; i++) {
            final int pageIndex = i;
            futures.add(CompletableFuture
                    .supplyAsync(() -> decodePage(document, pageIndex), defaultExecutor)
                    .handle((results, throwable) -> {
                        if (throwable != null) {
                            logger.error("任务" + pageIndex + "异常! e=" + throwable + ", " + new Date(), throwable);
                            return new ArrayList<Result>();
                        }
                        return results;
                    }));
        }

        List<Result> all = new ArrayList<>();
        for (CompletableFuture<List<Result>> future : futures) {
            all.addAll(future.join());
        }
        return all;
    }

    /** Extracts the images of one page and scans each for a QR code. */
    private List<Result> decodePage(PDDocument document, int pageIndex) {
        StopWatch stopWatch = DateUtil.createStopWatch();
        stopWatch.start("转化开始:decodePage");
        List<Result> resultList = new ArrayList<>();
        for (java.awt.image.BufferedImage image : extractImages(document, pageIndex)) {
            ImageQcodeDecoder qCodeDecoder = new ImageQcodeDecoder(true);
            resultList.addAll(qCodeDecoder.decode(image));
        }
        stopWatch.stop();
        logger.info("二维码无法找到时,PDFResourceQRcodeDecoder转化已完成,花费时间:" + stopWatch.getTotalTimeSeconds() + "s");
        return resultList;
    }

    /**
     * Reads every image XObject of a page into memory.
     *
     * <p>A single PDDocument must not be accessed by several threads at once,
     * so all document access happens under a lock on the document; the QR
     * scans run outside it.
     */
    private List<java.awt.image.BufferedImage> extractImages(PDDocument document, int pageIndex) {
        List<java.awt.image.BufferedImage> images = new ArrayList<>();
        synchronized (document) {
            PDPage pdPage = document.getPage(pageIndex);
            if (pdPage.getResources() == null) {
                // A page without a resource dictionary has no images.
                return images;
            }
            for (COSName cosName : pdPage.getResources().getXObjectNames()) {
                if (!pdPage.getResources().isImageXObject(cosName)) {
                    continue;
                }
                try {
                    PDImageXObject imageXObject = (PDImageXObject) pdPage.getResources().getXObject(cosName);
                    logger.debug("当前图片获取格式为:" + imageXObject.getSuffix());
                    images.add(imageXObject.getImage());
                } catch (IOException e) {
                    logger.error(StrUtil.format("读取pdf图片资源失败!{}", e.getMessage()), e);
                }
            }
        }
        return images;
    }
}
如果遇到ofd附件中的二维码怎么办?wtf!!?失算了
<dependency>
<groupId>org.ofdrw</groupId>
<artifactId>ofdrw-full</artifactId>
<version>1.5.2</version>
</dependency>
OFD文件处理OFDQRcodeDecoder
/**
 * Extracts the QR-code text from an OFD file by reading the
 * {@code GraphCode} element of each attachment, without any image
 * processing.
 */
public class OFDQRcodeDecoder implements IQcodeDecoder {
    private final static Logger logger = LoggerFactory.getLogger(OFDQRcodeDecoder.class);
    private String tempUrl = "/ofdTmp";
    private Path tempFilePath;
    private final static String QRCODE_TAG_NAME = "GraphCode";

    public OFDQRcodeDecoder() {
    }

    /**
     * @param multipartFile the upload, may be {@code null}
     * @return {@code true} when the upload's file name has the extension {@code ofd}
     */
    @Override
    public boolean canDecode(MultipartFile multipartFile) {
        if (multipartFile == null) {
            return false;
        }
        String fileName = multipartFile.getOriginalFilename();
        return StringUtil.isNotEmpty(fileName) && "ofd".equalsIgnoreCase(FileUtils.getExtend(fileName));
    }

    /**
     * Copies the upload into a uniquely named file under the temporary
     * directory and remembers its path.
     *
     * @param multipartFile the uploaded OFD file
     * @throws IOException if the directory cannot be created or the copy fails
     */
    private void initDecoder(MultipartFile multipartFile) throws IOException {
        String fileName = UploadUtils.getUUIDName(multipartFile.getOriginalFilename());
        File tmpdir = new File(UploadUtils.getUploadRootDir().getPath() + tempUrl);
        // Re-check after mkdirs so a concurrent creator does not count as failure.
        if (!tmpdir.isDirectory() && !tmpdir.mkdirs() && !tmpdir.isDirectory()) {
            throw new IOException("无法创建临时目录: " + tmpdir);
        }
        logger.debug("ofd临时目录: {}", tmpdir);
        File file = new File(tmpdir.getPath() + File.separator + fileName);
        tempFilePath = file.toPath();
        try (InputStream inputStream = multipartFile.getInputStream();
             OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file))) {
            StreamUtils.copy(inputStream, outputStream);
        }
    }

    /**
     * Reads the QR-code text of every attachment in the OFD file.
     *
     * <p>Attachments without a {@code GraphCode} element are skipped. The
     * reader and the temporary copy are released on every exit path.
     *
     * @param multipartFile the uploaded OFD file
     * @return one result per attachment that carries QR-code text
     * @throws IOException if the upload cannot be stored or opened
     */
    @Override
    public List<Result> decode(MultipartFile multipartFile) throws IOException {
        List<Result> results = new ArrayList<>();
        initDecoder(multipartFile);
        OFDReader ofdReader = null;
        DocDir docDir = null;
        try {
            ofdReader = new OFDReader(tempFilePath);
            docDir = ofdReader.getOFDDir().obtainDocDefault();
            ST_Loc st_loc = docDir.getDocument().getAttachments();
            if (st_loc == null) {
                // No attachment list, hence no QR-code text to extract.
                return results;
            }
            ResourceLocator locator = ofdReader.getResourceLocator();
            locator.save();
            locator.cd(docDir);
            Attachments attachments = locator.get(st_loc, Attachments::new);
            for (CT_Attachment attachment : attachments.getAttachments()) {
                OFDInvoice ofdInvoice = locator.get(
                        ST_Loc.getInstance(st_loc.parent()).cat(attachment.getFileLoc()), OFDInvoice::new);
                // Pull the QR-code string out of the attachment.
                Element ofdElement = ofdInvoice.getElementByName(QRCODE_TAG_NAME);
                if (ofdElement != null) {
                    results.add(new Result(ofdElement.getText(), null, null, null));
                }
            }
        } catch (DocumentException e) {
            logger.debug("ofd 文件解析错误", e);
        } finally {
            try {
                if (docDir != null) {
                    docDir.close();
                }
            } finally {
                try {
                    if (ofdReader != null) {
                        ofdReader.close();
                    }
                } finally {
                    File tempFile = tempFilePath.toFile();
                    if (tempFile.exists()) {
                        logger.info("{} 文件是否正常删除:{}", tempFile.getName(), tempFile.delete());
                    }
                }
            }
        }
        return results;
    }
}
以上都这么多处理方式了,是时候放在一起处理发票文件了!!
二维码处理工厂 QRCodeDecodeFactory
其实这里可以优化成filter的处理方式,有大佬能提供一下建议吗?!跪求!!!请原谅我这丑陋的代码
/**
 * Entry point that runs an uploaded file through the file decoders (image,
 * OFD, PDF) and then through the text handlers that interpret the decoded
 * QR-code content.
 */
public class QRCodeDecodeFactory {
    private final static Logger logger = LoggerFactory.getLogger(QRCodeDecodeFactory.class);

    /** File decoders; each factory instance owns its own set. */
    private final List<IQcodeDecoder> qrcodeDecoders = new ArrayList<>();

    /** Text handlers, shared by all factory instances and tried in this order. */
    private static final List<InfoHandler> handlers = new ArrayList<>();

    static {
        handlers.add(new NormalInvoiceInfoHandler());
        handlers.add(new UrlInvoiceInfoHandler());
        handlers.add(new UriParamsInfoHandler());
        // Class name corrected: it was misspelled "NumericLetterInvoicInfoHandler".
        handlers.add(new NumericLetterInvoiceInfoHandler());
        handlers.add(new ZhejiangNormalInvoiceInfoHandler());
        handlers.add(new DefaultProvinceInvoiceInfoHandler());
        handlers.add(new TrainInvoiceInfoHandler());
    }

    private QRCodeDecodeFactory() {
        qrcodeDecoders.add(new ImageQcodeDecoder());
        qrcodeDecoders.add(new OFDQRcodeDecoder());
        qrcodeDecoders.add(new PDFQcodeDecoder());
        qrcodeDecoders.add(new PDFResourceQRcodeDecoder());
    }

    /**
     * Creates a fresh factory with its own decoders. Instances are not meant
     * to be shared between concurrent requests.
     *
     * @return a new factory
     */
    public static QRCodeDecodeFactory createNewQrcodeDecodeFactory() {
        return new QRCodeDecodeFactory();
    }

    /**
     * Decodes the QR codes in the upload and interprets their content.
     *
     * <p>Decoders are tried in registration order; the first one that
     * accepts the file and returns at least one result wins.
     *
     * @param multipartFile the uploaded file, may be {@code null}
     * @return one handling result per non-empty decoded text; empty when the
     *         file is {@code null} or nothing was decoded
     * @throws IOException if a decoder fails to read the file
     */
    public List<HandleResult> decode(MultipartFile multipartFile) throws IOException {
        List<HandleResult> results = new ArrayList<>();
        if (multipartFile == null) {
            return results;
        }
        for (IQcodeDecoder decoder : qrcodeDecoders) {
            if (!decoder.canDecode(multipartFile)) {
                continue;
            }
            List<Result> decodeResults = decoder.decode(multipartFile);
            for (Result result : decodeResults) {
                if (result != null && StringUtil.isNotEmpty(result.getText())) {
                    results.add(handleDecodeResult(result.getText()));
                    logger.info("解析结果:{}", result.getText());
                }
            }
            if (!decodeResults.isEmpty()) {
                break;
            }
        }
        return results;
    }

    /**
     * Passes the decoded text to the first handler that accepts it.
     *
     * <p>A handler that throws is logged and skipped so the remaining
     * handlers still get a chance.
     *
     * @param decodeResult decoded QR-code text
     * @return the handler's result, or a failed empty result when no handler
     *         accepted the text
     */
    private HandleResult handleDecodeResult(String decodeResult) {
        for (InfoHandler infoHandler : handlers) {
            try {
                if (infoHandler.canHandle(decodeResult)) {
                    return infoHandler.handleInfo(decodeResult);
                }
            } catch (Exception ex) {
                logger.error("发票识别过程中出现一异常", ex);
            }
        }
        HandleResult handleResult = new HandleResult();
        handleResult.setHandleSuccess(false);
        return handleResult;
    }
}
- [x]
完成任务,下面是一些线程配置参数和文件处理类
线程池相关,spring版本4.0较低,线程传递TaskDecorator需要自己手写?(其实我是从高版本抄过来的哈哈哈)
1、ThreadPoolExecutorProperties
// Constant holder with the settings for the asynchronous task thread pool.
public class ThreadPoolExecutorProperties {
/** Core pool size. */
public static final Integer corePoolSize = 8;
/** Maximum pool size. */
public static final Integer maxPoolSize = 16;
/** Capacity of the task queue. */
public static final Integer queueCapacity = 10000;
/** Name prefix for the pool's threads. */
public static final String threadNamePrefix = "um-async-service-";
/** Thread keep-alive time, in seconds. */
public static final Integer keepAliveSeconds = 300;
/** Whether shutdown waits for submitted tasks to complete. */
public static final Boolean waitForTasksToCompleteOnShutdown = true;
/** Rejection policy for when the pool is at its maximum size and the queue is full: CallerRunsPolicy runs the task on the submitting thread instead of a pool thread. */
public static final RejectedExecutionHandler rejectedExecutionHandler = new ThreadPoolExecutor.CallerRunsPolicy();
}
2、ThreadPoolTaskExecutorMy
public class ThreadPoolTaskExecutorMy extends ThreadPoolTaskExecutor {
// Lock object guarding the pool-size and keep-alive accessors below.
private final Object poolSizeMonitor = new Object();
// Configuration values with their defaults; the setters below overwrite them.
private int corePoolSize = 1;
private int maxPoolSize = Integer.MAX_VALUE;
private int keepAliveSeconds = 60;
private int queueCapacity = Integer.MAX_VALUE;
private boolean allowCoreThreadTimeOut = false;
private boolean prestartAllCoreThreads = false;
// Optional decorator; when set, initializeExecutor wraps each submitted Runnable with it.
private TaskDecorator taskDecorator;
// The live executor; when non-null, the pool-size/keep-alive setters forward changes to it.
private ThreadPoolExecutor threadPoolExecutor;
// Runnable decorator to user-level FutureTask, if different
private final Map<Runnable, Object> decoratedTaskMap =
new ConcurrentReferenceHashMap<>(16, ConcurrentReferenceHashMap.ReferenceType.WEAK);
/**
 * Set the ThreadPoolExecutor's core pool size.
 * Default is 1.
 * <p><b>This setting can be modified at runtime, for example through JMX.</b>
 * <p>Runs under the pool-size lock; the value is also applied to the live
 * executor when one is present.
 */
public void setCorePoolSize(int corePoolSize) {
synchronized (this.poolSizeMonitor) {
if (this.threadPoolExecutor != null) {
this.threadPoolExecutor.setCorePoolSize(corePoolSize);
}
this.corePoolSize = corePoolSize;
}
}
/**
 * Return the ThreadPoolExecutor's core pool size (the configured value,
 * read under the pool-size lock).
 */
public int getCorePoolSize() {
synchronized (this.poolSizeMonitor) {
return this.corePoolSize;
}
}
/**
 * Set the ThreadPoolExecutor's maximum pool size.
 * Default is {@code Integer.MAX_VALUE}.
 * <p><b>This setting can be modified at runtime, for example through JMX.</b>
 * <p>Runs under the pool-size lock; the value is also applied to the live
 * executor when one is present.
 */
public void setMaxPoolSize(int maxPoolSize) {
synchronized (this.poolSizeMonitor) {
if (this.threadPoolExecutor != null) {
this.threadPoolExecutor.setMaximumPoolSize(maxPoolSize);
}
this.maxPoolSize = maxPoolSize;
}
}
/**
 * Return the ThreadPoolExecutor's maximum pool size (the configured value,
 * read under the pool-size lock).
 */
public int getMaxPoolSize() {
synchronized (this.poolSizeMonitor) {
return this.maxPoolSize;
}
}
/** * Set the ThreadPoolExecutor's keep-alive seconds. * Default is 60. * <p><b>This setting can be modified at runtime, for example through JMX.</b> */
public void setKeepAliveSeconds(int keepAliveSeconds) {
synchronized (this.poolSizeMonitor) {
if (this.threadPoolExecutor != null) {
this.threadPoolExecutor.setKeepAliveTime(keepAliveSeconds, TimeUnit.SECONDS);
}
this.keepAliveSeconds = keepAliveSeconds;
}
}
/** * Return the ThreadPoolExecutor's keep-alive seconds. */
public int getKeepAliveSeconds() {
synchronized (this.poolSizeMonitor) {
return this.keepAliveSeconds;
}
}
public void setQueueCapacity(int queueCapacity) {
this.queueCapacity = queueCapacity;
}
public void setAllowCoreThreadTimeOut(boolean allowCoreThreadTimeOut) {
this.allowCoreThreadTimeOut = allowCoreThreadTimeOut;
}
public void setPrestartAllCoreThreads(boolean prestartAllCoreThreads) {
this.prestartAllCoreThreads = prestartAllCoreThreads;
}
public void setTaskDecorator(TaskDecorator taskDecorator) {
this.taskDecorator = taskDecorator;
}
@Override
protected ExecutorService initializeExecutor(
ThreadFactory threadFactory, RejectedExecutionHandler rejectedExecutionHandler) {
BlockingQueue<Runnable> queue = createQueue(this.queueCapacity);
ThreadPoolExecutor executor;
if (this.taskDecorator != null) {
executor = new ThreadPoolExecutor(
this.corePoolSize, this.maxPoolSize, this.keepAliveSeconds, TimeUnit.SECONDS,
queue, threadFactory, rejectedExecutionHandler) {
@Override
public void execute(Runnable command) {
Runnable decorated = taskDecorator.decorate(command);
if (decorated != command) {
decoratedTaskMap.put(decorated, command);
}
super.execute(decorated);
}
};
} else {
executor = new ThreadPoolExecutor(
this.corePoolSize, this.maxPoolSize, this.keepAliveSeconds, TimeUnit.SECONDS,
queue, threadFactory, rejectedExecutionHandler);
}
if (this.allowCoreThreadTimeOut) {
executor.allowCoreThreadTimeOut(true);
}
if (this.prestartAllCoreThreads) {
executor.prestartAllCoreThreads();
}
this.threadPoolExecutor = executor;
return executor;
}
protected BlockingQueue<Runnable> createQueue(int queueCapacity) {
if (queueCapacity > 0) {
return new LinkedBlockingQueue<>(queueCapacity);
} else {
return new SynchronousQueue<>();
}
}
public ThreadPoolExecutor getThreadPoolExecutor() throws IllegalStateException {
Assert.state(this.threadPoolExecutor != null, "ThreadPoolTaskExecutor not initialized");
return this.threadPoolExecutor;
}
public int getPoolSize() {
if (this.threadPoolExecutor == null) {
// Not initialized yet: assume core pool size.
return this.corePoolSize;
}
return this.threadPoolExecutor.getPoolSize();
}
public int getActiveCount() {
if (this.threadPoolExecutor == null) {
// Not initialized yet: assume no active threads.
return 0;
}
return this.threadPoolExecutor.getActiveCount();
}
@Override
public void execute(Runnable task) {
Executor executor = getThreadPoolExecutor();
try {
executor.execute(task);
} catch (RejectedExecutionException ex) {
throw new TaskRejectedException("Executor [" + executor + "] did not accept task: " + task, ex);
}
}
@Deprecated
@Override
public void execute(Runnable task, long startTimeout) {
execute(task);
}
@Override
public Future<?> submit(Runnable task) {
ExecutorService executor = getThreadPoolExecutor();
try {
return executor.submit(task);
} catch (RejectedExecutionException ex) {
throw new TaskRejectedException("Executor [" + executor + "] did not accept task: " + task, ex);
}
}
@Override
public <T> Future<T> submit(Callable<T> task) {
ExecutorService executor = getThreadPoolExecutor();
try {
return executor.submit(task);
} catch (RejectedExecutionException ex) {
throw new TaskRejectedException("Executor [" + executor + "] did not accept task: " + task, ex);
}
}
@Override
public ListenableFuture<?> submitListenable(Runnable task) {
ExecutorService executor = getThreadPoolExecutor();
try {
ListenableFutureTask<Object> future = new ListenableFutureTask<>(task, null);
executor.execute(future);
return future;
} catch (RejectedExecutionException ex) {
throw new TaskRejectedException("Executor [" + executor + "] did not accept task: " + task, ex);
}
}
@Override
public <T> ListenableFuture<T> submitListenable(Callable<T> task) {
ExecutorService executor = getThreadPoolExecutor();
try {
ListenableFutureTask<T> future = new ListenableFutureTask<>(task);
executor.execute(future);
return future;
} catch (RejectedExecutionException ex) {
throw new TaskRejectedException("Executor [" + executor + "] did not accept task: " + task, ex);
}
}
protected void cancelRemainingTask(Runnable task) {
if (task instanceof Future) {
((Future<?>) task).cancel(true);
}
Object original = this.decoratedTaskMap.get(task);
if (original instanceof Future) {
((Future<?>) original).cancel(true);
}
}
}
3、TaskDecorator
/**
 * Callback that wraps a task before it is executed, for example to carry
 * context from the submitting thread into the thread that runs the task.
 *
 * <p>The interface has a single abstract method, so it can be implemented with a
 * lambda or method reference.
 */
@FunctionalInterface
public interface TaskDecorator {
    /**
     * Decorates the given task.
     *
     * @param runnable the task about to be executed
     * @return the runnable to execute in its place; this may be the same instance
     *         when no decoration is needed
     */
    Runnable decorate(@NonNull Runnable runnable);
}
4、ThreadPoolExecutorConfig (核心配置)
/**
 * Spring configuration that creates the asynchronous task executor, configured from
 * the constants in {@code ThreadPoolExecutorProperties}.
 */
@Slf4j
@Configuration
@RequiredArgsConstructor
public class ThreadPoolExecutorConfig {

    /**
     * Builds an initialized executor without going through the Spring container.
     * Every call creates a new pool.
     *
     * @return a new, initialized executor
     */
    public static ThreadPoolTaskExecutorMy createDefaultExecutor() {
        return new ThreadPoolExecutorConfig().buildAsyncExecutor();
    }

    /**
     * Creates, configures and initializes the executor registered as bean {@code "executor"}.
     *
     * @return the initialized executor
     */
    @Bean(name = "executor")
    public ThreadPoolTaskExecutorMy buildAsyncExecutor() {
        ThreadPoolTaskExecutorMy executor = new ThreadPoolTaskExecutorMy();
        // Core number of threads.
        executor.setCorePoolSize(ThreadPoolExecutorProperties.corePoolSize);
        // Maximum number of threads.
        executor.setMaxPoolSize(ThreadPoolExecutorProperties.maxPoolSize);
        // Capacity of the work queue.
        executor.setQueueCapacity(ThreadPoolExecutorProperties.queueCapacity);
        // Name prefix of the pool's threads.
        executor.setThreadNamePrefix(ThreadPoolExecutorProperties.threadNamePrefix);
        // Keep-alive time of idle threads, in seconds.
        executor.setKeepAliveSeconds(ThreadPoolExecutorProperties.keepAliveSeconds);
        // Whether to wait for submitted tasks to finish on shutdown.
        executor.setWaitForTasksToCompleteOnShutdown(ThreadPoolExecutorProperties.waitForTasksToCompleteOnShutdown);
        // Rejection policy used when the pool and its queue are both full
        // (CALLER_RUNS executes the task on the submitting thread instead of a pool thread).
        executor.setRejectedExecutionHandler(ThreadPoolExecutorProperties.rejectedExecutionHandler);
        // Propagate the submitting thread's request context to the task.
        executor.setTaskDecorator(new ContextDecorator());
        // Create the underlying pool.
        executor.initialize();
        return executor;
    }

    /**
     * Decorator that captures the submitting thread's {@link RequestAttributes} and binds
     * them to whichever thread runs the task for the duration of the task.
     */
    static class ContextDecorator implements TaskDecorator {

        /**
         * Wraps {@code runnable} so that it runs with the request attributes that were
         * current on the thread calling this method.
         *
         * @param runnable the task to wrap
         * @return the wrapping task
         */
        @Override
        public @NonNull Runnable decorate(@NonNull Runnable runnable) {
            RequestAttributes captured;
            try {
                captured = RequestContextHolder.currentRequestAttributes();
            } catch (Exception e) {
                // No request is bound to the submitting thread; run the task without one.
                log.debug("获取 RequestContextHolder 上下文失败:当前线程可能执行的是定时任务!");
                captured = null;
            }
            final RequestAttributes attrs = captured;

            return () -> {
                if (attrs == null) {
                    runnable.run();
                    return;
                }
                // Remember what the executing thread had bound. With a caller-runs rejection
                // policy the task may run on the submitting request thread itself, and a
                // blind reset afterwards would wipe that thread's own request context.
                RequestAttributes previous = RequestContextHolder.getRequestAttributes();
                RequestContextHolder.setRequestAttributes(attrs);
                try {
                    runnable.run();
                } finally {
                    if (previous != null) {
                        RequestContextHolder.setRequestAttributes(previous);
                    } else {
                        RequestContextHolder.resetRequestAttributes();
                    }
                }
            };
        }
    }
}
图像的工具类
其中部分图像处理方式(例如二值化和二维码定位)实现得还比较粗糙,如果你有更好的处理方案,欢迎联系我。
/**
 * Static helpers for checking uploaded files and for pre-processing images
 * (grey-scaling, scaling, denoising, thresholding).
 */
public class ImageUtils {

    /** A pixel whose R+G+B sum is at or below this value counts as black, above it as white. */
    private static final int BLACK_SUM_THRESHOLD = 300;

    /**
     * Checks whether the upload can be decoded as an image by {@link ImageIO}.
     *
     * @param file the uploaded file
     * @return {@code true} if {@code ImageIO} could decode it; {@code false} if it could not
     *         or if decoding failed with an I/O error
     * @throws IOException if the upload's stream cannot be opened or closed
     */
    public static boolean judgeImageFile(MultipartFile file) throws IOException {
        // try-with-resources: ImageIO.read does not close the stream it is given.
        try (InputStream inputStream = file.getInputStream()) {
            try {
                return ImageIO.read(inputStream) != null;
            } catch (IOException e) {
                return false;
            }
        }
    }

    /**
     * Checks whether the upload's original file name ends with {@code .pdf} (case-insensitive).
     *
     * @param file the uploaded file
     * @return {@code true} for a {@code .pdf} name; {@code false} otherwise, including when
     *         the upload carries no original file name
     * @throws RuntimeException if {@code file} is {@code null} or empty
     */
    public static boolean judgePDFFile(MultipartFile file) {
        if (file == null || file.isEmpty()) {
            throw new RuntimeException("文件不能为空");
        }
        String fileName = file.getOriginalFilename();
        // The original file name may be absent; treat that as "not a PDF" instead of failing.
        return fileName != null && fileName.toLowerCase().endsWith(".pdf");
    }

    /**
     * Converts the image to the grey colour space without rendering hints.
     *
     * @param image the source image
     * @return the converted image
     */
    public static BufferedImage makeGrayingImage(BufferedImage image) {
        return makeGrayingImage(image, null);
    }

    /**
     * Converts the image to the {@link ColorSpace#CS_GRAY} colour space.
     *
     * @param image the source image
     * @param hints rendering hints for the conversion, may be {@code null}
     * @return the converted image (a new image created by the conversion)
     */
    public static BufferedImage makeGrayingImage(BufferedImage image, RenderingHints hints) {
        ColorSpace colorSpace = ColorSpace.getInstance(ColorSpace.CS_GRAY);
        ColorConvertOp colorConvertOp = new ColorConvertOp(colorSpace, hints);
        return colorConvertOp.filter(image, null);
    }

    /**
     * Scales the image by {@code quality} (1.0 keeps the size) into a new RGB image and
     * flushes the input image.
     *
     * @param inputImage the source image
     * @param quality    the scale factor, must be greater than 0
     * @return the scaled copy; each side is at least one pixel
     * @throws IllegalArgumentException if {@code quality} is not positive or the source
     *                                  dimensions are not available
     */
    public static BufferedImage compressImage(Image inputImage, double quality) {
        int srcWidth = inputImage.getWidth(null);
        int srcHeight = inputImage.getHeight(null);
        if (srcWidth <= 0 || srcHeight <= 0) {
            throw new IllegalArgumentException(
                    "Image dimensions are not available: " + srcWidth + "x" + srcHeight);
        }
        if (!(quality > 0)) {
            throw new IllegalArgumentException("quality must be greater than 0: " + quality);
        }
        double ratio = srcHeight / (srcWidth * 1.0);
        // A side that rounds down to 0 would be rejected by BufferedImage; keep at least 1 pixel.
        int width = Math.max(1, (int) (srcWidth * quality));
        int height = Math.max(1, (int) (srcWidth * quality * ratio));
        BufferedImage tempImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        // getGraphics() hands out a new context on every call, so keep the one used for
        // drawing and dispose exactly that one.
        java.awt.Graphics graphics = tempImage.getGraphics();
        try {
            graphics.drawImage(inputImage.getScaledInstance(width, height, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            graphics.dispose();
        }
        inputImage.flush();
        return tempImage;
    }

    /**
     * Removes isolated dark pixels in place: a black pixel (R+G+B sum at most 300) is
     * painted white when every neighbouring pixel inside the image is white. The four
     * corner pixels are painted white whenever their three neighbours are white,
     * regardless of their own colour.
     *
     * @param image the image to clean; modified in place
     * @return the same {@code image} instance; images narrower or shorter than two
     *         pixels are returned unchanged
     */
    public static BufferedImage denoise(BufferedImage image) {
        int w = image.getWidth();
        int h = image.getHeight();
        if (w < 2 || h < 2) {
            // No neighbourhood to inspect.
            return image;
        }
        int white = Color.WHITE.getRGB();

        // Corners first, in this order; they are not revisited after the edges change.
        int[][] corners = {{0, 0}, {w - 1, 0}, {0, h - 1}, {w - 1, h - 1}};
        for (int[] corner : corners) {
            if (allNeighboursWhite(image, corner[0], corner[1])) {
                image.setRGB(corner[0], corner[1], white);
            }
        }
        // Top and bottom edges (corners excluded).
        for (int x = 1; x < w - 1; x++) {
            clearIfIsolated(image, x, 0, white);
        }
        for (int x = 1; x < w - 1; x++) {
            clearIfIsolated(image, x, h - 1, white);
        }
        // Left and right edges (corners excluded).
        for (int y = 1; y < h - 1; y++) {
            clearIfIsolated(image, 0, y, white);
        }
        for (int y = 1; y < h - 1; y++) {
            clearIfIsolated(image, w - 1, y, white);
        }
        // Interior, one pixel at a time.
        for (int y = 1; y < h - 1; y++) {
            for (int x = 1; x < w - 1; x++) {
                clearIfIsolated(image, x, y, white);
            }
        }
        return image;
    }

    /** Paints the pixel white if it is black and all of its in-image neighbours are white. */
    private static void clearIfIsolated(BufferedImage image, int x, int y, int white) {
        if (isBlack(image.getRGB(x, y)) && allNeighboursWhite(image, x, y)) {
            image.setRGB(x, y, white);
        }
    }

    /** Returns whether every pixel of the 8-neighbourhood that lies inside the image is white. */
    private static boolean allNeighboursWhite(BufferedImage image, int x, int y) {
        int w = image.getWidth();
        int h = image.getHeight();
        for (int dy = -1; dy <= 1; dy++) {
            for (int dx = -1; dx <= 1; dx++) {
                int nx = x + dx;
                int ny = y + dy;
                boolean self = dx == 0 && dy == 0;
                boolean outside = nx < 0 || ny < 0 || nx >= w || ny >= h;
                if (self || outside) {
                    continue;
                }
                if (!isWhite(image.getRGB(nx, ny))) {
                    return false;
                }
            }
        }
        return true;
    }

    /** Sum of the red, green and blue components of a packed RGB value (alpha is ignored). */
    private static int rgbSum(int colorInt) {
        return ((colorInt >> 16) & 0xff) + ((colorInt >> 8) & 0xff) + (colorInt & 0xff);
    }

    /**
     * @param colorInt a packed RGB value
     * @return {@code true} if R+G+B is at most 300
     */
    public static boolean isBlack(int colorInt) {
        return rgbSum(colorInt) <= BLACK_SUM_THRESHOLD;
    }

    /**
     * @param colorInt a packed RGB value
     * @return {@code true} if R+G+B is greater than 300
     */
    public static boolean isWhite(int colorInt) {
        return rgbSum(colorInt) > BLACK_SUM_THRESHOLD;
    }

    /**
     * @param colorInt       a packed RGB value
     * @param whiteThreshold the largest R+G+B sum still counted as black
     * @return 1 if R+G+B is at most {@code whiteThreshold}, otherwise 0
     */
    public static int isBlack(int colorInt, int whiteThreshold) {
        return rgbSum(colorInt) <= whiteThreshold ? 1 : 0;
    }

    /**
     * Produces a grey-scale copy using the weights 0.3 R + 0.59 G + 0.11 B, and flushes
     * the source image. Despite the historical description this does not binarize.
     *
     * @param bufferedImage the source image
     * @return a new image of the source's type; {@code TYPE_INT_RGB} is used when the
     *         source reports {@code TYPE_CUSTOM}, which cannot be constructed directly
     */
    public static BufferedImage grayImage(BufferedImage bufferedImage) {
        int width = bufferedImage.getWidth();
        int height = bufferedImage.getHeight();
        int type = bufferedImage.getType();
        if (type == BufferedImage.TYPE_CUSTOM) {
            // new BufferedImage(w, h, TYPE_CUSTOM) is rejected; fall back to plain RGB.
            type = BufferedImage.TYPE_INT_RGB;
        }
        BufferedImage grayBufferedImage = new BufferedImage(width, height, type);
        for (int i = 0; i < width; i++) {
            for (int j = 0; j < height; j++) {
                final int color = bufferedImage.getRGB(i, j);
                final int r = (color >> 16) & 0xff;
                final int g = (color >> 8) & 0xff;
                final int b = color & 0xff;
                int gray = (int) (0.3 * r + 0.59 * g + 0.11 * b);
                grayBufferedImage.setRGB(i, j, colorToRGB(255, gray, gray, gray));
            }
        }
        bufferedImage.flush();
        return grayBufferedImage;
    }

    /**
     * Packs four components, each expected in 0..255, into one ARGB integer.
     *
     * @param alpha alpha component
     * @param red   red component
     * @param green green component
     * @param blue  blue component
     * @return the packed ARGB value
     */
    private static int colorToRGB(int alpha, int red, int green, int blue) {
        return (alpha << 24) | (red << 16) | (green << 8) | blue;
    }

    /**
     * Binarizes the image with a fixed threshold: a pixel whose mean of R, G and B is
     * below {@code thresholdValue} becomes black, every other pixel white.
     *
     * @param image          the source image
     * @param thresholdValue the mean brightness below which a pixel becomes black
     * @return a new {@code TYPE_BYTE_BINARY} image of the same size
     * @throws Exception kept for callers; nothing in this method declares a checked exception
     */
    public static BufferedImage binaryImage(BufferedImage image, int thresholdValue) throws Exception {
        int w = image.getWidth();
        int h = image.getHeight();
        int black = Color.BLACK.getRGB();
        int white = Color.WHITE.getRGB();
        BufferedImage bi = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);
        // One pass; no intermediate per-pixel buffer is needed.
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                float avg = rgbSum(image.getRGB(x, y)) / 3f;
                bi.setRGB(x, y, ((double) avg < thresholdValue) ? black : white);
            }
        }
        return bi;
    }

    /**
     * Copies the upload into a newly created temporary file.
     *
     * <p>The temporary file's suffix is the extension after the last dot of the original
     * file name when that extension consists of letters and digits only; otherwise the
     * platform default suffix is used.
     *
     * @param multipartFile the uploaded file
     * @return the temporary file, or {@code null} if it could not be created or written
     *         (a partially written file is deleted)
     */
    public static File transferToFile(MultipartFile multipartFile) {
        String suffix = safeSuffix(multipartFile.getOriginalFilename());
        File file = null;
        try {
            file = File.createTempFile(new Date().getTime() + "", suffix);
            try (InputStream ins = multipartFile.getInputStream();
                 OutputStream os = new FileOutputStream(file)) {
                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = ins.read(buffer, 0, buffer.length)) != -1) {
                    os.write(buffer, 0, bytesRead);
                }
            }
            return file;
        } catch (IOException e) {
            e.printStackTrace();
            // Do not hand a half-written file to the caller.
            if (file != null) {
                file.delete();
            }
            return null;
        }
    }

    /**
     * Returns {@code "." + extension} for a file name whose last extension is non-empty and
     * made of letters and digits only, otherwise {@code null}.
     */
    private static String safeSuffix(String originalFilename) {
        if (originalFilename == null) {
            return null;
        }
        int dot = originalFilename.lastIndexOf('.');
        if (dot < 0 || dot == originalFilename.length() - 1) {
            return null;
        }
        for (int i = dot + 1; i < originalFilename.length(); i++) {
            // The name comes from the client; refuse separators and other odd characters.
            if (!Character.isLetterOrDigit(originalFilename.charAt(i))) {
                return null;
            }
        }
        return originalFilename.substring(dot);
    }
}
开始测试!!!
List<HandleResult> decodeResults = QRCodeDecodeFactory.createNewQrcodeDecodeFactory().decode(uploadFile);
结果:
| 测试数据 | 结果 |
| --- | --- |
| 图片png | 成功 |
| 区块链发票pdf | 成功 |
| 普通电子专用发票pdf | 成功 |
最后,如果上述二维码识别方案仍然不能满足你的需求,可以试试下面的"偏方":直接从 PDF 的文本中提取发票信息。
package com.unionman.fmis.qrcode.pdf;
import java.math.BigDecimal;
import java.util.List;
/**
 * Mutable value holder for the data read from one invoice: header fields, buyer and
 * seller blocks, amounts, signatories and the list of detail lines.
 */
public class Invoice {

    // ---- header ----
    private String title;
    private String machineNumber;
    private String code;
    private String number;
    private String date;
    private String checksum;

    // ---- buyer ----
    private String buyerName;
    private String buyerCode;
    private String buyerAddress;
    private String buyerAccount;

    // ---- password area and amounts ----
    private String password;
    private BigDecimal amount;
    private BigDecimal taxAmount;
    private String totalAmountString;
    private BigDecimal totalAmount;

    // ---- seller ----
    private String sellerName;
    private String sellerCode;
    private String sellerAddress;
    private String sellerAccount;

    // ---- signatories, type, detail lines ----
    private String payee;
    private String reviewer;
    private String drawer;
    private String type;
    private List<Detail> detailList;

    /** @return the invoice title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the invoice title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the machine number */
    public String getMachineNumber() {
        return this.machineNumber;
    }

    /** @param machineNumber the machine number */
    public void setMachineNumber(String machineNumber) {
        this.machineNumber = machineNumber;
    }

    /** @return the invoice code */
    public String getCode() {
        return this.code;
    }

    /** @param code the invoice code */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the invoice number */
    public String getNumber() {
        return this.number;
    }

    /** @param number the invoice number */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the invoice date, as text */
    public String getDate() {
        return this.date;
    }

    /** @param date the invoice date, as text */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the checksum */
    public String getChecksum() {
        return this.checksum;
    }

    /** @param checksum the checksum */
    public void setChecksum(String checksum) {
        this.checksum = checksum;
    }

    /** @return the buyer's name */
    public String getBuyerName() {
        return this.buyerName;
    }

    /** @param buyerName the buyer's name */
    public void setBuyerName(String buyerName) {
        this.buyerName = buyerName;
    }

    /** @return the buyer's code */
    public String getBuyerCode() {
        return this.buyerCode;
    }

    /** @param buyerCode the buyer's code */
    public void setBuyerCode(String buyerCode) {
        this.buyerCode = buyerCode;
    }

    /** @return the buyer's address */
    public String getBuyerAddress() {
        return this.buyerAddress;
    }

    /** @param buyerAddress the buyer's address */
    public void setBuyerAddress(String buyerAddress) {
        this.buyerAddress = buyerAddress;
    }

    /** @return the buyer's account */
    public String getBuyerAccount() {
        return this.buyerAccount;
    }

    /** @param buyerAccount the buyer's account */
    public void setBuyerAccount(String buyerAccount) {
        this.buyerAccount = buyerAccount;
    }

    /** @return the text of the invoice's password area */
    public String getPassword() {
        return this.password;
    }

    /** @param password the text of the invoice's password area */
    public void setPassword(String password) {
        this.password = password;
    }

    /** @return the amount */
    public BigDecimal getAmount() {
        return this.amount;
    }

    /** @param amount the amount */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the tax amount */
    public BigDecimal getTaxAmount() {
        return this.taxAmount;
    }

    /** @param taxAmount the tax amount */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** @return the total amount in its textual form */
    public String getTotalAmountString() {
        return this.totalAmountString;
    }

    /** @param totalAmountString the total amount in its textual form */
    public void setTotalAmountString(String totalAmountString) {
        this.totalAmountString = totalAmountString;
    }

    /** @return the total amount */
    public BigDecimal getTotalAmount() {
        return this.totalAmount;
    }

    /** @param totalAmount the total amount */
    public void setTotalAmount(BigDecimal totalAmount) {
        this.totalAmount = totalAmount;
    }

    /** @return the seller's name */
    public String getSellerName() {
        return this.sellerName;
    }

    /** @param sellerName the seller's name */
    public void setSellerName(String sellerName) {
        this.sellerName = sellerName;
    }

    /** @return the seller's code */
    public String getSellerCode() {
        return this.sellerCode;
    }

    /** @param sellerCode the seller's code */
    public void setSellerCode(String sellerCode) {
        this.sellerCode = sellerCode;
    }

    /** @return the seller's address */
    public String getSellerAddress() {
        return this.sellerAddress;
    }

    /** @param sellerAddress the seller's address */
    public void setSellerAddress(String sellerAddress) {
        this.sellerAddress = sellerAddress;
    }

    /** @return the seller's account */
    public String getSellerAccount() {
        return this.sellerAccount;
    }

    /** @param sellerAccount the seller's account */
    public void setSellerAccount(String sellerAccount) {
        this.sellerAccount = sellerAccount;
    }

    /** @return the payee */
    public String getPayee() {
        return this.payee;
    }

    /** @param payee the payee */
    public void setPayee(String payee) {
        this.payee = payee;
    }

    /** @return the reviewer */
    public String getReviewer() {
        return this.reviewer;
    }

    /** @param reviewer the reviewer */
    public void setReviewer(String reviewer) {
        this.reviewer = reviewer;
    }

    /** @return the drawer (the person who issued the invoice) */
    public String getDrawer() {
        return this.drawer;
    }

    /** @param drawer the drawer (the person who issued the invoice) */
    public void setDrawer(String drawer) {
        this.drawer = drawer;
    }

    /** @return the invoice type */
    public String getType() {
        return this.type;
    }

    /** @param type the invoice type */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the detail lines; the stored list itself, not a copy */
    public List<Detail> getDetailList() {
        return this.detailList;
    }

    /** @param detailList the detail lines; stored as given, not copied */
    public void setDetailList(List<Detail> detailList) {
        this.detailList = detailList;
    }

    /** Renders every field as {@code Invoice [name=value, ...]} in declaration order. */
    @Override
    public String toString() {
        String fields = String.join(", ",
                "title=" + title,
                "machineNumber=" + machineNumber,
                "code=" + code,
                "number=" + number,
                "date=" + date,
                "checksum=" + checksum,
                "buyerName=" + buyerName,
                "buyerCode=" + buyerCode,
                "buyerAddress=" + buyerAddress,
                "buyerAccount=" + buyerAccount,
                "password=" + password,
                "amount=" + amount,
                "taxAmount=" + taxAmount,
                "totalAmountString=" + totalAmountString,
                "totalAmount=" + totalAmount,
                "sellerName=" + sellerName,
                "sellerCode=" + sellerCode,
                "sellerAddress=" + sellerAddress,
                "sellerAccount=" + sellerAccount,
                "payee=" + payee,
                "reviewer=" + reviewer,
                "drawer=" + drawer,
                "type=" + type,
                "detailList=" + detailList);
        return "Invoice [" + fields + "]";
    }
}
/**
 * Mutable value holder for one detail line of an invoice: item name, model, unit,
 * quantity, unit price, amount, tax rate and tax amount.
 */
class Detail {

    // ---- item description ----
    private String name;
    private String model;
    private String unit;

    // ---- figures ----
    private BigDecimal count;
    private BigDecimal price;
    private BigDecimal amount;
    private BigDecimal taxRate;
    private BigDecimal taxAmount;

    /** @return the item name */
    public String getName() {
        return this.name;
    }

    /** @param name the item name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the item model */
    public String getModel() {
        return this.model;
    }

    /** @param model the item model */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the unit */
    public String getUnit() {
        return this.unit;
    }

    /** @param unit the unit */
    public void setUnit(String unit) {
        this.unit = unit;
    }

    /** @return the count */
    public BigDecimal getCount() {
        return this.count;
    }

    /** @param count the count */
    public void setCount(BigDecimal count) {
        this.count = count;
    }

    /** @return the price */
    public BigDecimal getPrice() {
        return this.price;
    }

    /** @param price the price */
    public void setPrice(BigDecimal price) {
        this.price = price;
    }

    /** @return the amount */
    public BigDecimal getAmount() {
        return this.amount;
    }

    /** @param amount the amount */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the tax rate */
    public BigDecimal getTaxRate() {
        return this.taxRate;
    }

    /** @param taxRate the tax rate */
    public void setTaxRate(BigDecimal taxRate) {
        this.taxRate = taxRate;
    }

    /** @return the tax amount */
    public BigDecimal getTaxAmount() {
        return this.taxAmount;
    }

    /** @param taxAmount the tax amount */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** Renders every field as {@code Detail [name=value, ...]} in declaration order. */
    @Override
    public String toString() {
        String fields = String.join(", ",
                "name=" + name,
                "model=" + model,
                "unit=" + unit,
                "count=" + count,
                "price=" + price,
                "amount=" + amount,
                "taxRate=" + taxRate,
                "taxAmount=" + taxAmount);
        return "Detail [" + fields + "]";
    }
}
package com.unionman.fmis.qrcode.pdf;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import java.awt.*;
import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** * 专用于处理电子发票识别的类 * * @author arthurlee */
public class PdfInvoiceExtractor {
public static Invoice extract(File file) throws IOException {
Invoice invoice = new Invoice();
PDDocument doc = Loader.loadPDF(file);
PDPage firstPage = doc.getPage(0);
int pageWidth = Math.round(firstPage.getCropBox().getWidth());
PDFTextStripper textStripper = new PDFTextStripper();
textStripper.setSortByPosition(true);
String fullText = textStripper.getText(doc);
if (firstPage.getRotation() != 0) {
pageWidth = Math.round(firstPage.getCropBox().getHeight());
}
String allText = replace(fullText).replaceAll("(", "(").replaceAll(")", ")").replaceAll("¥", "¥");
{
String reg = "机器编号:(?<machineNumber>\\d{12})|发票代码:(?<code>\\d{12})|发票号码:(?<number>\\d{19})|:(?<date>\\d{4}年\\d{2}月\\d{2}日)"
+ "|验证码:(?<checksum>\\d{20}|\\S{4,})";
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(allText);
while (matcher.find()) {
if (matcher.group("machineNumber") != null) {
invoice.setMachineNumber(matcher.group("machineNumber"));
} else if (matcher.group("code") != null) {
invoice.setCode(matcher.group("code"));
} else if (matcher.group("number") != null) {
invoice.setNumber(matcher.group("number"));
} else if (matcher.group("date") != null) {
invoice.setDate(matcher.group("date"));
} else if (matcher.group("checksum") != null) {
invoice.setChecksum(matcher.group("checksum"));
}
}
}
{
String reg = "合计¥?(?<amount>[^ \\f\\n\\r\\t\\v\\*]*)(?:¥?(?<taxAmount>\\S*)|\\*+)\\s";
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(allText);
if (matcher.find()) {
try {
invoice.setAmount(new BigDecimal(matcher.group("amount")));
} catch (Exception e) {
}
try {
invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
} catch (Exception e) {
invoice.setTaxAmount(new BigDecimal(0));
}
}
}
if (null == invoice.getAmount()) {
String reg = "合\\u0020*计\\u0020*¥?(?<amount>[^ ]*)\\u0020+¥?(?:(?<taxAmount>\\S*)|\\*+)\\s";
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(fullText);
if (matcher.find()) {
try {
invoice.setAmount(new BigDecimal(matcher.group("amount")));
} catch (Exception e) {
invoice.setAmount(new BigDecimal(0));
}
try {
invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
} catch (Exception e) {
invoice.setTaxAmount(new BigDecimal(0));
}
}
}
{
String reg = "价税合计\\u0028大写\\u0029(?<amountString>\\S*)\\u0028小写\\u0029¥?(?<amount>\\S*)\\s";
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(allText);
if (matcher.find()) {
invoice.setTotalAmountString(matcher.group("amountString"));
try {
invoice.setTotalAmount(new BigDecimal(matcher.group("amount")));
} catch (Exception e) {
invoice.setTotalAmount(new BigDecimal(0));
}
}
}
{
String reg = "收款人:(?<payee>\\S*)复核:(?<reviewer>\\S*)开票人:(?<drawer>\\S*)销售方";
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(allText);
if (matcher.find()) {
invoice.setPayee(matcher.group("payee"));
invoice.setReviewer(matcher.group("reviewer"));
invoice.setDrawer(matcher.group("drawer"));
}
Pattern type00Pattern = Pattern.compile("(?<p>\\S*)发票");
Matcher m00 = type00Pattern.matcher(allText);
if (m00.find()) {
invoice.setTitle(m00.group("p").replaceAll("(?:国|统|一|发|票|监|制)", "") + "发票");
if (null == invoice.getType()) {
invoice.setType("普通发票");
}
} else {
Pattern type01Pattern = Pattern.compile("(?<p>\\S*)发票");
Matcher m01 = type01Pattern.matcher(allText);
if (m01.find()) {
invoice.setTitle(m01.group("p").replaceAll("(?:国|统|一|发|票|监|制)", "") + "发票");
if (null == invoice.getType()) {
invoice.setType("专用发票");
}
}
}
}
PDFKeyWordPosition kwp = new PDFKeyWordPosition();
Map<String, List<Position>> positionListMap = kwp
.getCoordinate(Arrays.asList("机器编号", "税率", "价税合计", "合计", "开票日期", "规格型号", "验证码", "开户行及账号"), doc);
PDFTextStripperByArea stripper = new PDFTextStripperByArea();
stripper.setSortByPosition(true);
PDFTextStripperByArea detailStripper = new PDFTextStripperByArea();
detailStripper.setSortByPosition(true);
{
Position machineNumber;
if (positionListMap.get("机器编号").size() > 0) {
machineNumber = positionListMap.get("机器编号").get(0);
} else {
machineNumber = positionListMap.get("开票日期").get(0);
machineNumber.setY(machineNumber.getY() + 30);
}
Position taxRate = positionListMap.get("税率").get(0);
Position totalAmount = positionListMap.get("价税合计").get(0);
Position amount = positionListMap.get("合计").get(0);
Position model = null;
if (!positionListMap.get("规格型号").isEmpty()) {
model = positionListMap.get("规格型号").get(0);
} else {
model = positionListMap.get("验证码").get(0);
model.setX(model.getX() - 15);
}
List<Position> account = positionListMap.get("开户行及账号");
Position buyer;
Position seller;
if (account.size() < 2) {
buyer = new Position(51, 122);
seller = new Position(51, 341);
} else {
buyer = account.get(0);
seller = account.get(1);
}
int maqX = 370;
Position checkPassword = null;
List<Position> mi = positionListMap.get("验证码");
if (!mi.isEmpty()) {
checkPassword = mi.get(0);
}
{
int x = Math.round(model.getX()) - 13;
int y = Math.round(taxRate.getY()) + 5; // 用税率的y坐标作参考
int h = Math.round(amount.getY()) - Math.round(taxRate.getY()) - 25; // 价税合计的y坐标减去税率的y坐标
detailStripper.addRegion("detail", new Rectangle(0, y, pageWidth, h));
stripper.addRegion("detailName", new Rectangle(0, y, x, h));
stripper.addRegion("detailPrice", new Rectangle(x, y, pageWidth, h));
}
{
int x = maqX + 10;
int y = Math.round(machineNumber.getY()) + 10;
int w = pageWidth - maqX - 10;
int h = Math.round(taxRate.getY() - 5) - y;
stripper.addRegion("password", new Rectangle(x, y, w, h));
}
{
int x = Math.round(buyer.getX()) - 15; // 开户行及账号的x为参考
int y = Math.round(machineNumber.getY()) + 10; // 机器编号的y坐标为参考
int w = maqX - x - 5; // 密码区x坐标为参考
int h = Math.round(buyer.getY()) - y + 20; // 开户行及账号的y坐标为参考
stripper.addRegion("buyer", new Rectangle(x, y, w, h));
}
{
int x = Math.round(seller.getX()) - 15; // 开户行及账号为x参考
int y = Math.round(totalAmount.getY()) + 10; // 价税合计的y坐标为参考
int w = maqX - x - 5; // 密码区的x为参考
int h = Math.round(seller.getY()) - y + 20; // 开户行及账号的y为参考
stripper.addRegion("seller", new Rectangle(x, y, w, h));
}
}
stripper.extractRegions(firstPage);
detailStripper.extractRegions(firstPage);
doc.close();
invoice.setPassword(StringUtils.trim(stripper.getTextForRegion("password")));
String reg = "名(.*)称:(?<name>\\S*)|纳税人识别号:(?<code>\\S*)|地址、电话:(?<address>\\S*)|开户行及账号:(?<account>\\S*)|电子支付标识:(?<account2>\\S*)";
{
String buyer = replace(stripper.getTextForRegion("buyer"));
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(buyer);
while (matcher.find()) {
if (matcher.group("name") != null) {
invoice.setBuyerName(matcher.group("name"));
} else if (matcher.group("code") != null) {
invoice.setBuyerCode(matcher.group("code"));
} else if (matcher.group("address") != null) {
invoice.setBuyerAddress(matcher.group("address"));
} else if (matcher.group("account") != null) {
invoice.setBuyerAccount(matcher.group("account"));
} else if (matcher.group("account2") != null) {
invoice.setBuyerAccount(matcher.group("account2"));
}
}
}
{
String seller = replace(stripper.getTextForRegion("seller"));
Pattern pattern = Pattern.compile(reg);
Matcher matcher = pattern.matcher(seller);
while (matcher.find()) {
if (matcher.group("name") != null) {
invoice.setSellerName(matcher.group("name"));
} else if (matcher.group("code") != null) {
invoice.setSellerCode(matcher.group("code"));
} else if (matcher.group("address") != null) {
invoice.setSellerAddress(matcher.group("address"));
} else if (matcher.group("account") != null) {
invoice.setSellerAccount(matcher.group("account"));
}
}
}
{
List<String> skipList = new ArrayList<>();
List<Detail> detailList = new ArrayList<>();
String[] detailPriceStringArray = stripper.getTextForRegion("detailPrice").replaceAll(" ", " ").replaceAll(" ", " ")
.replaceAll("\r", "").split("\\n");
for (String detailString : detailPriceStringArray) {
Detail detail = new Detail();
detail.setName("");
String[] itemArray = StringUtils.split(detailString, " ");
if (2 == itemArray.length) {
detail.setAmount(new BigDecimal(itemArray[0]));
detail.setTaxAmount(new BigDecimal(itemArray[1]));
detailList.add(detail);
} else if (2 < itemArray.length) {
/*detail.setAmount(new BigDecimal(itemArray[itemArray.length - 3]));*/
String taxRate = itemArray[itemArray.length - 2];
if (taxRate.indexOf("免税") > 0 || taxRate.indexOf("不征税") > 0 || taxRate.indexOf("出口零税率") > 0
|| taxRate.indexOf("普通零税率") > 0 || !taxRate.contains("%")) {
detail.setTaxRate(new BigDecimal(0));
detail.setTaxAmount(new BigDecimal(0));
} else {
BigDecimal rate = new BigDecimal(Integer.parseInt(taxRate.replaceAll("%", "")));
detail.setTaxRate(rate.divide(new BigDecimal(100)));
detail.setTaxAmount(new BigDecimal(itemArray[itemArray.length - 1]));
}
for (int j = 0; j < itemArray.length - 3; j++) {
if (itemArray[j].matches("^(-?\\d+)(\\.\\d+)?$")) {
if (null == detail.getCount()) {
detail.setCount(new BigDecimal(itemArray[j]));
} else {
detail.setPrice(new BigDecimal(itemArray[j]));
}
} else {
if (itemArray.length >= j + 1 && !itemArray[j + 1].matches("^(-?\\d+)(\\.\\d+)?$")) {
detail.setUnit(itemArray[j + 1]);
detail.setModel(itemArray[j]);
j++;
} else if (itemArray[j].length() > 2) {
detail.setModel(itemArray[j]);
} else {
detail.setUnit(itemArray[j]);
}
}
}
detailList.add(detail);
} else {
skipList.add(detailString);
}
}
String[] detailNameStringArray = stripper.getTextForRegion("detailName").replaceAll(" ", " ").replaceAll(" ", " ")
.replaceAll("\r", "").split("\\n");
String[] detailStringArray = replace(detailStripper.getTextForRegion("detail")).replaceAll("\r", "").split("\\n");
int i = 0, j = 0, h = 0, m = 0;
Detail lastDetail = null;
for (String detailString : detailStringArray) {
if (m < detailNameStringArray.length) {
if (detailString.matches("\\S+\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*")
&& !detailString.matches("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*")
&& detailString.matches("\\S+\\d+%[\\-\\d]+\\S*")
|| detailStringArray.length > i + 1
&& detailStringArray[i + 1].matches("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*")) {
if (j < detailList.size()) {
lastDetail = detailList.get(j);
lastDetail.setName(detailNameStringArray[m]);
}
j++;
} else if (null != lastDetail && StringUtils.isNotBlank(detailNameStringArray[m])) {
if (skipList.size() > h) {
String skip = skipList.get(h);
if (detailString.endsWith(skip)) {
if (detailString.equals(skip)) {
m--;
} else {
lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
}
lastDetail.setModel(lastDetail.getModel() + skip);
h++;
} else {
lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
}
} else {
lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
}
}
}
i++;
m++;
}
invoice.setDetailList(detailList);
}
return invoice;
}
/**
 * Normalizes text pulled from a PDF region before it is matched against the
 * field patterns.
 *
 * <p>Removes every ASCII space (U+0020), no-break space (U+00A0) and
 * ideographic/full-width space (U+3000), and converts the full-width colon
 * (U+FF1A) into an ASCII colon. All other characters, including tabs and
 * line breaks, are kept as they are.
 *
 * <p>The special characters are written as explicit escapes because they are
 * visually indistinguishable from their ASCII counterparts in source form.
 *
 * @param str the raw text; may be {@code null}
 * @return the normalized text, or an empty string when {@code str} is {@code null}
 */
public static String replace(String str) {
    if (str == null) {
        return "";
    }
    StringBuilder normalized = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        // Drop the three space variants that invoices commonly contain.
        if (ch == ' ' || ch == '\u00A0' || ch == '\u3000') {
            continue;
        }
        // Map the full-width colon to ASCII so the "label:value" patterns match.
        normalized.append(ch == '\uFF1A' ? ':' : ch);
    }
    return normalized.toString();
}
/**
 * Command-line entry point: runs the extractor on one invoice PDF and prints
 * the resulting invoice to standard output.
 *
 * @param args optional; {@code args[0]} is the path of the PDF to process.
 *             When no argument is given, the sample path below is used.
 * @throws IOException propagated from {@code extract}
 */
public static void main(String[] args) throws IOException {
    // Keep the original sample file as the default so a no-argument run behaves as before.
    String pdfPath = (args != null && args.length > 0)
            ? args[0]
            : "G:\\企业微信\\WXWork\\File\\2023-10\\未能识别查重发票类型\\浙江通用电子发票4.pdf";
    Invoice extract = extract(new File(pdfPath));
    System.out.println(extract);
}
}
c
文章评论