"更新项目规则定义和处理PDF数据提取"

本次提交主要包含以下几个方面的更新:

1. 更新了`PdfData`类的结构,修改了数据字段,以更好地反映PDF中提取的数据内容。

2. 创建了`ReadJiashou`类,用于处理浙江数据,实现了对数据库的操作,包括读取和批量插入。

3. 修改了`ReadQinhai`类中的数据处理逻辑,优化了PDF文件的读取和数据提取流程。

4. 新增了`RuleDetail`类,用于定义和存储规则详细信息,为项目的规则引擎提供数据支持。

本次提交的代码改动涉及PDF数据提取、数据处理和规则定义等多个方面,旨在优化和扩展项目的功能。
master
linbin 2024-08-08 22:38:09 +08:00
parent 7492d787ad
commit 3092fd6c5c
4 changed files with 272 additions and 0 deletions

View File

@ -15,6 +15,12 @@
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP-java6</artifactId>

View File

@ -0,0 +1,37 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
import lombok.experimental.Accessors;
/**
 * One row persisted to the {@code qinhai_haocai} table (see {@link TableDef}).
 *
 * <p>Identity is defined by {@link #code} alone: two instances with the same
 * code are equal regardless of {@code id}, {@code containName} or
 * {@code extName}. The explicit {@code equals}/{@code hashCode} below take
 * precedence over the ones Lombok's {@code @Data} would otherwise generate.
 */
@Data
@Accessors(chain = true)
@TableDef("qinhai_haocai")
public class PdfData2
{
    /** Primary key; marked auto-increment. */
    @Pk
    @AutoIncrement
    private Integer id;
    /** Item code; the only field that participates in equality. */
    private String code;
    private String containName;
    private String extName;

    /**
     * Compares by {@link #code} only.
     *
     * <p>Null-safe: two instances whose code is {@code null} are considered
     * equal, and a {@code null} code never causes a NullPointerException.
     *
     * @param data the object to compare with
     * @return {@code true} if {@code data} is a {@code PdfData2} with an equal code
     */
    @Override
    public boolean equals(Object data)
    {
        if (this == data)
        {
            return true;
        }
        if (!(data instanceof PdfData2))
        {
            return false;
        }
        PdfData2 that = (PdfData2) data;
        // Fully qualified so the file's import block does not need to change.
        return java.util.Objects.equals(this.code, that.code);
    }

    /**
     * Hash of {@link #code}, consistent with {@link #equals(Object)}.
     *
     * @return {@code code.hashCode()}, or {@code 0} when the code is {@code null}
     */
    @Override
    public int hashCode()
    {
        return java.util.Objects.hashCode(code);
    }
}

View File

@ -0,0 +1,123 @@
package org.example;
import cn.hutool.http.HttpUtil;
import com.jfirer.dson.Dson;
import com.jfirer.dson.util.JsonRename;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import com.jfirer.jsql.session.SqlSession;
import com.zaxxer.hikari.HikariDataSource;
import lombok.Data;
import lombok.experimental.Accessors;
import org.example.zhejiang.HiTrtServ;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class ReadYibaoHaocai
{
/**
 * Top-level shape of the JSON response body that {@code main} parses with Dson;
 * only the {@code rows} property is mapped here.
 */
@Data
public static class Res
{
/** Records of one response page; {@code main} passes this list as-is to {@code batchInsert}. */
private List<RowData> rows;
}
/**
 * One element of {@link Res#rows}, stored in the {@code material} table
 * (see {@code @TableDef}).
 *
 * <p>Fields carrying {@code @JsonRename} are given the all-lowercase name shown
 * in the annotation for JSON purposes; fields without it have no rename
 * (presumably they are matched by their own name — confirm against the
 * actual response).
 */
@TableDef("material")
@Data
@Accessors(chain = true)
public static class RowData
{
/** Primary key; marked auto-increment. */
@AutoIncrement
@Pk
private Integer id;
/**
 * Specification code; no JSON rename is declared for this field.
 */
private String specificationCode;
/**
 * Catalog code; JSON name {@code catalogcode}.
 */
@JsonRename("catalogcode")
private String catalogCode;
/**
 * Catalog name, level 1; JSON name {@code catalogname1}.
 */
@JsonRename("catalogname1")
private String catalogName1;
/**
 * Catalog name, level 2; JSON name {@code catalogname2}.
 */
@JsonRename("catalogname2")
private String catalogName2;
/**
 * Catalog name, level 3; JSON name {@code catalogname3}.
 */
@JsonRename("catalogname3")
private String catalogName3;
/**
 * Common-name code; JSON name {@code commonnamecode}.
 */
@JsonRename("commonnamecode")
private String commonNameCode;
/**
 * Common name; JSON name {@code commonname}.
 */
@JsonRename("commonname")
private String commonName;
/**
 * Material code; JSON name {@code matrialcode}. The spelling "matrial" (sic)
 * is part of the JSON name and the field name — do not "correct" it.
 */
@JsonRename("matrialcode")
private String matrialCode;
/**
 * Material (spelled "matrial", sic); no JSON rename is declared for this field.
 */
private String matrial;
/**
 * Characteristic code; JSON name {@code characteristiccode}.
 */
@JsonRename("characteristiccode")
private String characteristicCode;
/**
 * Characteristic; no JSON rename is declared for this field.
 */
private String characteristic;
}
/**
 * Downloads pages 1 to 6 (1000 rows per page) of the specification list from
 * code.nhsa.gov.cn and batch-inserts every returned row into the database.
 *
 * <p>The connection pool is closed when the method finishes, including on
 * failure. A page whose response cannot be parsed into rows is reported on
 * stderr and skipped instead of aborting the run with a NullPointerException.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    SessionfactoryConfig config = new SessionfactoryConfig();
    // NOTE(review): connection settings and credentials are hard-coded; consider externalising them.
    try (HikariDataSource dataSource = new HikariDataSource())
    {
        dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
        dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
        dataSource.setUsername("root");
        dataSource.setPassword("root");
        config.setDataSource(dataSource);
        SessionFactory sessionFactory = config.build();

        // The headers are identical for every page, so build them once.
        Map<String, String> headers = new HashMap<>();
        // NOTE(review): the standard HTTP request header is "Cookie", not "Cookies"; as written the
        // server most likely ignores this value. Left unchanged because the captured session below
        // is stale anyway — confirm whether the request actually needs a cookie.
        headers.put("Cookies", "__jsluid_s=d8d3898d0ef2f29970f4abc514490e2a; https_waf_cookie=e38f98fc-40fb-4b91c23ba39a30b9b2b880d3ad675d4e9ca4; queryCondition=9be8ff2ed273bc199e6707a6822f559f%3D%7B%22specificationCode%22%3A%22%22%2C%22commonname%22%3A%22%22%2C%22companyName%22%3A%22%22%2C%22catalogname1%22%3A%22%22%2C%22catalogname2%22%3A%22%22%2C%22catalogname3%22%3A%22%22%2C%22regcardNm%22%3A%22%22%2C%22productName%22%3A%22%22%2C%22releaseVersion%22%3A%22%22%7D; JSESSIONID=0C3F947A8FB959691AE837EE075AF4E1; pageSelect=ed65ffb5c1fd849ab9c2a36665b9b001%3D3");
        headers.put("Origin", "https://code.nhsa.gov.cn");
        headers.put("Referer", "https://code.nhsa.gov.cn/hc/stdSpecification/toStdSpecificationCompanyReportList.html");
        headers.put("Accept-Language", "zh-CN,zh;q=0.9");

        for (int i = 1; i <= 6; i++)
        {
            Map<String, Object> form = new HashMap<>();
            form.put("_search", "false");
            form.put("rows", "1000");
            form.put("page", String.valueOf(i));
            form.put("sidx", null);
            form.put("sord", "asc");
            form.put("nd", "1723081276723");
            String body = HttpUtil.createPost("https://code.nhsa.gov.cn/hc/stdSpecification/getStdSpecificationListDataCompanyReport.html").form(form).addHeaders(headers).execute().body();
            Res o = Dson.fromString(Res.class, body);
            List<RowData> rows = o == null ? null : o.getRows();
            if (rows == null || rows.isEmpty())
            {
                // An error page or an empty page yields nothing to insert; keep going with the next page.
                System.err.println("Page " + i + " returned no rows; skipping");
                continue;
            }
            try (SqlSession session = sessionFactory.openSession())
            {
                session.batchInsert(rows, 1000);
            }
        }
    }
}
}

View File

@ -0,0 +1,106 @@
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import org.example.PdfData2;
import org.example.zhejiang.HiTrtServ;
import org.junit.Ignore;
import org.junit.Test;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * One-off import job (kept as an ignored JUnit test): reads a numbered series
 * of PDF files, extracts rows from the first table on every page, removes
 * duplicates by code and batch-inserts the result into the database.
 */
public class ReadQinhai2
{
    /** Path of every input file up to, but not including, its number and the {@code .pdf} suffix. */
    private static final String PDF_PATH_PREFIX = "/Users/linbin/Downloads/2023年医保智能审核诊疗项目内涵耗材新增规则-/2023年医保智能审核诊疗项目内涵耗材新增规则- ";

    /**
     * Reads PDF files 1 to 263 concurrently (one virtual thread per file), merges
     * the extracted rows in file order, de-duplicates them and stores them.
     *
     * <p>A file that fails to load or parse is reported on stderr; the rows
     * collected from it before the failure are kept and the other files are
     * unaffected.
     *
     * @throws InterruptedException if the calling thread is interrupted while
     *                              waiting for the readers to finish
     */
    @Test
    @Ignore
    public void readPdf() throws InterruptedException
    {
        int start = 1;
        int end = 263;
        CountDownLatch countDownLatch = new CountDownLatch(end - start + 1);
        // Slot i holds the rows of file i, so the merge below is in file order
        // no matter which reader finishes first.
        AtomicReferenceArray<List<PdfData2>> array = new AtomicReferenceArray<>(end + 1);
        // try-with-resources shuts the executor down even when await() is interrupted.
        try (ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor())
        {
            for (int i = start; i <= end; i++)
            {
                int finalI = i;
                executorService.submit(() -> {
                    List<PdfData2> rows = new LinkedList<>();
                    try
                    {
                        collectRows(finalI, rows);
                    }
                    catch (Throwable e)
                    {
                        System.err.println("Failed to read PDF #" + finalI);
                        e.printStackTrace();
                    }
                    finally
                    {
                        // Always publish a (possibly partial) list so the merge never sees null,
                        // and always release the latch so the caller cannot hang.
                        array.set(finalI, rows);
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
        }

        List<PdfData2> merged = new LinkedList<>();
        for (int i = start; i <= end; i++)
        {
            merged.addAll(array.get(i));
        }
        // distinct() relies on PdfData2.equals/hashCode, i.e. on the code only.
        List<PdfData2> list = merged.stream().distinct().toList();
        if (list.isEmpty())
        {
            System.err.println("No rows extracted; nothing to insert");
            return;
        }
        persist(list);
    }

    /**
     * Appends to {@code rows} every data row of the first table on each page of
     * the given PDF file, then closes the document.
     */
    private static void collectRows(int fileIndex, List<PdfData2> rows)
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            // Load the PDF document.
            pdf.loadFromFile(PDF_PATH_PREFIX + fileIndex + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // Walk through every page.
            int pageCount = pdf.getPages().getCount();
            for (int page = 0; page < pageCount; page++)
            {
                // Extract the tables of this page; only the first one is used.
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                PdfTable table = tableLists[0];
                int rowCount = table.getRowCount();
                // Row 0 is never read; a repeated header row further down is skipped below.
                for (int row = 1; row < rowCount; row++)
                {
                    String codeCell = cellText(table, row, 1);
                    if (codeCell.equals("项目代码"))
                    {
                        continue;
                    }
                    int dash = codeCell.indexOf("-");
                    if (dash < 0)
                    {
                        // Without a dash the code cannot be cut out; skip this row rather than
                        // failing the whole file with a StringIndexOutOfBoundsException.
                        System.err.println("Skipping row " + row + " on page " + page + " of PDF #" + fileIndex
                                           + ": no '-' in code cell \"" + codeCell + "\"");
                        continue;
                    }
                    rows.add(new PdfData2()
                                     .setCode(codeCell.substring(0, dash))
                                     .setContainName(cellText(table, row, 3))
                                     .setExtName(cellText(table, row, 4)));
                }
            }
        }
        finally
        {
            pdf.close();
        }
    }

    /** Returns the text of one table cell with all line breaks removed. */
    private static String cellText(PdfTable table, int row, int column)
    {
        return table.getText(row, column).replace("\n", "");
    }

    /** Batch-inserts the given rows and closes the connection pool afterwards. */
    private static void persist(List<PdfData2> list)
    {
        SessionfactoryConfig config = new SessionfactoryConfig();
        // NOTE(review): connection settings and credentials are hard-coded; consider externalising them.
        try (HikariDataSource dataSource = new HikariDataSource())
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            try (SqlSession session = sessionFactory.openSession())
            {
                session.batchInsert(list, 1000);
            }
        }
    }
}