"更新项目规则定义和处理PDF数据提取"
本次提交主要包含以下几个方面的更新:
1. 更新了`PdfData`类的结构,修改了数据字段,以更好地反映PDF中提取的数据内容。
2. 创建了`ReadJiashou`类,用于处理浙江数据,实现了对数据库的操作,包括读取和批量插入。
3. 修改了`ReadQinhai`类中的数据处理逻辑,优化了PDF文件的读取和数据提取流程。
4. 新增了`RuleDetail`类,用于定义和存储规则详细信息,为项目的规则引擎提供数据支持。
本次提交的代码改动涉及PDF数据提取、数据处理和规则定义等多个方面,旨在优化和扩展项目的功能。
master
parent
7492d787ad
commit
3092fd6c5c
6
pom.xml
6
pom.xml
|
@ -15,6 +15,12 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.zaxxer</groupId>
|
||||
<artifactId>HikariCP-java6</artifactId>
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
package org.example;
|
||||
|
||||
import com.jfirer.jsql.annotation.AutoIncrement;
|
||||
import com.jfirer.jsql.annotation.Pk;
|
||||
import com.jfirer.jsql.annotation.TableDef;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
|
||||
@Data
|
||||
@Accessors(chain = true)
|
||||
@TableDef("qinhai_haocai")
|
||||
public class PdfData2
|
||||
{
|
||||
@Pk
|
||||
@AutoIncrement
|
||||
private Integer id;
|
||||
private String code;
|
||||
private String containName;
|
||||
private String extName;
|
||||
|
||||
@Override
|
||||
public boolean equals(Object data)
|
||||
{
|
||||
if (data instanceof PdfData2)
|
||||
{
|
||||
PdfData2 that = (PdfData2) data;
|
||||
return this.code.equals(that.code);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return code.hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
package org.example;
|
||||
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import com.jfirer.dson.Dson;
|
||||
import com.jfirer.dson.util.JsonRename;
|
||||
import com.jfirer.jsql.SessionFactory;
|
||||
import com.jfirer.jsql.SessionfactoryConfig;
|
||||
import com.jfirer.jsql.annotation.AutoIncrement;
|
||||
import com.jfirer.jsql.annotation.Pk;
|
||||
import com.jfirer.jsql.annotation.TableDef;
|
||||
import com.jfirer.jsql.session.SqlSession;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
import org.example.zhejiang.HiTrtServ;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ReadYibaoHaocai
|
||||
{
|
||||
@Data
|
||||
public static class Res
|
||||
{
|
||||
private List<RowData> rows;
|
||||
}
|
||||
|
||||
@TableDef("material")
|
||||
@Data
|
||||
@Accessors(chain = true)
|
||||
public static class RowData
|
||||
{
|
||||
@AutoIncrement
|
||||
@Pk
|
||||
private Integer id;
|
||||
/**
|
||||
* 医用耗材通用代码
|
||||
*/
|
||||
private String specificationCode;
|
||||
/**
|
||||
* 三级分类代码
|
||||
*/
|
||||
@JsonRename("catalogcode")
|
||||
private String catalogCode;
|
||||
/**
|
||||
* 一级
|
||||
*/
|
||||
@JsonRename("catalogname1")
|
||||
private String catalogName1;
|
||||
/**
|
||||
* 二级
|
||||
*/
|
||||
@JsonRename("catalogname2")
|
||||
private String catalogName2;
|
||||
/**
|
||||
* 三级
|
||||
*/
|
||||
@JsonRename("catalogname3")
|
||||
private String catalogName3;
|
||||
/**
|
||||
* 医保通用名称代码
|
||||
*/
|
||||
@JsonRename("commonnamecode")
|
||||
private String commonNameCode;
|
||||
/**
|
||||
* 医保通用名称
|
||||
*/
|
||||
@JsonRename("commonname")
|
||||
private String commonName;
|
||||
/**
|
||||
* 材质代码
|
||||
*/
|
||||
@JsonRename("matrialcode")
|
||||
private String matrialCode;
|
||||
/**
|
||||
* 耗材材质
|
||||
*/
|
||||
private String matrial;
|
||||
/**
|
||||
* 规格代码
|
||||
*/
|
||||
@JsonRename("characteristiccode")
|
||||
private String characteristicCode;
|
||||
/**
|
||||
* 规格
|
||||
*/
|
||||
private String characteristic;
|
||||
}
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
SessionfactoryConfig config = new SessionfactoryConfig();
|
||||
HikariDataSource dataSource = new HikariDataSource();
|
||||
dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
|
||||
dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
|
||||
dataSource.setUsername("root");
|
||||
dataSource.setPassword("root");
|
||||
config.setDataSource(dataSource);
|
||||
SessionFactory sessionFactory = config.build();
|
||||
for (int i = 1; i <= 6; i++)
|
||||
{
|
||||
Map<String, String> headers = new HashMap<>();
|
||||
headers.put("Cookies", "__jsluid_s=d8d3898d0ef2f29970f4abc514490e2a; https_waf_cookie=e38f98fc-40fb-4b91c23ba39a30b9b2b880d3ad675d4e9ca4; queryCondition=9be8ff2ed273bc199e6707a6822f559f%3D%7B%22specificationCode%22%3A%22%22%2C%22commonname%22%3A%22%22%2C%22companyName%22%3A%22%22%2C%22catalogname1%22%3A%22%22%2C%22catalogname2%22%3A%22%22%2C%22catalogname3%22%3A%22%22%2C%22regcardNm%22%3A%22%22%2C%22productName%22%3A%22%22%2C%22releaseVersion%22%3A%22%22%7D; JSESSIONID=0C3F947A8FB959691AE837EE075AF4E1; pageSelect=ed65ffb5c1fd849ab9c2a36665b9b001%3D3");
|
||||
headers.put("Origin", "https://code.nhsa.gov.cn");
|
||||
headers.put("Referer", "https://code.nhsa.gov.cn/hc/stdSpecification/toStdSpecificationCompanyReportList.html");
|
||||
headers.put("Accept-Language", "zh-CN,zh;q=0.9");
|
||||
Map<String, Object> form = new HashMap<>();
|
||||
form.put("_search", "false");
|
||||
form.put("rows", "1000");
|
||||
form.put("page", String.valueOf(i));
|
||||
form.put("sidx", null);
|
||||
form.put("sord", "asc");
|
||||
form.put("nd", "1723081276723");
|
||||
String body = HttpUtil.createPost("https://code.nhsa.gov.cn/hc/stdSpecification/getStdSpecificationListDataCompanyReport.html").form(form).addHeaders(headers).execute().body();
|
||||
Res o = Dson.fromString(Res.class, body);
|
||||
try (SqlSession session = sessionFactory.openSession())
|
||||
{
|
||||
session.batchInsert(o.getRows(), 1000);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
import com.jfirer.jsql.SessionFactory;
|
||||
import com.jfirer.jsql.SessionfactoryConfig;
|
||||
import com.jfirer.jsql.session.SqlSession;
|
||||
import com.spire.pdf.PdfDocument;
|
||||
import com.spire.pdf.utilities.PdfTable;
|
||||
import com.spire.pdf.utilities.PdfTableExtractor;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.example.PdfData2;
|
||||
import org.example.zhejiang.HiTrtServ;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.LinkedTransferQueue;
|
||||
import java.util.concurrent.atomic.AtomicReferenceArray;
|
||||
|
||||
public class ReadQinhai2
|
||||
{
|
||||
@Test
|
||||
@Ignore
|
||||
public void readPdf() throws InterruptedException
|
||||
{
|
||||
ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor();
|
||||
int start = 1;
|
||||
int end = 263;
|
||||
CountDownLatch countDownLatch = new CountDownLatch(end - start + 1);
|
||||
AtomicReferenceArray<List<PdfData2>> array = new AtomicReferenceArray<>(end + 1);
|
||||
Queue<PdfData2> queue = new LinkedTransferQueue<>();
|
||||
HiTrtServ lastCodedRecord;
|
||||
for (int i = start; i <= end; i++)
|
||||
{
|
||||
int finalI = i;
|
||||
executorService.submit(() -> {
|
||||
try
|
||||
{
|
||||
PdfDocument pdf = new PdfDocument();
|
||||
//加载PDF文档
|
||||
pdf.loadFromFile("/Users/linbin/Downloads/2023年医保智能审核诊疗项目内涵耗材新增规则-/2023年医保智能审核诊疗项目内涵耗材新增规则- " + finalI + ".pdf");
|
||||
//创建StringBuilder类的实例
|
||||
//创建PdfTableExtractor类的对象
|
||||
PdfTableExtractor extractor = new PdfTableExtractor(pdf);
|
||||
//遍历每一页
|
||||
for (int page = 0; page < pdf.getPages().getCount(); page++)
|
||||
{
|
||||
//提取页面中的表格存入PdfTable[]数组
|
||||
PdfTable[] tableLists = extractor.extractTable(page);
|
||||
List<PdfData2> list = new LinkedList<>();
|
||||
if (tableLists != null && tableLists.length > 0)
|
||||
{
|
||||
PdfTable table = tableLists[0];
|
||||
//获取表格中的行数和列数
|
||||
int rowCount = table.getRowCount();
|
||||
//遍历表格中的每一个单元格
|
||||
for (int row = 1; row < rowCount; row++)
|
||||
{
|
||||
PdfData2 pdfData = new PdfData2();
|
||||
String text = table.getText(row, 1).replace("\n", "");
|
||||
if (text.equals("项目代码"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
pdfData.setCode(text.substring(0, text.indexOf("-")));
|
||||
text = table.getText(row, 3).replace("\n", "");
|
||||
pdfData.setContainName(text);
|
||||
text = table.getText(row, 4).replace("\n", "");
|
||||
pdfData.setExtName(text);
|
||||
list.add(pdfData);
|
||||
}
|
||||
}
|
||||
array.set(finalI, list);
|
||||
}
|
||||
}
|
||||
catch (Throwable e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
countDownLatch.countDown();
|
||||
});
|
||||
}
|
||||
countDownLatch.await();
|
||||
for (int i = start; i <= end; i++)
|
||||
{
|
||||
List<PdfData2> datas = array.get(i);
|
||||
queue.addAll(datas);
|
||||
}
|
||||
List<PdfData2> list = queue.stream().distinct().toList();
|
||||
SessionfactoryConfig config = new SessionfactoryConfig();
|
||||
HikariDataSource dataSource = new HikariDataSource();
|
||||
dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
|
||||
dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
|
||||
dataSource.setUsername("root");
|
||||
dataSource.setPassword("root");
|
||||
config.setDataSource(dataSource);
|
||||
SessionFactory sessionFactory = config.build();
|
||||
try (SqlSession session = sessionFactory.openSession())
|
||||
{
|
||||
session.batchInsert(list, 1000);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue