Compare commits

...

2 Commits
main ... master

Author SHA1 Message Date
linbin 3092fd6c5c "更新项目规则定义和处理PDF数据提取"
本次提交主要包含以下几个方面的更新:

1. 更新了`PdfData`类的结构,修改了数据字段,以更好地反映PDF中提取的数据内容。

2. 创建了`ReadJiashou`类,用于处理浙江数据,实现了对数据库的操作,包括读取和批量插入。

3. 修改了`ReadQinhai`类中的数据处理逻辑,优化了PDF文件的读取和数据提取流程。

4. 新增了`RuleDetail`类,用于定义和存储规则详细信息,为项目的规则引擎提供数据支持。

本次提交的代码改动涉及PDF数据提取、数据处理和规则定义等多个方面,旨在优化和扩展项目的功能。
2024-08-08 22:38:09 +08:00
linbin 7492d787ad "更新项目规则定义和处理PDF数据提取"
本次提交主要包含以下几个方面的更新:

1. 更新了`PdfData`类的结构,修改了数据字段,以更好地反映PDF中提取的数据内容。

2. 创建了`ReadJiashou`类,用于处理浙江数据,实现了对数据库的操作,包括读取和批量插入。

3. 修改了`ReadQinhai`类中的数据处理逻辑,优化了PDF文件的读取和数据提取流程。

4. 新增了`RuleDetail`类,用于定义和存储规则详细信息,为项目的规则引擎提供数据支持。

本次提交的代码改动涉及PDF数据提取、数据处理和规则定义等多个方面,旨在优化和扩展项目的功能。
2024-08-07 21:35:10 +08:00
9 changed files with 483 additions and 65 deletions

View File

@ -15,6 +15,12 @@
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP-java6</artifactId>

View File

@ -0,0 +1,15 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import lombok.Data;
import lombok.experimental.Accessors;
/**
 * Plain holder for the three item codes read from one row of a PDF table.
 * <p>
 * Lombok generates the accessors; {@code chain = true} makes every setter return
 * {@code this} so calls can be chained.
 */
@Accessors(chain = true)
@Data
public class PdfData
{
    /** First code of the row (ReadQinhai fills it from table column 1, text before the first "-"). */
    private String code;

    /** Second code of the row (ReadQinhai fills it from table column 3, text before the first "-"). */
    private String code1;

    /** Third code of the row (ReadQinhai fills it from table column 5, text before the first "-"). */
    private String code2;
}

View File

@ -0,0 +1,37 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
import lombok.experimental.Accessors;
/**
 * Row of the {@code qinhai_haocai} table.
 * <p>
 * Identity is defined by {@link #code} alone: two instances with the same code are equal
 * regardless of {@code id}, {@code containName} or {@code extName}. The explicit
 * {@code equals}/{@code hashCode} below take precedence over the ones Lombok would generate.
 */
@Data
@Accessors(chain = true)
@TableDef("qinhai_haocai")
public class PdfData2
{
    /** Auto-increment primary key. */
    @Pk
    @AutoIncrement
    private Integer id;

    /** Item code; the only field taking part in equality. */
    private String code;

    private String containName;

    private String extName;

    /**
     * Compares by {@link #code} only.
     *
     * @param data object to compare with, may be {@code null}
     * @return {@code true} when {@code data} is a {@code PdfData2} whose code equals this
     *         one's; two instances whose code is {@code null} are considered equal
     */
    @Override
    public boolean equals(Object data)
    {
        if (this == data)
        {
            return true;
        }
        if (!(data instanceof PdfData2))
        {
            return false;
        }
        PdfData2 that = (PdfData2) data;
        // Null-safe: a freshly constructed instance has no code yet and must not throw here.
        return java.util.Objects.equals(this.code, that.code);
    }

    /**
     * Hash consistent with {@link #equals(Object)}.
     *
     * @return the hash of {@link #code}, or {@code 0} when the code is {@code null}
     */
    @Override
    public int hashCode()
    {
        return java.util.Objects.hashCode(code);
    }
}

View File

@ -0,0 +1,102 @@
package org.example;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.model.Model;
import com.jfirer.jsql.session.SqlSession;
import com.mysql.cj.util.DnsSrv;
import com.zaxxer.hikari.HikariDataSource;
import org.example.zhejiang.HiTrtServ;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * One-off tool that reads every {@code HiTrtServ} row, validates the names of surcharge
 * items, and batch-inserts one {@link RuleDetail} (rule id 33) for each surcharge item whose
 * main item exists.
 * <p>
 * The main item of a surcharge item is looked up by replacing the last character of the
 * surcharge code with {@code "0"}.
 */
public class ReadJiashou
{
    /**
     * First marker counted in every surcharge item name.
     * NOTE(review): this literal is empty in the revision under review, which made the
     * original counting loop run off the end of the string. The intended character was
     * presumably lost in transit - restore it before running.
     */
    private static final String FIRST_MARK = "";

    /**
     * Second marker counted in every surcharge item name; its count must equal that of
     * {@link #FIRST_MARK}.
     * NOTE(review): empty for the same reason as {@link #FIRST_MARK} - restore before running.
     */
    private static final String SECOND_MARK = "";

    /**
     * Entry point: load the catalogue, validate surcharge names, generate and store the rules.
     *
     * @param args unused
     * @throws IllegalStateException when a surcharge name fails validation or a marker
     *                               literal is empty
     */
    public static void main(String[] args)
    {
        // NOTE(review): connection settings and credentials are hard-coded; consider externalising them.
        HikariDataSource dataSource = new HikariDataSource();
        try
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            List<HiTrtServ> list;
            try (SqlSession session = sessionFactory.openSession())
            {
                list = session.findList(Model.selectAll(HiTrtServ.class));
            }
            // toMap without a merge function: a duplicated hilistCode aborts the run.
            Map<String, HiTrtServ> byCode = list.stream().collect(Collectors.toMap(HiTrtServ::getHilistCode, Function.identity()));
            validateSurchargeNames(byCode);
            List<RuleDetail> ruleDetails = buildRuleDetails(byCode);
            if (!ruleDetails.isEmpty())
            {
                try (SqlSession session = sessionFactory.openSession())
                {
                    session.batchInsert(ruleDetails, 1000);
                }
            }
        }
        finally
        {
            // Release the connection pool even when validation or the insert fails.
            dataSource.close();
        }
    }

    /** Tells whether the item is a surcharge item: its name contains "加收" but not "加收费". */
    private static boolean isSurcharge(HiTrtServ item)
    {
        String name = item.getHilistName();
        return name.contains("加收") && !name.contains("加收费");
    }

    /**
     * Counts occurrences of {@code token} in {@code text}, advancing one character past the
     * start of each match (the same stepping the original substring loop used).
     */
    private static int countOccurrences(String text, String token)
    {
        if (token.isEmpty())
        {
            // An empty token matches at every position; counting it is meaningless.
            throw new IllegalStateException("marker literal is empty; restore the intended marker before running");
        }
        int count = 0;
        int from = text.indexOf(token);
        while (from != -1)
        {
            count++;
            from = text.indexOf(token, from + 1);
        }
        return count;
    }

    /**
     * Checks that every surcharge name contains both markers, the same number of times each.
     * A name missing a marker aborts immediately; count mismatches are all reported on
     * stderr before the run is aborted.
     */
    private static void validateSurchargeNames(Map<String, HiTrtServ> byCode)
    {
        boolean hasError = false;
        for (HiTrtServ item : byCode.values())
        {
            if (!isSurcharge(item))
            {
                continue;
            }
            String name = item.getHilistName();
            int firstCount = countOccurrences(name, FIRST_MARK);
            int secondCount = countOccurrences(name, SECOND_MARK);
            if (firstCount == 0 || secondCount == 0)
            {
                System.err.println("异常" + name);
                throw new IllegalStateException("surcharge name lacks a marker: " + name);
            }
            if (firstCount != secondCount)
            {
                System.err.println("异常" + name);
                hasError = true;
            }
        }
        if (hasError)
        {
            throw new IllegalStateException("surcharge names with unbalanced markers found, see stderr");
        }
    }

    /** Builds one rule detail per surcharge item whose main item is present in {@code byCode}. */
    private static List<RuleDetail> buildRuleDetails(Map<String, HiTrtServ> byCode)
    {
        List<RuleDetail> ruleDetails = new LinkedList<>();
        for (Map.Entry<String, HiTrtServ> each : byCode.entrySet())
        {
            HiTrtServ surcharge = each.getValue();
            if (!isSurcharge(surcharge))
            {
                continue;
            }
            String code = each.getKey();
            String parentCode = code.substring(0, code.length() - 1) + "0";
            HiTrtServ parent = byCode.get(parentCode);
            // A code already ending in "0" would be its own main item; the resulting
            // precondition (has X && has X == false) could never hold, so skip it.
            if (parent == null || parentCode.equals(code))
            {
                continue;
            }
            RuleDetail ruleDetail = new RuleDetail();
            ruleDetail.setRuleId(33);
            ruleDetail.setDefinition("医疗服务项目无主项[" + parent.getHilistName() + "]收取加收项目[" + surcharge.getHilistName() + "]");
            ruleDetail.setPre("orderInfos.hasHiTreatment('" + code + "') && orderInfos.hasHiTreatment('" + parent.getHilistCode() + "')==false");
            ruleDetail.setEl("1");
            ruleDetail.setTip("orderInfos.listHiTreatment('" + code + "')");
            ruleDetail.setHiListType(2);
            ruleDetails.add(ruleDetail);
        }
        return ruleDetails;
    }
}

View File

@ -2,6 +2,8 @@ package org.example;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.model.Model;
import com.jfirer.jsql.model.Param;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
@ -9,76 +11,87 @@ import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import org.example.zhejiang.HiTrtServ;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.stream.Collectors;
public class ReadQinhai
{
public static void main(String[] args) throws InterruptedException
{
ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor();
int start = 3;
int num = 41;
CountDownLatch countDownLatch = new CountDownLatch(num - start + 1);
AtomicReferenceArray<List<Repeat>> array = new AtomicReferenceArray<>(num + 1);
Queue<Repeat> queue = new LinkedTransferQueue<>();
HiTrtServ lastCodedRecord;
for (int i = start; i <= num; i++)
ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor();
int start = 96;
int end = 97;
CountDownLatch countDownLatch = new CountDownLatch(end - start + 1);
AtomicReferenceArray<List<PdfData>> array = new AtomicReferenceArray<>(end + 1);
Queue<PdfData> queue = new LinkedTransferQueue<>();
HiTrtServ lastCodedRecord;
for (int i = start; i <= end; i++)
{
int finalI = i;
executorService.submit(() -> {
PdfDocument pdf = new PdfDocument();
//加载PDF文档
pdf.loadFromFile("/Users/linbin/Downloads/2023年医保智能审核新增规则(诊疗项目)-/2023年医保智能审核新增规则(诊疗项目)- " + finalI + ".pdf");
//创建StringBuilder类的实例
//创建PdfTableExtractor类的对象
PdfTableExtractor extractor = new PdfTableExtractor(pdf);
//遍历每一页
for (int page = 0; page < pdf.getPages().getCount(); page++)
try
{
//提取页面中的表格存入PdfTable[]数组
PdfTable[] tableLists = extractor.extractTable(page);
List<Repeat> list = new LinkedList<>();
if (tableLists != null && tableLists.length > 0)
PdfDocument pdf = new PdfDocument();
//加载PDF文档
pdf.loadFromFile("/Users/linbin/SynologyDrive/临时同步/2023年医保智能审核新增规则(诊疗项目)-/2023年医保智能审核新增规则(诊疗项目)- " + finalI + ".pdf");
//创建StringBuilder类的实例
//创建PdfTableExtractor类的对象
PdfTableExtractor extractor = new PdfTableExtractor(pdf);
//遍历每一页
for (int page = 0; page < pdf.getPages().getCount(); page++)
{
PdfTable table = tableLists[0];
//获取表格中的行数和列数
int rowCount = table.getRowCount();
//遍历表格中的每一个单元格
for (int row = 1; row < rowCount; row++)
//提取页面中的表格存入PdfTable[]数组
PdfTable[] tableLists = extractor.extractTable(page);
List<PdfData> list = new LinkedList<>();
if (tableLists != null && tableLists.length > 0)
{
Repeat repeat = new Repeat();
repeat.setACode(table.getText(row, 1).replace("\n", ""));
repeat.setAName(table.getText(row, 2).replace("\n", ""));
repeat.setBCode(table.getText(row, 3).replace("\n", ""));
repeat.setBName(table.getText(row, 4).replace("\n", ""));
repeat.setACode(repeat.getACode().substring(0, repeat.getACode().indexOf("-")));
repeat.setBCode(repeat.getBCode().substring(0, repeat.getBCode().indexOf("-")));
if (repeat.getACode().equals(repeat.getBCode()) == false)
PdfTable table = tableLists[0];
//获取表格中的行数和列数
int rowCount = table.getRowCount();
//遍历表格中的每一个单元格
for (int row = 1; row < rowCount; row++)
{
list.add(repeat);
PdfData pdfData = new PdfData();
String text = table.getText(row, 1).replace("\n", "");
if (text.equals("项目代码"))
{
continue;
}
pdfData.setCode(text.substring(0, text.indexOf("-")));
text = table.getText(row, 3).replace("\n", "");
pdfData.setCode1(text.substring(0, text.indexOf("-")));
text = table.getText(row, 5).replace("\n", "");
pdfData.setCode2(text.substring(0, text.indexOf("-")));
// text = table.getText(row, 4).replace("\n", "");
// pdfData.setUp(text);
list.add(pdfData);
// System.out.println(pdfData);
}
}
array.set(finalI, list);
}
array.set(finalI, list);
}
catch (Throwable e)
{
e.printStackTrace();
}
countDownLatch.countDown();
});
}
countDownLatch.await();
for (int i = start; i <= num; i++)
for (int i = start; i <= end; i++)
{
List<Repeat> hiTrtServs = array.get(i);
queue.addAll(hiTrtServs);
List<PdfData> datas = array.get(i);
queue.addAll(datas);
}
SessionfactoryConfig config = new SessionfactoryConfig();
HikariDataSource dataSource = new HikariDataSource();
// Map<String, List<PdfData>> collect = queue.stream().collect(Collectors.groupingBy(pdfData -> pdfData.getCode()));
SessionfactoryConfig config = new SessionfactoryConfig();
HikariDataSource dataSource = new HikariDataSource();
dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
dataSource.setUsername("root");
@ -87,7 +100,23 @@ public class ReadQinhai
SessionFactory sessionFactory = config.build();
try (SqlSession session = sessionFactory.openSession())
{
session.batchInsert(queue, 1000);
for (PdfData data : queue)
{
HiTrtServ one = session.findOne(Model.selectAll(HiTrtServ.class).where(Param.eq(HiTrtServ::getHilistCode, data.getCode())));
HiTrtServ two = session.findOne(Model.selectAll(HiTrtServ.class).where(Param.eq(HiTrtServ::getHilistCode, data.getCode1())));
HiTrtServ third = session.findOne(Model.selectAll(HiTrtServ.class).where(Param.eq(HiTrtServ::getHilistCode, data.getCode2())));
if (one != null && two !=null && third !=null && third.getHilistCode().equals(two.getHilistCode())==false)
{
RuleDetail ruleDetail = new RuleDetail();
ruleDetail.setRuleId(25);
ruleDetail.setDefinition("医疗服务项目分解收费,涉及["+two.getHilistName()+"]与["+third.getHilistName()+"],应当采用["+one.getHilistName()+"]进行收费");
ruleDetail.setPre("orderInfos.hasHiTreatment('" + two.getHilistCode() + "') && orderInfos.hasHiTreatment('"+third.getHilistCode()+"')");
ruleDetail.setEl("1");
ruleDetail.setTip("orderInfos.listHiTreatment('" + two.getHilistCode()+","+third.getHilistCode() + "')");
ruleDetail.setHiListType(2);
session.save(ruleDetail);
}
}
}
}
}

View File

@ -0,0 +1,123 @@
package org.example;
import cn.hutool.http.HttpUtil;
import com.jfirer.dson.Dson;
import com.jfirer.dson.util.JsonRename;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import com.jfirer.jsql.session.SqlSession;
import com.zaxxer.hikari.HikariDataSource;
import lombok.Data;
import lombok.experimental.Accessors;
import org.example.zhejiang.HiTrtServ;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * One-off importer: posts six paged queries (1000 rows per page) to the code.nhsa.gov.cn
 * specification list endpoint, parses each JSON response with Dson and batch-inserts the
 * returned rows into the {@code material} table.
 */
public class ReadYibaoHaocai
{
    /** Response envelope; only the {@code rows} property is read. */
    @Data
    public static class Res
    {
        private List<RowData> rows;
    }

    /**
     * One row of the response, stored in the {@code material} table.
     * <p>
     * Fields carrying {@code @JsonRename} are filled from the JSON key named in the
     * annotation. The remaining fields declare no rename; presumably Dson reads them from a
     * key equal to the field name - confirm against Dson's mapping rules.
     */
    @TableDef("material")
    @Data
    @Accessors(chain = true)
    public static class RowData
    {
        /** Auto-increment primary key. */
        @AutoIncrement
        @Pk
        private Integer id;
        /** No rename declared. */
        private String specificationCode;
        /** JSON key {@code catalogcode}. */
        @JsonRename("catalogcode")
        private String catalogCode;
        /** JSON key {@code catalogname1}. */
        @JsonRename("catalogname1")
        private String catalogName1;
        /** JSON key {@code catalogname2}. */
        @JsonRename("catalogname2")
        private String catalogName2;
        /** JSON key {@code catalogname3}. */
        @JsonRename("catalogname3")
        private String catalogName3;
        /** JSON key {@code commonnamecode}. */
        @JsonRename("commonnamecode")
        private String commonNameCode;
        /** JSON key {@code commonname}. */
        @JsonRename("commonname")
        private String commonName;
        /** JSON key {@code matrialcode} (spelling as declared in the annotation). */
        @JsonRename("matrialcode")
        private String matrialCode;
        /** No rename declared. */
        private String matrial;
        /** JSON key {@code characteristiccode}. */
        @JsonRename("characteristiccode")
        private String characteristicCode;
        /** No rename declared. */
        private String characteristic;
    }

    /**
     * Downloads pages 1 to 6 and stores their rows.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        // NOTE(review): connection settings, credentials and session cookies are hard-coded.
        HikariDataSource dataSource = new HikariDataSource();
        try
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            // The headers are identical for every page, so build them once.
            Map<String, String> headers = new HashMap<>();
            // The HTTP request header that carries cookies is "Cookie"; under the previous
            // name "Cookies" the values were sent as an unrelated custom header.
            headers.put("Cookie", "__jsluid_s=d8d3898d0ef2f29970f4abc514490e2a; https_waf_cookie=e38f98fc-40fb-4b91c23ba39a30b9b2b880d3ad675d4e9ca4; queryCondition=9be8ff2ed273bc199e6707a6822f559f%3D%7B%22specificationCode%22%3A%22%22%2C%22commonname%22%3A%22%22%2C%22companyName%22%3A%22%22%2C%22catalogname1%22%3A%22%22%2C%22catalogname2%22%3A%22%22%2C%22catalogname3%22%3A%22%22%2C%22regcardNm%22%3A%22%22%2C%22productName%22%3A%22%22%2C%22releaseVersion%22%3A%22%22%7D; JSESSIONID=0C3F947A8FB959691AE837EE075AF4E1; pageSelect=ed65ffb5c1fd849ab9c2a36665b9b001%3D3");
            headers.put("Origin", "https://code.nhsa.gov.cn");
            headers.put("Referer", "https://code.nhsa.gov.cn/hc/stdSpecification/toStdSpecificationCompanyReportList.html");
            headers.put("Accept-Language", "zh-CN,zh;q=0.9");
            for (int i = 1; i <= 6; i++)
            {
                Map<String, Object> form = new HashMap<>();
                form.put("_search", "false");
                form.put("rows", "1000");
                form.put("page", String.valueOf(i));
                form.put("sidx", null);
                form.put("sord", "asc");
                form.put("nd", "1723081276723");
                String body = HttpUtil.createPost("https://code.nhsa.gov.cn/hc/stdSpecification/getStdSpecificationListDataCompanyReport.html").form(form).addHeaders(headers).execute().body();
                Res o = Dson.fromString(Res.class, body);
                if (o == null || o.getRows() == null || o.getRows().isEmpty())
                {
                    // Nothing to store for this page; report it instead of failing inside batchInsert.
                    System.err.println("page " + i + " returned no rows, skipped");
                    continue;
                }
                try (SqlSession session = sessionFactory.openSession())
                {
                    session.batchInsert(o.getRows(), 1000);
                }
            }
        }
        finally
        {
            // Release the connection pool even when a request or an insert fails.
            dataSource.close();
        }
    }
}

View File

@ -1,21 +0,0 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
import lombok.experimental.Accessors;
/**
 * Row of the {@code qinhai_repeat} table: a pair of items, "a" and "b", each described by
 * a code and a name.
 * <p>
 * Lombok generates the accessors; {@code chain = true} makes every setter return
 * {@code this}.
 */
@Data
@Accessors(chain = true)
@TableDef("qinhai_repeat")
public class Repeat
{
    /** Auto-increment primary key. */
    @Pk
    @AutoIncrement
    private Integer id;

    /** Code of item "a". */
    private String aCode;

    /** Name of item "a". */
    private String aName;

    /** Code of item "b". */
    private String bCode;

    /** Name of item "b". */
    private String bName;
}

View File

@ -0,0 +1,21 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
/**
 * Row of the {@code rule_detail} table: one concrete detail belonging to a rule.
 * <p>
 * Lombok generates plain (non-chained) accessors.
 */
@TableDef("rule_detail")
@Data
public class RuleDetail
{
    /** Auto-increment primary key. */
    @Pk
    @AutoIncrement
    private Integer detailId;

    /** Identifier of the rule this detail belongs to. */
    private Integer ruleId;

    /** Human-readable description of the detail. */
    private String definition;

    /** Precondition text; ReadJiashou stores an {@code orderInfos.hasHiTreatment(...)} expression here. */
    private String pre;

    /** Expression text; ReadJiashou stores the constant {@code "1"}. */
    private String el;

    /** List type discriminator; ReadJiashou stores {@code 2}. Primitive, so it is never null. */
    private int hiListType;

    /** Tip text; ReadJiashou stores an {@code orderInfos.listHiTreatment(...)} expression here. */
    private String tip;
}

View File

@ -0,0 +1,106 @@
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import org.example.PdfData2;
import org.example.zhejiang.HiTrtServ;
import org.junit.Ignore;
import org.junit.Test;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Manually run import (ignored by default): reads the numbered PDF files 1 to 263 in
 * parallel, extracts one {@link PdfData2} per table row, removes duplicates (by code, see
 * {@code PdfData2.equals}) and batch-inserts the result.
 */
public class ReadQinhai2
{
    /**
     * Reads all PDF files on virtual threads and stores the distinct rows.
     * <p>
     * A file that fails to parse is reported on stderr; rows extracted from it before the
     * failure are kept and the remaining files are still processed.
     *
     * @throws InterruptedException when interrupted while waiting for the readers
     */
    @Test
    @Ignore
    public void readPdf() throws InterruptedException
    {
        int start = 1;
        int end = 263;
        CountDownLatch countDownLatch = new CountDownLatch(end - start + 1);
        // Slot i holds the rows of file i, so the final order is independent of task timing.
        AtomicReferenceArray<List<PdfData2>> array = new AtomicReferenceArray<>(end + 1);
        // try-with-resources shuts the executor down once every reader has finished.
        try (ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor())
        {
            for (int i = start; i <= end; i++)
            {
                int finalI = i;
                executorService.submit(() -> {
                    List<PdfData2> rows = new LinkedList<>();
                    try
                    {
                        readFile(finalI, rows);
                    }
                    catch (Throwable e)
                    {
                        System.err.println("failed to read PDF #" + finalI);
                        e.printStackTrace();
                    }
                    finally
                    {
                        // Always publish a (possibly partial) list so the merge below never sees null.
                        array.set(finalI, rows);
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
        }
        Queue<PdfData2> queue = new LinkedTransferQueue<>();
        for (int i = start; i <= end; i++)
        {
            queue.addAll(array.get(i));
        }
        List<PdfData2> list = queue.stream().distinct().toList();
        // NOTE(review): connection settings and credentials are hard-coded.
        HikariDataSource dataSource = new HikariDataSource();
        try
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            if (!list.isEmpty())
            {
                try (SqlSession session = sessionFactory.openSession())
                {
                    session.batchInsert(list, 1000);
                }
            }
        }
        finally
        {
            // Release the connection pool even when the insert fails.
            dataSource.close();
        }
    }

    /**
     * Appends to {@code rows} every data row found in the first table of each page of the
     * PDF file with the given number.
     * <p>
     * Rows are accumulated across all pages; previously a new list was created per page and
     * only the last page's rows survived.
     *
     * @param index number that completes the file name
     * @param rows  receives the extracted rows in page/row order
     * @throws Exception whatever the PDF library raises; also raised when a code cell
     *                   contains no "-"
     */
    private static void readFile(int index, List<PdfData2> rows) throws Exception
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            // Load the PDF document.
            pdf.loadFromFile("/Users/linbin/Downloads/2023年医保智能审核诊疗项目内涵耗材新增规则-/2023年医保智能审核诊疗项目内涵耗材新增规则- " + index + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // Walk through every page.
            for (int page = 0; page < pdf.getPages().getCount(); page++)
            {
                // Extract the tables of this page; only the first table is read.
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                PdfTable table = tableLists[0];
                int rowCount = table.getRowCount();
                // Row 0 is skipped; rows whose code cell is the header text are skipped too.
                for (int row = 1; row < rowCount; row++)
                {
                    String text = table.getText(row, 1).replace("\n", "");
                    if (text.equals("项目代码"))
                    {
                        continue;
                    }
                    PdfData2 pdfData = new PdfData2();
                    // The code is the part of the cell before the first "-".
                    pdfData.setCode(text.substring(0, text.indexOf("-")));
                    pdfData.setContainName(table.getText(row, 3).replace("\n", ""));
                    pdfData.setExtName(table.getText(row, 4).replace("\n", ""));
                    rows.add(pdfData);
                }
            }
        }
        finally
        {
            // Free the document's resources; 263 documents are opened per run.
            pdf.close();
        }
    }
}