更新读取

master
linbin 2024-08-05 17:07:56 +08:00
parent 6b2310afd7
commit 9c24b6e2cf
14 changed files with 555 additions and 16 deletions

40
pom.xml
View File

@ -62,6 +62,42 @@
<artifactId>easyexcel</artifactId>
<version>3.3.4</version>
</dependency>
<!-- NOTE(review): jaxb-api below is a javax.xml.bind 2.4 beta build, while jaxb-impl and jaxb-core 4.0.1
     are believed to target the jakarta.xml.bind namespace. Confirm these versions work together. -->
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.4.0-b180830.0359</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-core</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
<version>1.1.1</version>
</dependency>
<!-- NOTE(review): no use of hutool or the iText pdfa artifact is visible in the classes added by this
     change. Verify they are needed before keeping them. -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.6.5</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>pdfa</artifactId>
<version>7.1.11</version>
</dependency>
<!-- Spire.PDF supplies the com.spire.pdf classes (PdfDocument, PdfTableExtractor) used by the PDF readers.
     NOTE(review): this artifact is presumably served from the vendor's own Maven repository rather than
     Maven Central. Confirm a matching repository entry exists in this pom. -->
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.pdf</artifactId>
<version>10.6.0</version>
</dependency>
</dependencies>
<build>
<plugins>
@ -69,8 +105,8 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>16</source>
<target>16</target>
<source>21</source>
<target>21</target>
</configuration>
</plugin>
</plugins>

View File

@ -0,0 +1,93 @@
package org.example;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import org.example.zhejiang.HiTrtServ;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Imports code/name pairs from a numbered series of PDF files into the database.
 * <p>
 * PDF parts 3 through 41 are each parsed on their own virtual thread. For every page the first
 * extracted table is read (row 0 is skipped), table columns 1-4 become one {@link Repeat}, and the
 * text from the first "-" onward is dropped from both codes. Rows whose two codes end up equal are
 * discarded. Once every part has been parsed the rows are batch-inserted in part order.
 * <p>
 * If any part cannot be read the import aborts before touching the database, so a partial data set
 * is never inserted.
 */
public class ReadQinhai
{
    /**
     * Parses all PDF parts concurrently and batch-inserts the resulting rows.
     *
     * @param args unused
     * @throws InterruptedException  if the main thread is interrupted while waiting for the parsers
     * @throws IllegalStateException if at least one PDF part could not be read
     */
    public static void main(String[] args) throws InterruptedException
    {
        int start = 3;
        int num = 41;
        CountDownLatch countDownLatch = new CountDownLatch(num - start + 1);
        // Slot i holds the rows of part i; a slot that is still null after the latch opens means
        // that part failed.
        AtomicReferenceArray<List<Repeat>> array = new AtomicReferenceArray<>(num + 1);
        // try-with-resources shuts the executor down once all parser tasks are done.
        try (ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor())
        {
            for (int i = start; i <= num; i++)
            {
                int finalI = i;
                executorService.submit(() -> {
                    try
                    {
                        array.set(finalI, readPart(finalI));
                    }
                    catch (RuntimeException e)
                    {
                        // submit() would otherwise swallow the exception inside the Future.
                        System.err.println("Failed to read PDF part " + finalI + ": " + e);
                    }
                    finally
                    {
                        // Always release the latch, otherwise one bad file blocks await() forever.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
        }
        Queue<Repeat> queue = new LinkedTransferQueue<>();
        for (int i = start; i <= num; i++)
        {
            List<Repeat> rows = array.get(i);
            if (rows == null)
            {
                throw new IllegalStateException("PDF part " + i + " could not be read; nothing was inserted");
            }
            queue.addAll(rows);
        }
        // NOTE(review): connection URL and credentials are hard-coded; consider moving them to configuration.
        try (HikariDataSource dataSource = new HikariDataSource())
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            try (SqlSession session = sessionFactory.openSession())
            {
                session.batchInsert(queue, 1000);
            }
        }
    }

    /**
     * Reads every page of one PDF part and returns the rows it contains.
     *
     * @param part number of the PDF part, used to build the file name
     * @return the rows of all pages in page order; empty when no page contains a table
     */
    private static List<Repeat> readPart(int part)
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            pdf.loadFromFile("/Users/linbin/Downloads/2023年医保智能审核新增规则(诊疗项目)-/2023年医保智能审核新增规则(诊疗项目)- " + part + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // One list for the whole document, so rows of earlier pages are not overwritten by later pages.
            List<Repeat> list = new LinkedList<>();
            for (int page = 0; page < pdf.getPages().getCount(); page++)
            {
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                // Only the first table of each page is read.
                PdfTable table = tableLists[0];
                int rowCount = table.getRowCount();
                // Row 0 is skipped (presumably the header row).
                for (int row = 1; row < rowCount; row++)
                {
                    Repeat repeat = new Repeat();
                    repeat.setACode(codePrefix(table.getText(row, 1).replace("\n", "")));
                    repeat.setAName(table.getText(row, 2).replace("\n", ""));
                    repeat.setBCode(codePrefix(table.getText(row, 3).replace("\n", "")));
                    repeat.setBName(table.getText(row, 4).replace("\n", ""));
                    if (!repeat.getACode().equals(repeat.getBCode()))
                    {
                        list.add(repeat);
                    }
                }
            }
            return list;
        }
        finally
        {
            pdf.close();
        }
    }

    /**
     * Returns the part of {@code code} before its first "-".
     * A code without any "-" is returned unchanged instead of failing on a negative substring bound.
     */
    private static String codePrefix(String code)
    {
        int dash = code.indexOf("-");
        return dash < 0 ? code : code.substring(0, dash);
    }
}

View File

@ -0,0 +1,21 @@
package org.example;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
import lombok.experimental.Accessors;
/**
 * Data holder annotated for the {@code qinhai_repeat} table: a pair of items "A" and "B", each with
 * a code and a name.
 * <p>
 * ReadQinhai fills instances from PDF table rows and only keeps rows whose two codes differ.
 * Accessors are generated by Lombok; setters are chainable because of {@code @Accessors(chain = true)}.
 */
@TableDef("qinhai_repeat")
@Data
@Accessors(chain = true)
public class Repeat
{
// Primary key, marked as auto-increment.
@Pk
@AutoIncrement
private Integer id;
// Code of item A: ReadQinhai takes table column 1 and keeps the text before the first "-".
private String aCode;
// Name of item A: ReadQinhai takes table column 2.
private String aName;
// Code of item B: ReadQinhai takes table column 3 and keeps the text before the first "-".
private String bCode;
// Name of item B: ReadQinhai takes table column 4.
private String bName;
}

View File

@ -1,4 +1,4 @@
package org.example;
package org.example.drug;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelReader;
@ -9,7 +9,7 @@ import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.mysql.cj.jdbc.Driver;
import com.zaxxer.hikari.HikariDataSource;
import org.example.dto.HiDrugType;
import org.example.drug.dto.HiDrugType;
import java.util.ArrayList;
import java.util.HashMap;

View File

@ -1,4 +1,4 @@
package org.example;
package org.example.drug;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
@ -8,8 +8,8 @@ import com.jfirer.jsql.session.SqlSession;
import lombok.Data;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.example.dto.HiDrugDetail;
import org.example.dto.HiDrugType;
import org.example.drug.dto.HiDrugDetail;
import org.example.drug.dto.HiDrugType;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,15 +1,14 @@
package org.example;
package org.example.drug;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.jfirer.baseutil.StringUtil;
import com.jfirer.dson.Dson;
import com.jfirer.jsql.session.SqlSession;
import lombok.Data;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.example.dto.HiDrugDetail;
import org.example.dto.HiDrugType;
import org.example.drug.dto.HiDrugDetail;
import org.example.drug.dto.HiDrugType;
import java.util.ArrayList;
import java.util.List;

View File

@ -0,0 +1,81 @@
package org.example.drug;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import org.example.drug.dto.Drug;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedTransferQueue;
/**
 * Imports drug records from a numbered series of PDF files into the database.
 * <p>
 * Each of the 17950 PDF parts is parsed on its own virtual thread. Every table on every page is
 * read, and table columns 0-17 of each data row become one {@link Drug}. Part 1 starts at row 3,
 * all other parts at row 2. Rows are collected in a concurrent queue, so their order across parts
 * is not deterministic. After all parts are parsed the rows are batch-inserted.
 * <p>
 * If any part cannot be read the import aborts before touching the database.
 */
public class ReadPdf
{
    /**
     * Parses all PDF parts concurrently and batch-inserts the resulting rows.
     *
     * @param args unused
     * @throws InterruptedException  if the main thread is interrupted while waiting for the parsers
     * @throws IllegalStateException if at least one PDF part could not be read
     */
    public static void main(String[] args) throws InterruptedException
    {
        int total = 17950;
        Queue<Drug> drugs = new LinkedTransferQueue<>();
        Queue<Integer> failedParts = new LinkedTransferQueue<>();
        CountDownLatch countDownLatch = new CountDownLatch(total);
        for (int index = 1; index <= total; index++)
        {
            int finalIndex = index;
            Thread.startVirtualThread(() -> {
                boolean ok = false;
                try
                {
                    readPart(finalIndex, drugs);
                    System.out.println("第" + finalIndex + "页结束");
                    ok = true;
                }
                finally
                {
                    if (!ok)
                    {
                        failedParts.add(finalIndex);
                    }
                    // Always release the latch, otherwise one bad file blocks await() forever.
                    countDownLatch.countDown();
                }
            });
        }
        countDownLatch.await();
        if (!failedParts.isEmpty())
        {
            throw new IllegalStateException("PDF parts could not be read, nothing was inserted: " + failedParts);
        }
        long t0 = System.currentTimeMillis();
        try (SqlSession session = App.SESSION_FACTORY.openSession())
        {
            session.batchInsert(drugs, 1000);
        }
        System.out.println("插入耗时:" + (System.currentTimeMillis() - t0));
    }

    /**
     * Reads every table of every page of one PDF part and appends its rows to {@code drugs}.
     * The document is closed even when parsing fails.
     *
     * @param part  number of the PDF part, used to build the file name
     * @param drugs thread-safe sink the parsed rows are added to
     */
    private static void readPart(int part, Queue<Drug> drugs)
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            pdf.loadFromFile("/Users/linbin/Downloads/未命名文件夹/医保药品分类与代码数据(西药、中成药)截至2024年3月29日-part/医保药品分类与代码数据(西药、中成药)截至2024年3月29日-part " + part + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // Part 1 skips one more leading row than the other parts (presumably an extra title row).
            int startRow = part == 1 ? 3 : 2;
            for (int page = 0; page < pdf.getPages().getCount(); page++)
            {
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                for (PdfTable table : tableLists)
                {
                    int rowCount = table.getRowCount();
                    for (int i = startRow; i < rowCount; i++)
                    {
                        drugs.add(readRow(table, i));
                    }
                }
            }
        }
        finally
        {
            pdf.close();
        }
    }

    /** Builds one {@link Drug} from table columns 0-17 of the given row. */
    private static Drug readRow(PdfTable table, int row)
    {
        Drug drug = new Drug();
        drug.setDrugCode(cell(table, row, 0));
        drug.setRegisterName(cell(table, row, 1));
        drug.setRegisterDosageform(cell(table, row, 2));
        drug.setRegisterSpec(cell(table, row, 3));
        drug.setProductName(cell(table, row, 4));
        drug.setDosageform(cell(table, row, 5));
        drug.setSpec(cell(table, row, 6));
        drug.setPackagingMaterial(cell(table, row, 7));
        drug.setMinPackageNum(cell(table, row, 8));
        drug.setMinPreparationUnit(cell(table, row, 9));
        drug.setMinPackageUnit(cell(table, row, 10));
        drug.setEnterprise(cell(table, row, 11));
        drug.setApprovalNum(cell(table, row, 12));
        drug.setDrugStandardNum(cell(table, row, 13));
        drug.setHiType(cell(table, row, 14));
        drug.setHiNum(cell(table, row, 15));
        drug.setHiName(cell(table, row, 16));
        drug.setHiDosageform(cell(table, row, 17));
        return drug;
    }

    /** Returns the text of one table cell with the line breaks inside the cell removed. */
    private static String cell(PdfTable table, int row, int column)
    {
        return table.getText(row, column).replace("\n", "");
    }
}

View File

@ -1,4 +1,4 @@
package org.example;
package org.example.drug;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
@ -8,8 +8,8 @@ import com.jfirer.jsql.session.SqlSession;
import lombok.Data;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.example.dto.HiDrugDetail;
import org.example.dto.HiDrugType;
import org.example.drug.dto.HiDrugDetail;
import org.example.drug.dto.HiDrugType;
import java.util.ArrayList;
import java.util.List;

View File

@ -0,0 +1,87 @@
package org.example.drug.dto;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
/**
 * Data holder annotated for the {@code all_drug_data} table: one drug row extracted from a PDF table.
 * <p>
 * ReadPdf fills every String field from one table column (index noted on each field) after
 * removing the line breaks inside the cell. Field meanings beyond their names are not visible here.
 */
@TableDef("all_drug_data")
@Data
public class Drug
{
// Primary key, marked as auto-increment.
@Pk
@AutoIncrement
private Integer id;
/**
* Drug code; read from table column 0.
*/
private String drugCode;
/**
* Registered name; read from table column 1.
*/
private String registerName;
/**
* Registered dosage form; read from table column 2.
*/
private String registerDosageform;
/**
* Registered specification; read from table column 3.
*/
private String registerSpec;
/**
* Product name; read from table column 4.
*/
private String productName;
/**
* Dosage form; read from table column 5.
*/
private String dosageform;
/**
* Specification; read from table column 6.
*/
private String spec;
/**
* Packaging material; read from table column 7.
*/
private String packagingMaterial;
/**
* Minimum package number, kept as text; read from table column 8.
*/
private String minPackageNum;
/**
* Minimum preparation unit; read from table column 9.
*/
private String minPreparationUnit;
/**
* Minimum package unit; read from table column 10.
*/
private String minPackageUnit;
/**
* Enterprise (presumably the manufacturer - TODO confirm); read from table column 11.
*/
private String enterprise;
/**
* Approval number; read from table column 12.
*/
private String approvalNum;
/**
* Drug standard number; read from table column 13.
*/
private String drugStandardNum;
/**
* "hi" type (presumably health-insurance category - TODO confirm); read from table column 14.
*/
private String hiType;
/**
* "hi" number; read from table column 15.
*/
private String hiNum;
/**
* "hi" name; read from table column 16.
*/
private String hiName;
/**
* "hi" dosage form; read from table column 17.
*/
private String hiDosageform;
}

View File

@ -1,4 +1,4 @@
package org.example.dto;
package org.example.drug.dto;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;

View File

@ -1,4 +1,4 @@
package org.example.dto;
package org.example.drug.dto;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;

View File

@ -0,0 +1,32 @@
package org.example.zhejiang;
import com.jfirer.jsql.annotation.AutoIncrement;
import com.jfirer.jsql.annotation.Pk;
import com.jfirer.jsql.annotation.TableDef;
import lombok.Data;
/**
 * Data holder annotated for the {@code hi_trt_serv_hunan} table: one service item row extracted
 * from a PDF table.
 * <p>
 * Both ReadHunan and ReadZhejiang create instances of this class, each from different table
 * columns (noted per field), and neither stores rows whose {@code hilistCode} is blank.
 * NOTE(review): both readers therefore insert into the same table - confirm this is intended.
 */
@Data
@TableDef("hi_trt_serv_hunan")
public class HiTrtServ
{
// Primary key, marked as auto-increment.
@AutoIncrement
@Pk
private Integer id;
// Item code: table column 1 in ReadHunan, column 0 in ReadZhejiang.
private String hilistCode;
// Item name: table column 2 in ReadHunan, column 1 in ReadZhejiang.
private String hilistName;
/**
* Connotation text: table column 5 in ReadHunan, column 2 in ReadZhejiang.
*/
private String connotation;
// Table column 6 in ReadHunan, column 3 in ReadZhejiang. The snake_case name is part of the
// generated accessor names (setExt_contents) the readers call, so it must not be renamed casually.
private String ext_contents;
/**
* Unit: table column 7 in ReadHunan, column 4 in ReadZhejiang.
*/
private String unit;
// Remark: table column 6 in ReadZhejiang; not set by ReadHunan.
private String remark;
// Level: table column 7 in ReadZhejiang; not set by ReadHunan.
private String hiLevel;
/**
* Not populated by ReadHunan or ReadZhejiang.
*/
private String limitPayScope;
}

View File

@ -0,0 +1,93 @@
package org.example.zhejiang;
import com.jfirer.baseutil.StringUtil;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Imports service items from a numbered series of PDF files into the database.
 * <p>
 * PDF parts 1 through 332 are each parsed on their own virtual thread. For every page the first
 * extracted table is read (row 0 is skipped) and table columns 1, 2, 5, 6 and 7 become one
 * {@link HiTrtServ}. Rows with a blank code are discarded. Once every part has been parsed the
 * rows are batch-inserted in part order.
 * <p>
 * If any part cannot be read the import aborts before touching the database, so a partial data set
 * is never inserted.
 */
public class ReadHunan
{
    /**
     * Parses all PDF parts concurrently and batch-inserts the resulting rows.
     *
     * @param args unused
     * @throws InterruptedException  if the main thread is interrupted while waiting for the parsers
     * @throws IllegalStateException if at least one PDF part could not be read
     */
    public static void main(String[] args) throws InterruptedException
    {
        int num = 332;
        CountDownLatch countDownLatch = new CountDownLatch(num);
        // Slot i holds the rows of part i; a slot that is still null after the latch opens means
        // that part failed.
        AtomicReferenceArray<List<HiTrtServ>> array = new AtomicReferenceArray<>(num + 1);
        // try-with-resources shuts the executor down once all parser tasks are done.
        try (ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor())
        {
            for (int i = 1; i <= num; i++)
            {
                int finalI = i;
                executorService.submit(() -> {
                    try
                    {
                        array.set(finalI, readPart(finalI));
                    }
                    catch (RuntimeException e)
                    {
                        // submit() would otherwise swallow the exception inside the Future.
                        System.err.println("Failed to read PDF part " + finalI + ": " + e);
                    }
                    finally
                    {
                        // Always release the latch, otherwise one bad file blocks await() forever.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
        }
        Queue<HiTrtServ> queue = new LinkedTransferQueue<>();
        for (int i = 1; i <= num; i++)
        {
            List<HiTrtServ> hiTrtServs = array.get(i);
            if (hiTrtServs == null)
            {
                throw new IllegalStateException("PDF part " + i + " could not be read; nothing was inserted");
            }
            queue.addAll(hiTrtServs);
        }
        // NOTE(review): connection URL and credentials are hard-coded; consider moving them to configuration.
        try (HikariDataSource dataSource = new HikariDataSource())
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            try (SqlSession session = sessionFactory.openSession())
            {
                session.batchInsert(queue, 1000);
            }
        }
    }

    /**
     * Reads every page of one PDF part and returns the rows it contains.
     *
     * @param part number of the PDF part, used to build the file name
     * @return the rows of all pages in page order; empty when no page contains a table
     */
    private static List<HiTrtServ> readPart(int part)
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            pdf.loadFromFile("/Users/linbin/Downloads/湖南省医疗服务价格项目目录2024 年版)-part/湖南省医疗服务价格项目目录2024 年版)-part " + part + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // One list for the whole document, so rows of earlier pages are not overwritten by later pages.
            List<HiTrtServ> list = new LinkedList<>();
            for (int page = 0; page < pdf.getPages().getCount(); page++)
            {
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                // Only the first table of each page is read.
                PdfTable table = tableLists[0];
                int rowCount = table.getRowCount();
                // Row 0 is skipped (presumably the header row).
                for (int row = 1; row < rowCount; row++)
                {
                    HiTrtServ serv = new HiTrtServ();
                    // Column 0 (the finance column) is ignored.
                    serv.setHilistCode(table.getText(row, 1).replace("\n", ""));
                    serv.setHilistName(table.getText(row, 2).replace("\n", ""));
                    serv.setConnotation(table.getText(row, 5).replace("\n", ""));
                    serv.setExt_contents(table.getText(row, 6).replace("\n", ""));
                    serv.setUnit(table.getText(row, 7).replace("\n", ""));
                    if (StringUtil.isNotBlank(serv.getHilistCode()))
                    {
                        list.add(serv);
                    }
                }
            }
            return list;
        }
        finally
        {
            pdf.close();
        }
    }
}

View File

@ -0,0 +1,97 @@
package org.example.zhejiang;
import com.jfirer.baseutil.StringUtil;
import com.jfirer.jsql.SessionFactory;
import com.jfirer.jsql.SessionfactoryConfig;
import com.jfirer.jsql.session.SqlSession;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.utilities.PdfTable;
import com.spire.pdf.utilities.PdfTableExtractor;
import com.zaxxer.hikari.HikariDataSource;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.atomic.AtomicReferenceArray;
/**
 * Imports service items from a numbered series of PDF files into the database.
 * <p>
 * PDF parts 1 through 354 are each parsed on their own virtual thread. For every page the first
 * extracted table is read (row 0 is skipped) and table columns 0-4, 6 and 7 become one
 * {@link HiTrtServ}. Every parsed row is printed; rows with a blank code are not stored. Once
 * every part has been parsed the rows are batch-inserted in part order.
 * <p>
 * If any part cannot be read the import aborts before touching the database, so a partial data set
 * is never inserted.
 * <p>
 * NOTE(review): the class is named after Zhejiang but the input path refers to Jilin province -
 * confirm which data set this reader is meant for.
 */
public class ReadZhejiang
{
    /**
     * Parses all PDF parts concurrently and batch-inserts the resulting rows.
     *
     * @param args unused
     * @throws InterruptedException  if the main thread is interrupted while waiting for the parsers
     * @throws IllegalStateException if at least one PDF part could not be read
     */
    public static void main(String[] args) throws InterruptedException
    {
        int num = 354;
        CountDownLatch countDownLatch = new CountDownLatch(num);
        // Slot i holds the rows of part i; a slot that is still null after the latch opens means
        // that part failed.
        AtomicReferenceArray<List<HiTrtServ>> array = new AtomicReferenceArray<>(num + 1);
        // try-with-resources shuts the executor down once all parser tasks are done.
        try (ExecutorService executorService = Executors.newVirtualThreadPerTaskExecutor())
        {
            for (int i = 1; i <= num; i++)
            {
                int finalI = i;
                executorService.submit(() -> {
                    try
                    {
                        array.set(finalI, readPart(finalI));
                    }
                    catch (RuntimeException e)
                    {
                        // submit() would otherwise swallow the exception inside the Future.
                        System.err.println("Failed to read PDF part " + finalI + ": " + e);
                    }
                    finally
                    {
                        // Always release the latch, otherwise one bad file blocks await() forever.
                        countDownLatch.countDown();
                    }
                });
            }
            countDownLatch.await();
        }
        Queue<HiTrtServ> queue = new LinkedTransferQueue<>();
        for (int i = 1; i <= num; i++)
        {
            List<HiTrtServ> hiTrtServs = array.get(i);
            if (hiTrtServs == null)
            {
                throw new IllegalStateException("PDF part " + i + " could not be read; nothing was inserted");
            }
            queue.addAll(hiTrtServs);
        }
        // NOTE(review): connection URL and credentials are hard-coded; consider moving them to configuration.
        try (HikariDataSource dataSource = new HikariDataSource())
        {
            dataSource.setJdbcUrl("jdbc:mysql://yynas.cn:53306/fee_control");
            dataSource.setDriverClassName(com.mysql.cj.jdbc.Driver.class.getName());
            dataSource.setUsername("root");
            dataSource.setPassword("root");
            SessionfactoryConfig config = new SessionfactoryConfig();
            config.setDataSource(dataSource);
            SessionFactory sessionFactory = config.build();
            try (SqlSession session = sessionFactory.openSession())
            {
                session.batchInsert(queue, 1000);
            }
        }
    }

    /**
     * Reads every page of one PDF part and returns the rows it contains.
     *
     * @param part number of the PDF part, used to build the file name
     * @return the rows of all pages in page order; empty (never null) when no page contains a table
     */
    private static List<HiTrtServ> readPart(int part)
    {
        PdfDocument pdf = new PdfDocument();
        try
        {
            pdf.loadFromFile("/Users/linbin/Downloads/吉林省基本医疗保险、工伤保险和生育保险诊疗项目及医疗服务设施项目目录2022年-part/吉林省基本医疗保险、工伤保险和生育保险诊疗项目及医疗服务设施项目目录2022年-part " + part + ".pdf");
            PdfTableExtractor extractor = new PdfTableExtractor(pdf);
            // One list for the whole document, so rows of earlier pages are not overwritten by later
            // pages and a document without tables still yields a non-null result.
            List<HiTrtServ> list = new LinkedList<>();
            for (int page = 0; page < pdf.getPages().getCount(); page++)
            {
                PdfTable[] tableLists = extractor.extractTable(page);
                if (tableLists == null || tableLists.length == 0)
                {
                    continue;
                }
                // Only the first table of each page is read.
                PdfTable table = tableLists[0];
                int rowCount = table.getRowCount();
                // Row 0 is skipped (presumably the header row).
                for (int row = 1; row < rowCount; row++)
                {
                    HiTrtServ serv = new HiTrtServ();
                    serv.setHilistCode(table.getText(row, 0).replace("\n", ""));
                    serv.setHilistName(table.getText(row, 1).replace("\n", ""));
                    serv.setConnotation(table.getText(row, 2).replace("\n", ""));
                    serv.setExt_contents(table.getText(row, 3).replace("\n", ""));
                    serv.setUnit(table.getText(row, 4).replace("\n", ""));
                    // Column 5 (the upgraded-price column) is skipped.
                    serv.setRemark(table.getText(row, 6).replace("\n", ""));
                    serv.setHiLevel(table.getText(row, 7).replace("\n", ""));
                    if (StringUtil.isNotBlank(serv.getHilistCode()))
                    {
                        list.add(serv);
                    }
                    System.out.println(serv);
                }
            }
            return list;
        }
        finally
        {
            pdf.close();
        }
    }
}