pdf 转html(不需要本地生成html上传oss,输出流量直接传输OSS) <dependency> <groupId>e-iceblue</groupId> <artifactId>spire.pdf.free</artifactId> <version>4.3.0</version> </dependency> import com.spire.pdf.FileFormat; import com.spire.pdf.PdfDocument; @Override public void synchJuchaoOssPdfToHtml() { // 记录起始时间 long start = System.currentTimeMillis(); //2查询出MySQL 需要同步的公告数据 | status=0 初始 | status=1已上传pdf | status=2已解析html | status=3已上传html OssMetaGq ossMetaGq = new OssMetaGq(); ossMetaGq.setStatus(1); ossMetaGq.setPublishdate(DateUtils.parseDate("2019-12-30"));//最近三年; List<OssMetaGq> searchList = ossMetaGqMapper.selectOssMetaGqList(ossMetaGq); try { if (searchList != null && searchList.size() > 0) { // 1 文件处理pdf/txt 上传阿里云,并更新数据库pdf路径地址 searchList.forEach(ossMetaGqDto -> { try { //加载pdf文档 PdfDocument pdf = new PdfDocument(); pdf.loadFromStream(HttpUtils.byteByUrl(ossMetaGqDto.getUrl())); //设置useembeddedsvg和 useembeddedimg布尔值为true pdf.getConvertOptions().setPdfToHtmlOptions(true, true); //保存到流 ByteArrayOutputStream outputstream = new ByteArrayOutputStream(); pdf.saveToStream(outputstream, FileFormat.HTML); pdf.close(); 2上传阿里云html文件 String announcementTxtUrl = aliyunOSSService.fileUploadOssByte(Constants.ANNOUNCEMENT_TXT, ossMetaGqDto.getStockcode(), outputstream.toByteArray(), ".html"); ossMetaGqDto.setParsepath(announcementTxtUrl); } catch (IOException e) { e.printStackTrace(); ossMetaGqDto.setStatus(1); log.error("读取pdf格式转换异常:{}", e); } ossMetaGqDto.setStatus(3); }); //获取阿里云OSS文件 //log.info("getUrl " aliyunOSSService.getFileUrl("announcement/pdf/000502/2022/04/08/38EFE9B5521B471A93AB0D2CEC79B43D.PDF")); //log.info("getUrl " aliyunOSSService.getFileUrl("announcement/txt/000502/2022/04/08/71A93AB0D2CEC79B43D.txt")); //3 批处理更新处理状态; SqlSession sqlSession = null; try { sqlSession = sqlSessionFactory.openSession(ExecutorType.BATCH, false); OssMetaGqMapper metaGqMapperNew = sqlSession.getMapper(OssMetaGqMapper.class); searchList.stream().forEach(student -> metaGqMapperNew.updateOssMetaGq(student)); sqlSession.commit(); } catch (Exception e) { sqlSession.rollback(); log.error("异常批量处理:{}", e); } finally { if (sqlSession != null) { sqlSession.close(); } } log.info("{}", (System.currentTimeMillis() - start)); } } catch (Exception e) { e.printStackTrace(); } } /** * 按照Byte数组上传 * @param fileDir 文件目录 * @param stockCode 股票代码 * @param content 文件内容 * @param fileSuffix 文件后缀名 * @throws Exception */ public String fileUploadOssByte(String fileDir, String stockCode, byte[] content, String fileSuffix) { //在文件名称中添加随机唯一值,使用UUID生成 String newFileName = Long.toString(snowflakeIdWorker.nextId()); //获取当前日期,转换/2019/11/17 String generateDayDir = format.format(new Date()); // 填写Object不能包含完整的路径Bucket名称,例如exampledir/exampleobject.txt。 String fullFileName = fileDir stockCode generateDayDir "/" newFileName fileSuffix; // 创建OSSClient实例 OSS ossClient = new OSSClientBuilder().build(AliyunOSSConfig.OSS_ENDPOINT_URL, AliyunOSSConfig.ACCESS_KEY_ID, AliyunOSSConfig.ACCESS_KEY_SECRET); try { // 创建PutObject请求。 ossClient.putObject(AliyunOSSConfig.OSS_BUCKET_NAME, fullFileName, new ByteArrayInputStream(content)); } catch (Exception ce) { log.error("Caught an ClientException, which means the client encountered " "a serious internal problem while trying to communicate with OSS, " "such as not being able to access the network."); log.error("Error Message:" ce.getMessage()); } finally { if (ossClient != null) { ossClient.shutdown(); } } return fullFileName; } package com.realize.framework.util; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.client.HttpClient; import org.apache.http.client.entity.UrlEnodedFormEntity;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class HttpUtils {
/**
* get
*
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @return
* @throws Exception
*/
public static HttpResponse doGet(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpGet request = new HttpGet(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
return httpClient.execute(request);
}
/**
* post form
*
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @param bodys
* @return
* @throws Exception
*/
public static HttpResponse doPost(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys,
Map<String, String> bodys)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpPost request = new HttpPost(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
if (bodys != null) {
List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
for (String key : bodys.keySet()) {
nameValuePairList.add(new BasicNameValuePair(key, bodys.get(key)));
}
UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(nameValuePairList, "utf-8");
formEntity.setContentType("application/x-www-form-urlencoded; charset=UTF-8");
request.setEntity(formEntity);
}
return httpClient.execute(request);
}
/**
* Post String
*
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @param body
* @return
* @throws Exception
*/
public static HttpResponse doPost(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys,
String body)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpPost request = new HttpPost(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
if (StringUtils.isNotBlank(body)) {
request.setEntity(new StringEntity(body, "utf-8"));
}
return httpClient.execute(request);
}
/**
* Post stream
*
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @param body
* @return
* @throws Exception
*/
public static HttpResponse doPost(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys,
byte[] body)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpPost request = new HttpPost(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
if (body != null) {
request.setEntity(new ByteArrayEntity(body));
}
return httpClient.execute(request);
}
/**
* Put String
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @param body
* @return
* @throws Exception
*/
public static HttpResponse doPut(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys,
String body)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpPut request = new HttpPut(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
if (StringUtils.isNotBlank(body)) {
request.setEntity(new StringEntity(body, "utf-8"));
}
return httpClient.execute(request);
}
/**
* Put stream
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @param body
* @return
* @throws Exception
*/
public static HttpResponse doPut(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys,
byte[] body)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpPut request = new HttpPut(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
if (body != null) {
request.setEntity(new ByteArrayEntity(body));
}
return httpClient.execute(request);
}
/**
* Delete
*
* @param host
* @param path
* @param method
* @param headers
* @param querys
* @return
* @throws Exception
*/
public static HttpResponse doDelete(String host, String path, String method,
Map<String, String> headers,
Map<String, String> querys)
throws Exception {
HttpClient httpClient = wrapClient(host);
HttpDelete request = new HttpDelete(buildUrl(host, path, querys));
for (Map.Entry<String, String> e : headers.entrySet()) {
request.addHeader(e.getKey(), e.getValue());
}
return httpClient.execute(request);
}
private static String buildUrl(String host, String path, Map<String, String> querys) throws UnsupportedEncodingException {
StringBuilder sbUrl = new StringBuilder();
sbUrl.append(host);
if (!StringUtils.isBlank(path)) {
sbUrl.append(path);
}
if (null != querys) {
StringBuilder sbQuery = new StringBuilder();
for (Map.Entry<String, String> query : querys.entrySet()) {
if (0 < sbQuery.length()) {
sbQuery.append("&");
}
if (StringUtils.isBlank(query.getKey()) && !StringUtils.isBlank(query.getValue())) {
sbQuery.append(query.getValue());
}
if (!StringUtils.isBlank(query.getKey())) {
sbQuery.append(query.getKey());
if (!StringUtils.isBlank(query.getValue())) {
sbQuery.append("=");
sbQuery.append(URLEncoder.encode(query.getValue(), "utf-8"));
}
}
}
if (0 < sbQuery.length()) {
sbUrl.append("?").append(sbQuery);
}
}
return sbUrl.toString();
}
private static HttpClient wrapClient(String host) {
HttpClient httpClient = new DefaultHttpClient();
if (host.startsWith("https://")) {
sslClient(httpClient);
}
return httpClient;
}
private static void sslClient(HttpClient httpClient) {
try {
SSLContext ctx = SSLContext.getInstance("TLS");
X509TrustManager tm = new X509TrustManager() {
public X509Certificate[] getAcceptedIssuers() {
return null;
}
public void checkClientTrusted(X509Certificate[] xcs, String str) {
}
public void checkServerTrusted(X509Certificate[] xcs, String str) {
}
};
ctx.init(null, new TrustManager[] { tm }, null);
SSLSocketFactory ssf = new SSLSocketFactory(ctx);
ssf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
ClientConnectionManager ccm = httpClient.getConnectionManager();
SchemeRegistry registry = ccm.getSchemeRegistry();
registry.register(new Scheme("https", 443, ssf));
} catch (KeyManagementException ex) {
throw new RuntimeException(ex);
} catch (NoSuchAlgorithmException ex) {
throw new RuntimeException(ex);
}
}
/**
* 根据URL文件地址转InputStream
* @param urlOrPath
* @return
* @throws IOException
*/
public static InputStream byteByUrl(String urlOrPath) throws IOException {
InputStream in = null;
byte[] bytes;
if (urlOrPath.toLowerCase().startsWith("http")) {
URL url = new URL(urlOrPath);
return url.openStream();
} else {
File file = new File(urlOrPath);
if (!file.isFile() || !file.exists() || !file.canRead()) {
return null;
}
return new FileInputStream(file);
}
}
}