You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

560 lines
20 KiB
Java

package com.ann.utils.downLoader;
import com.alibaba.fastjson.JSON;
import com.ann.utils.downLoader.download.CallBackPara;
import com.ann.utils.downLoader.download.FileCheckPoints;
import com.ann.utils.downLoader.download.IDownCallBack;
import com.ann.utils.downLoader.download.IDownloadInfo;
import com.ann.utils.downLoader.download.MultiDownFile;
import com.ann.utils.downLoader.download.SaveFileItem;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.httpclient.util.URIUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.cert.CertificateException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
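/**
 * Thread that downloads the file(s) named by an {@link IDownloadInfo} over HTTP/HTTPS and
 * reports each result through an optional {@link IDownCallBack}.
 */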
@Slf4j
public class HttpDownloader extends Thread {
// SLF4J logger used by the download helpers. The class is also annotated with @Slf4j, and
// run() logs through the generated `log` field rather than this one.
private Logger logger = LoggerFactory.getLogger(HttpDownloader.class);
// Describes the download task; run() reads the remote URL, local path/name and related
// settings from info.getPair().
private IDownloadInfo info;
// Retry budget supplied by the two-argument constructor (default 5).
// NOTE(review): no method visible in HttpDownloader reads this field - confirm whether retries
// are expected here.
private int maxRetry = 5;
// Optional result listener installed via intiCallBack(); run() skips the callbacks when null.
private IDownCallBack downCallBack;
// User-Agent header value sent by downLoadFromUrl().
private String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36";
/**
 * Creates a downloader for the given task with a caller-supplied retry budget.
 *
 * @param info     description of what to download and where to store it
 * @param maxRetry value stored in the {@code maxRetry} field in place of its default of 5
 */
public HttpDownloader(IDownloadInfo info, int maxRetry) {
    this.maxRetry = maxRetry;
    this.info = info;
}
/**
 * Installs the callback that {@link #run()} notifies about download results.
 *
 * @param downCallBack listener whose {@code success}/{@code fail} methods are invoked by
 *                     {@code run()}; may be {@code null}, in which case no callback is made
 */
public void intiCallBack(IDownCallBack downCallBack) {
    this.downCallBack = downCallBack;
}
/**
 * Creates a downloader for the given task, keeping the default retry budget.
 *
 * @param info description of what to download and where to store it
 */
public HttpDownloader(IDownloadInfo info) {
    this.info = info;
}
/**
 * Returns a copy of {@code arr} without its empty-string elements.
 *
 * <p>Only elements equal to {@code ""} are dropped; {@code null} elements and
 * whitespace-only strings are kept, and the relative order is preserved.
 *
 * @param arr source array; must not be {@code null}
 * @return a new array holding the remaining elements
 */
public String[] removeArraysEmpty(String[] arr) {
    List<String> kept = new ArrayList<>(arr.length);
    for (String element : arr) {
        if (!"".equals(element)) {
            kept.add(element);
        }
    }
    return kept.toArray(new String[0]);
}
/**
 * Downloads every file described by {@code info.getPair()} and reports the outcome through
 * the optional callback.
 *
 * <p>The remote URL may hold several {@code http://} / {@code https://} URLs concatenated
 * with separator strings. When more than one URL is found, each one is downloaded to
 * {@code docustemp_<index>_<localName>}; otherwise the URL is downloaded unchanged to
 * {@code localName}. URLs matching any of the {@code proxyurls} patterns are rewritten through
 * the {@code failurl} format string before downloading.
 *
 * <p>{@code success} is invoked after each file that downloads. The first failure, including
 * a failure while preparing the file list, stops processing and invokes {@code fail} once.
 */
@Override
public void run() {
    CallBackPara para = new CallBackPara();
    para.setId(info.getPair().id);
    para.setFilename(info.getPair().localName);
    para.setLocalpath(info.getPair().localPath);
    para.setStarttime(getNewtime());
    para.setFileStorageFormat(info.getPair().fileStorageFormat);
    para.setDatatype(info.getPair().datatype);
    List<MultiDownFile> files = new ArrayList<>();
    try {
        String url = info.getPair().remoteUrl;
        List<String> parts = splitRemoteUrls(url);
        if (parts.size() > 1) {
            for (int i = 0; i < parts.size(); i++) {
                String single = stripTrailingSeparators(parts.get(i));
                files.add(new MultiDownFile(single, info.getPair().localPath,
                        "docustemp_" + i + "_" + info.getPair().localName));
            }
        } else {
            files.add(new MultiDownFile(url, info.getPair().localPath, info.getPair().localName));
        }
        para.setFiles(files);
        if (info.getPair().proxyurls != null) {
            // proxyurls entries are combined into one alternation and used as a regex.
            Pattern p = Pattern.compile(String.join("|", info.getPair().proxyurls));
            for (MultiDownFile o : files) {
                Matcher matcher = p.matcher(o.getRemoteUrl());
                if (matcher.find()) {
                    o.setRemoteUrl(String.format(info.getPair().failurl, o.getRemoteUrl()));
                }
            }
        }
        for (MultiDownFile file : files) {
            // Use the per-file URL: it carries both the split result and any proxy rewrite.
            downLoadFromUrl(file.getRemoteUrl(), file.getLocalName(), file.getLocalPath());
            if (downCallBack != null) {
                downCallBack.success(para);
            }
            // Throttle so the third-party server is not hit too quickly.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                // Keep going as before, but preserve the interrupt status for the caller.
                Thread.currentThread().interrupt();
            }
        }
    } catch (Exception e) {
        log.error("nio下载失败" + JSON.toJSONString(para) + ";失败信息:" + e.getMessage(), e);
        if (downCallBack != null) {
            downCallBack.fail(para);
        }
    }
}

/**
 * Splits a string of concatenated URLs at every {@code http://} / {@code https://} marker,
 * keeping each URL's own scheme. Text before the first marker and markers followed by
 * nothing are ignored.
 */
private List<String> splitRemoteUrls(String url) {
    List<String> parts = new ArrayList<>();
    Matcher scheme = Pattern.compile("https?://").matcher(url);
    int segmentStart = -1;
    int bodyStart = -1;
    while (scheme.find()) {
        // Close the previous segment unless it consisted of the scheme only.
        if (segmentStart >= 0 && scheme.start() > bodyStart) {
            parts.add(url.substring(segmentStart, scheme.start()));
        }
        segmentStart = scheme.start();
        bodyStart = scheme.end();
    }
    if (segmentStart >= 0 && bodyStart < url.length()) {
        parts.add(url.substring(segmentStart));
    }
    return parts;
}

/**
 * Removes one trailing occurrence of each configured separator, compared as literal text
 * (not as a regular expression), in the order the separators are configured.
 */
private String stripTrailingSeparators(String value) {
    if (this.info.getPair().separators == null) {
        return value;
    }
    String result = value;
    for (String separator : this.info.getPair().separators) {
        if (separator != null && !separator.isEmpty() && result.endsWith(separator)) {
            result = result.substring(0, result.length() - separator.length());
        }
    }
    return result;
}
/**
 * Percent-encodes the Chinese characters of a URL and leaves every other character as is.
 *
 * @param url text to encode; must not be {@code null}
 * @return {@code url} with each character accepted by {@link #isChineseChar(char)} replaced
 *         by its UTF-8 {@link URLEncoder} form
 * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
 */
public String EncoderUrl(String url) throws UnsupportedEncodingException {
    StringBuilder encoded = new StringBuilder();
    for (char current : url.toCharArray()) {
        // Only Chinese characters are encoded.
        if (isChineseChar(current)) {
            encoded.append(URLEncoder.encode(String.valueOf(current), "UTF-8"));
        } else {
            encoded.append(current);
        }
    }
    return encoded.toString();
}
/**
 * Tells whether {@code c} lies in the range U+4E00..U+9FA5 (inclusive).
 *
 * @param c character to test
 * @return {@code true} when the character is inside that range
 */
public boolean isChineseChar(char c) {
    return c >= '\u4e00' && c <= '\u9fa5';
}
/**
 * Follows HTTP redirects (301, 302, 303, 307, 308) by hand and returns the connection whose
 * response is not a redirect.
 *
 * <p>Each redirect target is opened as a fresh connection that inherits the previous
 * connection's connect/read timeouts and is sent this downloader's User-Agent with
 * {@code Accept-Encoding: identity}. Relative {@code Location} values are resolved against
 * the URL that issued them, and every superseded connection is disconnected.
 *
 * <p>NOTE(review): socket-factory / hostname-verifier settings the caller applied to
 * {@code uc} are not applied to redirect targets here.
 *
 * @param uc connection to start from; its response code is read, which sends the request
 * @return {@code uc} itself when it is not a redirect, otherwise the final connection
 * @throws Exception {@link IOException} when a redirect has no {@code Location} header, when
 *                   more than 10 redirects are chained, or on any I/O failure
 */
private HttpURLConnection reload(HttpURLConnection uc) throws Exception {
    final int maxRedirects = 10;
    HttpURLConnection current = uc;
    int followed = 0;
    while (true) {
        int status = current.getResponseCode();
        boolean redirect = status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_SEE_OTHER
                || status == 307
                || status == 308;
        if (!redirect) {
            return current;
        }
        if (followed >= maxRedirects) {
            throw new IOException("Too many redirects (" + maxRedirects + "), last URL: " + current.getURL());
        }
        String location = current.getHeaderField("Location");
        if (location == null || location.isEmpty()) {
            throw new IOException("Redirect (HTTP " + status + ") without Location header: " + current.getURL());
        }
        // Resolving against the current URL also handles relative Location values.
        URL target = new URL(current.getURL(), location.replace("\\", "/"));
        HttpURLConnection next = (HttpURLConnection) target.openConnection();
        next.setConnectTimeout(current.getConnectTimeout());
        next.setReadTimeout(current.getReadTimeout());
        next.setRequestProperty("User-Agent", userAgent);
        next.setRequestProperty("Accept-Encoding", "identity");
        current.disconnect();
        current = next;
        followed++;
    }
}
/**
 * Downloads one URL completely into memory and then writes it to
 * {@code savePath/fileName}, creating the directory when needed.
 *
 * <p>SECURITY NOTE(review): for HTTPS connections this method installs the trust-all socket
 * factory and the accept-all hostname verifier, so the server's certificate is not
 * validated. Confirm this is acceptable for the sources being downloaded.
 *
 * @param urlStr   remote URL; backslashes are converted to {@code /} and the path/query is
 *                 encoded with {@code URIUtil.encodePathQuery} before connecting
 * @param fileName name of the file to create inside {@code savePath}
 * @param savePath target directory
 * @throws Exception when the server answers with a status of 400 or above, when the target
 *                   directory cannot be created, or on any I/O failure
 */
public void downLoadFromUrl(String urlStr, String fileName, String savePath) throws Exception {
    long start = System.currentTimeMillis();
    urlStr = urlStr.replace("\\", "/");
    urlStr = URIUtil.encodePathQuery(urlStr);
    URL url = new URL(urlStr);
    HttpURLConnection first = (HttpURLConnection) url.openConnection();
    HttpURLConnection conn = first;
    try {
        if (conn instanceof HttpsURLConnection) {
            HttpsURLConnection https = (HttpsURLConnection) conn;
            trustAllHosts(https);
            https.setHostnameVerifier(DO_NOT_VERIFY);
        }
        // Browser-like User-Agent so servers that block crawlers do not answer 403.
        conn.setRequestProperty("User-Agent", userAgent);
        conn.setRequestProperty("Accept-Encoding", "identity");
        // 8-second connect and read timeouts.
        conn.setConnectTimeout(8 * 1000);
        conn.setReadTimeout(8 * 1000);
        conn = reload(conn);
        if (conn.getResponseCode() >= 400) {
            throw new Exception("文件不存在");
        }
        // Read the whole body first so that nothing is written when the transfer fails.
        byte[] getData;
        try (InputStream inputStream = conn.getInputStream()) {
            getData = readInputStream(inputStream);
        }
        File saveDir = new File(savePath);
        // The isDirectory() re-check covers another thread creating the directory concurrently.
        if (!saveDir.exists() && !saveDir.mkdirs() && !saveDir.isDirectory()) {
            throw new IOException("Unable to create directory: " + saveDir);
        }
        File file = new File(saveDir + File.separator + fileName);
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(getData);
        }
        long end = System.currentTimeMillis();
        logger.info("info:" + url + " download success;用时:" + (end - start) + "ms");
    } finally {
        // Release the connection that served the body and, after a redirect, the initial one.
        conn.disconnect();
        if (first != conn) {
            first.disconnect();
        }
    }
}
/**
 * Reads a stream to its end and returns everything that was read.
 *
 * <p>The stream is consumed in 1024-byte chunks and is not closed by this method.
 *
 * @param inputStream stream to drain; must not be {@code null}
 * @return all bytes read, in order
 * @throws IOException when reading from the stream fails
 */
public byte[] readInputStream(InputStream inputStream) throws IOException {
    try (ByteArrayOutputStream collected = new ByteArrayOutputStream()) {
        byte[] chunk = new byte[1024];
        for (int read = inputStream.read(chunk); read != -1; read = inputStream.read(chunk)) {
            collected.write(chunk, 0, read);
        }
        return collected.toByteArray();
    }
}
/**
 * Builds the checkpoints that divide {@code totalSize} bytes into {@code splitNum} ranges.
 *
 * <p>Every range is {@code totalSize / splitNum} long (integer division) except the last,
 * whose end is {@code totalSize} so that the remainder is covered.
 *
 * @param splitNum  number of ranges
 * @param totalSize total length in bytes
 * @param timeStamp value stored in the checkpoint's {@code timestamp} field
 * @return a checkpoint object holding the start/end arrays, total size and timestamp
 */
public static FileCheckPoints initCheckPoint(int splitNum, long totalSize, long timeStamp) {
    long[] startPos = new long[splitNum];
    long[] endPos = new long[splitNum];
    int parts = startPos.length;
    for (int idx = 0; idx < parts; idx++) {
        long chunk = totalSize / parts;
        startPos[idx] = idx * chunk;
        // The last range ends at the total size instead of a chunk boundary.
        endPos[idx] = (idx == parts - 1) ? totalSize : (idx + 1) * chunk;
    }
    FileCheckPoints chp = new FileCheckPoints();
    chp.setEndPos(endPos);
    chp.setStartPos(startPos);
    chp.totalSize = totalSize;
    chp.timestamp = timeStamp;
    return chp;
}
/**
 * Probes the remote URL for its length and last-modified time and builds the initial
 * checkpoints from them.
 *
 * @return checkpoints split into {@code info.getSplitNum()} ranges when the server answered
 *         200 or 206 with a positive content length; otherwise an empty checkpoint object
 *         whose {@code statecode} holds the HTTP status (0 when no response was obtained)
 */
private FileCheckPoints getInitedCheckPoint() {
    long fileLength = -1;
    long timeStamp = -1;
    HttpURLConnection conn = null;
    int stateCode = 0;
    try {
        URL url = new URL(this.info.getPair().remoteUrl);
        conn = (HttpURLConnection) url.openConnection();
        HttpDownloader.RetriveSingleStream.setHeader(conn);
        // Set after setHeader(), which also assigns Accept-Encoding, so that this value is
        // the one sent and the reported length is that of the unencoded file.
        conn.setRequestProperty("Accept-Encoding", "identity");
        stateCode = conn.getResponseCode();
        // Only 200 OK and 206 Partial Content are usable answers.
        if (stateCode != HttpURLConnection.HTTP_OK
                && stateCode != HttpURLConnection.HTTP_PARTIAL) {
            logger.warn(info.getPair().remoteUrl + " #Error Code:# "
                    + stateCode);
            fileLength = -2;
        } else {
            fileLength = conn.getContentLengthLong();
            timeStamp = conn.getLastModified();
            logger.info(info.getPair().remoteUrl + " #FileLength:# "
                    + fileLength);
        }
    } catch (IOException e) {
        // Covers MalformedURLException too; the empty checkpoint below reports the failure.
        logger.warn(info.getPair().remoteUrl + " #Probe failed:# " + e.getMessage(), e);
    } finally {
        if (conn != null) {
            conn.disconnect();
        }
    }
    FileCheckPoints chp;
    if (fileLength > 0) {
        chp = initCheckPoint(info.getSplitNum(), fileLength, timeStamp);
        chp.timestamp = timeStamp;
    } else {
        chp = new FileCheckPoints();
        chp.statecode = stateCode;
    }
    return chp;
}
/**
 * Downloads one byte range ("block") of the remote file into its position in the local file,
 * retrying up to {@code maxRetry} times and resuming from the last written position.
 *
 * <p>{@link #isDone()} becomes {@code true} when the block is complete and also when the
 * retry budget is exhausted, so it signals "finished", not "succeeded".
 *
 * @author burkun
 */
protected static class RetriveSingleStream implements Runnable {
    // NOTE(review): isDone() is presumably polled from another thread than the one running
    // run(); volatile makes the update visible there - confirm against the caller.
    private volatile boolean isDone = false;
    private FileCheckPoints chp;
    private int curIndex;
    private SaveFileItem file;
    private long startPos;
    private long endPos;
    byte[] buffer = new byte[1024 * 12];
    private IDownloadInfo __info;
    private int maxRetry;
    private static final Logger logger = LoggerFactory.getLogger(RetriveSingleStream.class);

    /** @return {@code true} once this block finished or its retries ran out */
    public boolean isDone() {
        return isDone;
    }

    /**
     * @param info     download task; supplies the remote URL, local path and checkpoint store
     * @param chp      shared checkpoints; this block's start position is updated as it writes
     * @param curIndex index of this block inside the checkpoint arrays
     * @param maxRetry maximum number of attempts
     */
    public RetriveSingleStream(IDownloadInfo info, FileCheckPoints chp,
                               int curIndex, int maxRetry) {
        this.__info = info;
        this.chp = chp;
        this.curIndex = curIndex;
        this.startPos = chp.getStartPos()[curIndex];
        this.endPos = chp.getEndPos()[curIndex];
        this.maxRetry = maxRetry;
    }

    /**
     * Requests the bytes from the current start position onwards and writes them until the
     * block end is reached. Any {@link IOException}, an unusable HTTP status or a stream that
     * ends early counts as a failed attempt.
     */
    @Override
    public void run() {
        int curRetry = 0;
        while (curRetry < maxRetry && !isDone) {
            HttpURLConnection conn = null;
            InputStream in = null;
            try {
                URL url = new URL(__info.getPair().remoteUrl);
                conn = (HttpURLConnection) url.openConnection();
                conn.setConnectTimeout(10000);
                conn.setReadTimeout(30000);
                setHeader(conn);
                String property = "bytes=" + startPos + "-";
                conn.setRequestProperty("RANGE", property);
                logger.info(__info.getPair().localName + " #Block"
                        + (curIndex + 1) + "# begin downloading...");
                // A 200 answer means the server ignored the range and sends the file from
                // byte 0; that is only usable when this block also starts at 0.
                int status = conn.getResponseCode();
                if (status != HttpURLConnection.HTTP_PARTIAL
                        && !(status == HttpURLConnection.HTTP_OK && startPos == 0)) {
                    throw new IOException("Unexpected HTTP status " + status
                            + " for range request starting at " + startPos);
                }
                int length;
                long counter = 0;
                in = conn.getInputStream();
                file = new SaveFileItem(__info.getPair().getLocalFullPath(), startPos);
                // Size the local file to the full download length (the original author's bug fix).
                file.setLength(__info.getCurCheckPoints().totalSize);
                while (!isDone && startPos < endPos && (length = in.read(buffer)) > 0) {
                    // Never write beyond this block's end; the request has no upper bound.
                    int usable = (int) Math.min((long) length, endPos - startPos);
                    startPos += file.write(buffer, 0, usable);
                    counter += 1;
                    chp.getStartPos()[curIndex] = Math.min(startPos, endPos);
                    if (counter % 20 == 0) {
                        __info.writeInfo(chp);
                        logger.info(__info.getPair().remoteUrl + " #Block"
                                + (curIndex + 1) + "# download "
                                + getPercentage() + "%...");
                        Thread.yield();
                    }
                }
                __info.writeInfo(chp);
                if (startPos < endPos) {
                    // Progress is already persisted, so the next attempt resumes from here.
                    throw new IOException("Stream ended at " + startPos
                            + " before block end " + endPos);
                }
                isDone = true;
            } catch (IOException e) {
                isDone = false;
                logger.debug(__info.getPair().remoteUrl, e);
            } finally {
                if (!isDone) {
                    curRetry++;
                    logger.debug(__info.getPair().remoteUrl + " download failed, retry again!");
                    if (curRetry >= maxRetry) {
                        // Out of retries: mark as finished so the loop and any waiter stop.
                        isDone = true;
                    }
                } else {
                    curRetry = maxRetry;
                }
                try {
                    try {
                        if (in != null) {
                            in.close();
                        }
                    } finally {
                        if (file != null) {
                            file.close();
                            file = null;
                        }
                    }
                } catch (IOException e) {
                    logger.debug(__info.getPair().remoteUrl, e);
                } finally {
                    if (conn != null) {
                        conn.disconnect();
                    }
                }
            }
        }
    }

    /**
     * Applies the browser-like request headers used for probing and range requests.
     *
     * @param conn connection to configure; must not be connected yet
     */
    public static void setHeader(URLConnection conn) {
        conn.setRequestProperty(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 BIDUBrowser/7.0 Safari/537.36");
        conn.setRequestProperty("Accept-Language",
                "en-us,en;q=0.7,zh-cn;q=0.3");
        // Byte offsets only make sense on the unencoded body ("utf-8" is not a content coding).
        conn.setRequestProperty("Accept-Encoding", "identity");
        conn.setRequestProperty("Accept-Charset",
                "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
        conn.setRequestProperty("Keep-Alive", "300");
        conn.setRequestProperty("Connection", "keep-alive");
        conn.setRequestProperty("Cache-Control", "max-age=0");
        conn.setRequestProperty("Referer", "http://www.baidu.com");
    }

    /**
     * @return percentage of the whole file already downloaded across all blocks: total size
     *         minus the bytes still outstanding in every block; 0 when the total size is not
     *         positive
     */
    private int getPercentage() {
        if (chp.totalSize <= 0) {
            return 0;
        }
        long total = 0;
        for (int i = 0; i < chp.getSplit(); i++) {
            total += chp.getEndPos()[i] - chp.getStartPos()[i];
        }
        return (int) ((chp.totalSize - total) * 100 / chp.totalSize);
    }
}
/**
 * @return a {@link Timestamp} for the current system time
 */
private Timestamp getNewtime() {
    return new Timestamp(System.currentTimeMillis());
}
// public static void main(String[] args) {
// String url="http://ss,ss,https://bbbbb;http://ccc";
// List<String> separators = new ArrayList<>();
// separators.add(",");
// separators.add(";");
// String[] urls = url.split("http://|https://");
// for (int i = 0; i < urls.length; i++) {
// String urltemp = "";
// if (url.indexOf("http://" + urls[i]) >= 0) {
// urltemp="http://" + urls[i];
// }
// if (url.indexOf("https://" + urls[i]) >= 0) {
// urltemp="https://" + urls[i];
// }
// for(String o:separators){
// urltemp=urltemp.replaceAll(o+"$", "");
// }
// System.out.println(urltemp);
// }
// }
/**
 * Trust managers installed by {@code trustAllHosts}: a single {@link X509TrustManager} whose
 * check methods are empty, so every client and server certificate chain is accepted.
 *
 * <p>SECURITY NOTE(review): using this disables TLS certificate validation for the
 * connection it is installed on. Confirm this is intended before reusing it.
 */
private final TrustManager[] trustAllCerts = {new X509TrustManager() {
    @Override
    public void checkClientTrusted(java.security.cert.X509Certificate[] x509Certificates, String s) throws CertificateException {
        // Accept every client certificate.
    }

    @Override
    public void checkServerTrusted(java.security.cert.X509Certificate[] x509Certificates, String s) throws CertificateException {
        // Accept every server certificate.
    }

    @Override
    public java.security.cert.X509Certificate[] getAcceptedIssuers() {
        return new java.security.cert.X509Certificate[0];
    }
}};
/**
 * Hostname verifier that accepts every host name / TLS session combination.
 *
 * <p>SECURITY NOTE(review): with this verifier the certificate's host name is never checked.
 */
private final HostnameVerifier DO_NOT_VERIFY = (hostname, session) -> true;
/**
 * Replaces the connection's SSL socket factory with one built from {@code trustAllCerts},
 * which accepts any certificate.
 *
 * <p>Best effort: when the replacement factory cannot be created or installed, the failure
 * is logged and the connection keeps its previous factory.
 *
 * <p>SECURITY NOTE(review): this turns off certificate validation for the connection.
 *
 * @param connection HTTPS connection to reconfigure; must not be connected yet
 * @return the socket factory the connection used before this call
 */
private SSLSocketFactory trustAllHosts(HttpsURLConnection connection) {
    SSLSocketFactory oldFactory = connection.getSSLSocketFactory();
    try {
        SSLContext sc = SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, new java.security.SecureRandom());
        SSLSocketFactory newFactory = sc.getSocketFactory();
        connection.setSSLSocketFactory(newFactory);
    } catch (Exception e) {
        // Report through the class logger instead of printing the stack trace to stderr.
        logger.error("Failed to install trust-all SSLSocketFactory", e);
    }
    return oldFactory;
}
}