You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

560 lines
20 KiB
Java

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package com.ann.utils.downLoader;
import com.alibaba.fastjson.JSON;
import com.ann.utils.downLoader.download.CallBackPara;
import com.ann.utils.downLoader.download.FileCheckPoints;
import com.ann.utils.downLoader.download.IDownCallBack;
import com.ann.utils.downLoader.download.IDownloadInfo;
import com.ann.utils.downLoader.download.MultiDownFile;
import com.ann.utils.downLoader.download.SaveFileItem;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.httpclient.util.URIUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.cert.CertificateException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
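/**
 * Thread that downloads the file(s) described by an {@link IDownloadInfo} over HTTP/HTTPS
 * and reports the outcome through an optional {@link IDownCallBack}.
 */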
@Slf4j
public class HttpDownloader extends Thread {
// SLF4J logger used by downLoadFromUrl and getInitedCheckPoint; run() logs through the
// Lombok-generated `log` instead.
private Logger logger = LoggerFactory.getLogger(HttpDownloader.class);
// Description of the download task; its pair supplies the remote URL, local path and local name.
private IDownloadInfo info;
// Retry budget, 5 unless the two-argument constructor overrides it.
// NOTE(review): no method of HttpDownloader visible in this file reads this field.
private int maxRetry = 5;
// Optional listener notified by run(); stays null until intiCallBack is called.
private IDownCallBack downCallBack;
// Value sent as the User-Agent request header by downLoadFromUrl.
private String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36";
/**
 * Creates a downloader for the given task with a caller-supplied retry budget.
 *
 * @param info     description of the download task
 * @param maxRetry value stored in the retry-budget field in place of its default
 */
public HttpDownloader(IDownloadInfo info, int maxRetry) {
    this.maxRetry = maxRetry;
    this.info = info;
}
/**
 * Registers the callback that run() notifies about the outcome of the download.
 *
 * @param downCallBack callback to notify; run() skips the notification while this is null
 */
public void intiCallBack(IDownCallBack downCallBack) {
this.downCallBack = downCallBack;
}
/**
 * Creates a downloader for the given task, keeping the default retry budget.
 *
 * @param info description of the download task
 */
public HttpDownloader(IDownloadInfo info) {
this.info = info;
}
/**
 * Returns a copy of the array without its empty-string elements.
 * Element order is kept; {@code null} elements are not considered empty and are retained.
 *
 * @param arr the array to filter
 * @return a new array holding every element of {@code arr} that is not {@code ""}
 */
public String[] removeArraysEmpty(String[] arr) {
    List<String> kept = new ArrayList<>();
    for (String element : arr) {
        if ("".equals(element)) {
            continue;
        }
        kept.add(element);
    }
    return kept.toArray(new String[0]);
}
/**
 * Downloads every file described by {@code info} and reports the outcome through the
 * optional callback.
 * <p>
 * The task's remote URL may hold several http/https URLs concatenated together; in that case
 * it is split into one entry per URL, each saved as {@code docustemp_<index>_<localName>}.
 * URLs matching one of the configured proxy patterns are rewritten through the task's
 * {@code failurl} format before downloading.
 * <p>
 * The success callback is invoked after each downloaded entry; the first failure stops the
 * loop and invokes the failure callback once.
 */
@Override
public void run() {
    CallBackPara para = new CallBackPara();
    para.setId(info.getPair().id);
    para.setFilename(info.getPair().localName);
    para.setLocalpath(info.getPair().localPath);
    para.setStarttime(getNewtime());
    para.setFileStorageFormat(info.getPair().fileStorageFormat);
    para.setDatatype(info.getPair().datatype);
    String url = info.getPair().remoteUrl;
    List<MultiDownFile> files = new ArrayList<>();
    // Splitting on the scheme leaves one fragment per embedded URL (plus empty fragments, dropped here).
    String[] urls = removeArraysEmpty(url.split("http://|https://"));
    if (urls.length > 1) {
        for (int i = 0; i < urls.length; i++) {
            // Restore the scheme the fragment had in the original string; https wins if both match.
            String urltemp = "";
            if (url.indexOf("http://" + urls[i]) >= 0) {
                urltemp = "http://" + urls[i];
            }
            if (url.indexOf("https://" + urls[i]) >= 0) {
                urltemp = "https://" + urls[i];
            }
            // Strip the separator that joined this URL to the next one.
            // NOTE(review): each separator is used as a regular expression here.
            for (String o : this.info.getPair().separators) {
                urltemp = urltemp.replaceAll(o + "$", "");
            }
            files.add(new MultiDownFile(urltemp, info.getPair().localPath, "docustemp_" + i + "_" + info.getPair().localName));
        }
    } else {
        files.add(new MultiDownFile(url, info.getPair().localPath, info.getPair().localName));
    }
    para.setFiles(files);
    if (info.getPair().proxyurls != null) {
        Pattern p = Pattern.compile(String.join("|", info.getPair().proxyurls));
        for (MultiDownFile o : files) {
            Matcher matcher = p.matcher(o.getRemoteUrl());
            if (matcher.find()) {
                o.setRemoteUrl(String.format(info.getPair().failurl, o.getRemoteUrl()));
            }
        }
    }
    try {
        for (MultiDownFile file : files) {
            // Download this entry's own URL (after any proxy rewrite), not the original
            // concatenated string.
            downLoadFromUrl(file.getRemoteUrl(), file.getLocalName(), file.getLocalPath());
            if (downCallBack != null) {
                downCallBack.success(para);
            }
            // Throttle consecutive requests so third-party hosts are not hit too quickly.
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
            }
        }
    } catch (Exception e) {
        log.error("nio下载失败" + JSON.toJSONString(para) + ";失败信息:" + e.getMessage(), e);
        if (downCallBack != null) {
            downCallBack.fail(para);
        }
    }
}
/**
 * Percent-encodes (UTF-8) only the characters of {@code url} that {@link #isChineseChar(char)}
 * accepts; every other character is copied unchanged.
 *
 * @param url the URL text to process
 * @return the URL with the accepted characters replaced by their UTF-8 percent-encoding
 * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
 */
public String EncoderUrl(String url) throws UnsupportedEncodingException {
    // Build the result in a StringBuilder instead of re-allocating a String per character.
    StringBuilder resultURL = new StringBuilder(url.length());
    for (int i = 0; i < url.length(); i++) {
        char charAt = url.charAt(i);
        // Only Chinese characters are encoded.
        if (isChineseChar(charAt)) {
            resultURL.append(URLEncoder.encode(String.valueOf(charAt), "UTF-8"));
        } else {
            resultURL.append(charAt);
        }
    }
    return resultURL.toString();
}
/**
 * Tells whether the character lies in the range U+4E00..U+9FA5 (inclusive).
 *
 * @param c the character to test
 * @return {@code true} when {@code c} is inside that range
 */
public boolean isChineseChar(char c) {
    // Plain range comparison; avoids compiling a regular expression for every character.
    return c >= '\u4e00' && c <= '\u9fa5';
}
/**
 * Follows HTTP redirects (301, 302, 303, 307, 308) starting from the given connection and
 * returns the first connection whose response is not a redirect.
 * <p>
 * Each superseded connection is disconnected, so the caller only has to release the
 * returned one. The connect/read timeouts of the previous hop are copied to the next.
 * NOTE(review): request headers set by the caller (e.g. User-Agent) and per-connection SSL
 * settings are not carried over to followed connections, because request properties cannot
 * be read back from a connection that is already connected.
 *
 * @param uc the connection to start from; it is connected by this call
 * @return {@code uc} itself when its response is not a redirect, otherwise the connection
 *         for the final redirect target
 * @throws Exception when a redirect has no Location header, when more than 10 redirects
 *                   are chained, or when connecting fails
 */
private HttpURLConnection reload(HttpURLConnection uc) throws Exception {
    final int maxRedirects = 10;
    HttpURLConnection current = uc;
    try {
        for (int hops = 0; hops <= maxRedirects; hops++) {
            int status = current.getResponseCode();
            boolean redirected = status == HttpURLConnection.HTTP_MOVED_PERM
                    || status == HttpURLConnection.HTTP_MOVED_TEMP
                    || status == HttpURLConnection.HTTP_SEE_OTHER
                    || status == 307
                    || status == 308;
            if (!redirected) {
                return current;
            }
            String location = current.getHeaderField("Location");
            if (location == null || location.isEmpty()) {
                throw new IOException("Redirect (HTTP " + status + ") without Location header: " + current.getURL());
            }
            // Resolve against the current URL so relative Location values work as well.
            URL target = new URL(current.getURL(), location.replace("\\", "/"));
            HttpURLConnection next = (HttpURLConnection) target.openConnection();
            next.setConnectTimeout(current.getConnectTimeout());
            next.setReadTimeout(current.getReadTimeout());
            // The redirect response is no longer needed.
            current.disconnect();
            current = next;
        }
        throw new IOException("More than " + maxRedirects + " redirects, last target: " + current.getURL());
    } catch (Exception e) {
        // Release a followed connection; the caller still owns (and releases) the original.
        if (current != uc) {
            current.disconnect();
        }
        throw e;
    }
}
/**
 * Downloads one URL and writes the response body to {@code savePath/fileName}.
 * <p>
 * Backslashes in the URL are replaced with forward slashes and the path/query is
 * URI-encoded before connecting. For https URLs, certificate and hostname verification are
 * disabled on the connection. The whole body is buffered in memory before it is written, so
 * nothing is written to the target file when the transfer fails.
 *
 * @param urlStr   the URL to download
 * @param fileName name of the file to create inside {@code savePath}
 * @param savePath target directory; created when it does not exist
 * @throws Exception when the server answers with a status of 400 or above, or when
 *                   connecting, reading or writing fails
 */
public void downLoadFromUrl(String urlStr, String fileName, String savePath) throws Exception {
    long start = System.currentTimeMillis();
    urlStr = urlStr.replace("\\", "/");
    urlStr = URIUtil.encodePathQuery(urlStr);
    URL url = new URL(urlStr);
    HttpURLConnection initial = (HttpURLConnection) url.openConnection();
    HttpURLConnection conn = initial;
    try {
        boolean useHttps = urlStr.toLowerCase().startsWith("https");
        if (useHttps) {
            HttpsURLConnection https = (HttpsURLConnection) conn;
            trustAllHosts(https);
            https.setHostnameVerifier(DO_NOT_VERIFY);
        }
        // A browser-like User-Agent keeps servers that block crawlers from answering 403.
        conn.setRequestProperty("User-Agent", userAgent);
        conn.setRequestProperty("Accept-Encoding", "identity");
        // Connect and read timeouts: 8 seconds each.
        conn.setConnectTimeout(8 * 1000);
        conn.setReadTimeout(8 * 1000);
        conn = reload(conn);
        if (conn.getResponseCode() >= 400) {
            throw new Exception("文件不存在");
        }
        // Read the complete body first; the stream is closed even when reading fails.
        byte[] getData;
        try (InputStream inputStream = conn.getInputStream()) {
            getData = readInputStream(inputStream);
        }
        // Make sure the target directory exists.
        File saveDir = new File(savePath);
        if (!saveDir.exists()) {
            saveDir.mkdirs();
        }
        File file = new File(saveDir + File.separator + fileName);
        // try-with-resources closes the file even when the write fails.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(getData);
        }
        long end = System.currentTimeMillis();
        logger.info("info:" + url + " download success;用时:" + (end - start) + "ms");
    } finally {
        // Release the connection that was read from and, after a redirect, the initial one.
        conn.disconnect();
        if (initial != conn) {
            initial.disconnect();
        }
    }
}
/**
 * Reads the stream until end-of-stream and returns everything that was read.
 * The stream itself is left open; closing it is up to the caller.
 *
 * @param inputStream the stream to drain
 * @return all bytes read from the stream, in order
 * @throws IOException when reading from the stream fails
 */
public byte[] readInputStream(InputStream inputStream) throws IOException {
    byte[] chunk = new byte[1024];
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    for (int count = inputStream.read(chunk); count != -1; count = inputStream.read(chunk)) {
        collected.write(chunk, 0, count);
    }
    collected.close();
    return collected.toByteArray();
}
/**
 * Builds the initial checkpoints for a download split into {@code splitNum} blocks.
 * Every block spans {@code totalSize / splitNum} bytes (integer division); the last block
 * ends at {@code totalSize}, so it also takes the remainder.
 *
 * @param splitNum  number of blocks
 * @param totalSize total size in bytes
 * @param timeStamp value stored in the result's {@code timestamp} field
 * @return checkpoints holding the start and end position of every block
 */
public static FileCheckPoints initCheckPoint(int splitNum, long totalSize, long timeStamp) {
    long[] blockStarts = new long[splitNum];
    long[] blockEnds = new long[splitNum];
    int blockCount = blockStarts.length;
    for (int idx = 0; idx < blockCount; idx++) {
        // Computed inside the loop so that zero blocks never divides by zero.
        long blockSize = totalSize / blockCount;
        boolean lastBlock = (idx == blockCount - 1);
        blockStarts[idx] = idx * blockSize;
        // The final block always ends at the total size.
        blockEnds[idx] = lastBlock ? totalSize : (idx + 1) * blockSize;
    }
    FileCheckPoints result = new FileCheckPoints();
    result.setEndPos(blockEnds);
    result.setStartPos(blockStarts);
    result.totalSize = totalSize;
    result.timestamp = timeStamp;
    return result;
}
/**
 * Probes the task's remote URL and builds the initial checkpoints from the response.
 *
 * @return checkpoints split into {@code info.getSplitNum()} blocks when the server answers
 *         200 or 206 with a positive content length; otherwise an empty
 *         {@link FileCheckPoints} whose {@code statecode} holds the HTTP status
 *         (0 when the request itself failed)
 */
private FileCheckPoints getInitedCheckPoint() {
    long fileLength = -1;
    long timeStamp = -1;
    HttpURLConnection conn = null;
    int stateCode = 0;
    try {
        URL url = new URL(this.info.getPair().remoteUrl);
        conn = (HttpURLConnection) url.openConnection();
        // Same timeouts as the block downloader, so a silent server cannot hang the probe.
        conn.setConnectTimeout(10000);
        conn.setReadTimeout(30000);
        HttpDownloader.RetriveSingleStream.setHeader(conn);
        // Set after setHeader: setHeader also writes Accept-Encoding and the later call wins.
        conn.setRequestProperty("Accept-Encoding", "identity");
        stateCode = conn.getResponseCode();
        // Only 200 OK and 206 Partial Content are usable answers.
        if (stateCode != HttpURLConnection.HTTP_OK
                && stateCode != HttpURLConnection.HTTP_PARTIAL) {
            logger.warn(info.getPair().remoteUrl + " #Error Code:# "
                    + stateCode);
            fileLength = -2;
        } else {
            // Read the content length and the last-modified time.
            fileLength = conn.getContentLengthLong();
            timeStamp = conn.getLastModified();
            logger.info(info.getPair().remoteUrl + " #FileLength:# "
                    + fileLength);
        }
    } catch (MalformedURLException e) {
        // Best effort: report and fall through to the empty checkpoints below.
        logger.warn(info.getPair().remoteUrl + " #Malformed URL#", e);
    } catch (IOException e) {
        logger.warn(info.getPair().remoteUrl + " #Probe failed#", e);
    } finally {
        if (conn != null) {
            conn.disconnect();
        }
    }
    FileCheckPoints chp;
    if (fileLength > 0) {
        chp = initCheckPoint(info.getSplitNum(), fileLength, timeStamp);
    } else {
        chp = new FileCheckPoints();
        chp.statecode = stateCode;
    }
    return chp;
}
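/**
 * Runnable that downloads one block (byte range) of the remote file and records its
 * progress in the shared checkpoints.
 * <p>
 * Bug fix: the size of the local file (RandomAccessFile) is set to the total download size
 * before the block is written.
 *
 * @author burkun
 */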
protected static class RetriveSingleStream implements Runnable {
// True once run() has stopped working on this block: either the block finished or the
// retries were used up.
private boolean isDone = false;
// Checkpoints shared by all blocks; run() updates this block's start position in them.
private FileCheckPoints chp;
// Index of this block inside the checkpoint arrays.
private int curIndex;
// Output target for the block, opened at the block's current start position by run().
private SaveFileItem file;
// Next position to download; advanced as data is written.
private long startPos;
// Position at which this block stops downloading.
private long endPos;
// Read buffer of 12 KiB.
byte[] buffer = new byte[1024 * 12];
// The download task this block belongs to.
private IDownloadInfo __info;
// Maximum number of attempts made by run().
private int maxRetry;
private Logger logger = LoggerFactory.getLogger(RetriveSingleStream.class);
/**
 * Tells whether run() has stopped working on this block.
 * NOTE(review): run() also sets the flag when the retries are used up, so {@code true}
 * does not by itself mean the block was downloaded.
 *
 * @return the current value of the done flag
 */
public boolean isDone() {
return isDone;
}
public RetriveSingleStream(IDownloadInfo info, FileCheckPoints chp,
int curIndex, int maxRetry) {
this.__info = info;
this.chp = chp;
this.curIndex = curIndex;
this.startPos = chp.getStartPos()[curIndex];
this.endPos = chp.getEndPos()[curIndex];
this.maxRetry = maxRetry;
}
/**
 * Downloads this block, retrying up to {@code maxRetry} times.
 * <p>
 * Each attempt requests the remote file from the current start position onwards, writes
 * the data to the local file and stores the progress in the checkpoints. An attempt whose
 * stream ends before the block's end position counts as failed and is retried from the
 * position reached so far. The done flag is set when the block is complete and also when
 * the retries are used up.
 */
@Override
public void run() {
    int curRetry = 0;
    while (curRetry < maxRetry && !isDone) {
        HttpURLConnection conn = null;
        InputStream is = null;
        try {
            URL url = new URL(__info.getPair().remoteUrl);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(10000);
            conn.setReadTimeout(30000);
            setHeader(conn);
            // Open-ended range; the loop below stops once endPos is reached.
            String property = "bytes=" + startPos + "-";
            conn.setRequestProperty("RANGE", property);
            logger.info(__info.getPair().localName + " #Block"
                    + (curIndex + 1) + "# begin downloading...");
            int length;
            long counter = 0;
            is = conn.getInputStream();
            file = new SaveFileItem(__info.getPair().getLocalFullPath(), startPos);
            // Size the local file to the full download before writing this block.
            file.setLength(__info.getCurCheckPoints().totalSize);
            while (!isDone && startPos < endPos && (length = is.read(buffer)) > 0) {
                startPos += file.write(buffer, 0, length);
                counter += 1;
                chp.getStartPos()[curIndex] = Math.min(startPos, endPos);
                // Persist and report progress every 20 reads.
                if (counter % 20 == 0) {
                    __info.writeInfo(chp);
                    logger.info(__info.getPair().remoteUrl + " #Block"
                            + (curIndex + 1) + "# download "
                            + getPercentage() + "%...");
                    Thread.yield();
                }
            }
            __info.writeInfo(chp);
            if (startPos < endPos) {
                // The stream ended early; fail this attempt so the rest is retried.
                throw new IOException("Stream ended at " + startPos + " before block end " + endPos);
            }
            isDone = true;
        } catch (IOException e) {
            isDone = false;
            logger.debug(__info.getPair().remoteUrl, e);
        } finally {
            if (!isDone) {
                curRetry++;
                logger.debug(__info.getPair().remoteUrl + " download failed, retry again!");
                if (curRetry >= maxRetry) {
                    // Make sure the loop terminates.
                    isDone = true;
                }
            } else {
                curRetry = maxRetry;
            }
            // Release every resource even when closing one of them fails.
            try {
                try {
                    if (is != null) {
                        is.close();
                    }
                } finally {
                    if (file != null) {
                        file.close();
                    }
                }
            } catch (IOException e) {
                logger.debug(__info.getPair().remoteUrl, e);
            } finally {
                if (conn != null) {
                    conn.disconnect();
                }
            }
        }
    }
}
/**
 * Sets a fixed set of browser-like request headers on the connection.
 * Must be called before the connection is established.
 * NOTE(review): "utf-8" is a charset, not a content coding, so the Accept-Encoding value
 * looks unintended; it is kept unchanged here.
 *
 * @param conn the connection to configure
 */
public static void setHeader(URLConnection conn) {
    conn.setRequestProperty(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 BIDUBrowser/7.0 Safari/537.36");
    conn.setRequestProperty("Accept-Language",
            "en-us,en;q=0.7,zh-cn;q=0.3");
    conn.setRequestProperty("Accept-Encoding", "utf-8");
    conn.setRequestProperty("Accept-Charset",
            "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
    conn.setRequestProperty("Keep-Alive", "300");
    // Header name corrected from the misspelled "connnection".
    conn.setRequestProperty("Connection", "keep-alive");
    // Header name corrected from the misspelled "Cache-conntrol".
    conn.setRequestProperty("Cache-Control", "max-age=0");
    conn.setRequestProperty("Referer", "http://www.baidu.com");
}
/**
 * Computes the overall progress of the download from the checkpoints.
 *
 * @return the downloaded share of the total size as a whole percentage
 */
private int getPercentage() {
    // Sum what is still missing across all blocks.
    long remaining = 0;
    int block = 0;
    while (block < chp.getSplit()) {
        remaining += chp.getEndPos()[block] - chp.getStartPos()[block];
        block++;
    }
    long finished = chp.totalSize - remaining;
    return (int) (finished * 100 / chp.totalSize);
}
}
/**
 * Returns the current time as a SQL timestamp.
 *
 * @return a {@link Timestamp} for the moment of the call
 */
private Timestamp getNewtime() {
    return new Timestamp(new Date().getTime());
}
// public static void main(String[] args) {
// String url="http://ss,ss,https://bbbbb;http://ccc";
// List<String> separators = new ArrayList<>();
// separators.add(",");
// separators.add(";");
// String[] urls = url.split("http://|https://");
// for (int i = 0; i < urls.length; i++) {
// String urltemp = "";
// if (url.indexOf("http://" + urls[i]) >= 0) {
// urltemp="http://" + urls[i];
// }
// if (url.indexOf("https://" + urls[i]) >= 0) {
// urltemp="https://" + urls[i];
// }
// for(String o:separators){
// urltemp=urltemp.replaceAll(o+"$", "");
// }
// System.out.println(urltemp);
// }
// }
/**
 * Trust managers that replace Java's default certificate validation.
 * SECURITY: both check methods are empty, so every client and server certificate is
 * accepted and connections using these managers are not protected against impersonation.
 */
private final TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
// Accepts any client certificate: never throws.
@Override
public void checkClientTrusted(java.security.cert.X509Certificate[] x509Certificates, String s) throws CertificateException {
}
// Accepts any server certificate: never throws.
@Override
public void checkServerTrusted(java.security.cert.X509Certificate[] x509Certificates, String s) throws CertificateException {
}
// Reports no accepted issuers.
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return new java.security.cert.X509Certificate[]{};
}
}};
/**
 * Hostname verifier that disables host verification.
 * SECURITY: it returns true for every hostname, so a certificate issued for a different
 * host is accepted as well.
 */
private final HostnameVerifier DO_NOT_VERIFY = new HostnameVerifier() {
public boolean verify(String hostname, SSLSession session) {
return true;
}
};
/**
 * Installs a socket factory on the connection that trusts every certificate.
 * A failure to build the factory is printed and otherwise ignored, leaving the
 * connection's socket factory as it was.
 *
 * @param connection the HTTPS connection to reconfigure
 * @return the socket factory the connection had before this call
 */
private SSLSocketFactory trustAllHosts(HttpsURLConnection connection) {
    final SSLSocketFactory previous = connection.getSSLSocketFactory();
    try {
        SSLContext context = SSLContext.getInstance("TLS");
        context.init(null, trustAllCerts, new java.security.SecureRandom());
        connection.setSSLSocketFactory(context.getSocketFactory());
    } catch (Exception e) {
        e.printStackTrace();
    }
    return previous;
}
}