package com.ann.utils.downLoader;

import com.alibaba.fastjson.JSON;
import com.ann.utils.downLoader.download.CallBackPara;
import com.ann.utils.downLoader.download.FileCheckPoints;
import com.ann.utils.downLoader.download.IDownCallBack;
import com.ann.utils.downLoader.download.IDownloadInfo;
import com.ann.utils.downLoader.download.MultiDownFile;
import com.ann.utils.downLoader.download.SaveFileItem;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.httpclient.util.URIUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.cert.CertificateException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Thread that downloads the remote URL(s) described by an {@link IDownloadInfo} to local files
 * and reports the outcome through an optional {@link IDownCallBack}.
 *
 * <p>The remote URL string may contain several {@code http://} / {@code https://} URLs
 * concatenated together; in that case every URL is downloaded to its own
 * {@code docustemp_<index>_<localName>} file.
 *
 * <p>NOTE(security): HTTPS connections opened by this class accept every certificate and every
 * host name (see {@link #trustAllHosts(HttpsURLConnection)}). That removes protection against
 * man-in-the-middle attacks and should be reviewed before using this class on untrusted networks.
 */
@Slf4j
public class HttpDownloader extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(HttpDownloader.class);

    /** Connect and read timeout, in milliseconds, of every connection opened for a download. */
    private static final int TIMEOUT_MILLIS = 8 * 1000;

    /** Upper bound on manually followed redirects, so a redirect loop cannot run forever. */
    private static final int MAX_REDIRECTS = 20;

    /** Pause between two consecutive files, in milliseconds. */
    private static final long PAUSE_BETWEEN_FILES_MILLIS = 100L;

    /** HTTP 307 and 308 have no constant in {@link HttpURLConnection}. */
    private static final int HTTP_TEMPORARY_REDIRECT = 307;
    private static final int HTTP_PERMANENT_REDIRECT = 308;

    /**
     * Trust manager that accepts every client and server certificate, overriding Java's default
     * certificate validation. NOTE(security): insecure by design.
     */
    private static final TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] x509Certificates, String s)
                throws CertificateException {
            // Intentionally accepts everything.
        }

        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] x509Certificates, String s)
                throws CertificateException {
            // Intentionally accepts everything.
        }

        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[]{};
        }
    }};

    /** Host name verifier that accepts every host. NOTE(security): insecure by design. */
    private static final HostnameVerifier DO_NOT_VERIFY = new HostnameVerifier() {
        @Override
        public boolean verify(String hostname, SSLSession session) {
            return true;
        }
    };

    private IDownloadInfo info;
    private int maxRetry = 5;
    private IDownCallBack downCallBack;
    private String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36";

    /**
     * @param info     description of what to download and where to store it
     * @param maxRetry retry budget stored on this instance
     */
    public HttpDownloader(IDownloadInfo info, int maxRetry) {
        this.info = info;
        this.maxRetry = maxRetry;
    }

    /**
     * Creates a downloader with the default retry budget.
     *
     * @param info description of what to download and where to store it
     */
    public HttpDownloader(IDownloadInfo info) {
        this.info = info;
    }

    /**
     * Registers the callback that is notified about success and failure.
     *
     * @param downCallBack callback to notify; {@code null} disables notifications
     */
    public void intiCallBack(IDownCallBack downCallBack) {
        this.downCallBack = downCallBack;
    }

    /**
     * Returns a copy of {@code arr} without its empty-string elements.
     *
     * @param arr source array
     * @return new array holding the non-empty elements in their original order
     */
    public String[] removeArraysEmpty(String[] arr) {
        return Arrays.stream(arr).filter(s -> !"".equals(s)).toArray(String[]::new);
    }

    /**
     * Downloads every file described by {@link #info} and notifies the callback.
     *
     * <p>{@code success} is invoked after each file that was stored; {@code fail} is invoked once
     * when building the file list or any download throws, which also stops the remaining files.
     */
    @Override
    public void run() {
        CallBackPara para = new CallBackPara();
        para.setId(info.getPair().id);
        para.setFilename(info.getPair().localName);
        para.setLocalpath(info.getPair().localPath);
        para.setStarttime(getNewtime());
        para.setFileStorageFormat(info.getPair().fileStorageFormat);
        para.setDatatype(info.getPair().datatype);
        try {
            // Built inside the try block so that a malformed configuration is reported through
            // the failure callback instead of silently killing the thread.
            List<MultiDownFile> files = buildDownloadList(info.getPair().remoteUrl);
            para.setFiles(files);
            applyProxyRewrite(files);
            for (MultiDownFile file : files) {
                // Use the URL of this very file (possibly rewritten to the proxy form), not the
                // original combined URL string.
                downLoadFromUrl(file.getRemoteUrl(), file.getLocalName(), file.getLocalPath());
                if (downCallBack != null) {
                    downCallBack.success(para);
                }
                pauseBetweenFiles();
            }
        } catch (Exception e) {
            log.error("nio下载失败," + JSON.toJSONString(para) + ";失败信息:" + e.getMessage(), e);
            if (downCallBack != null) {
                downCallBack.fail(para);
            }
        }
    }

    /**
     * Splits a string that may hold several concatenated URLs into one download entry per URL.
     */
    private List<MultiDownFile> buildDownloadList(String url) {
        List<MultiDownFile> files = new ArrayList<>();
        String[] urls = removeArraysEmpty(url.split("http://|https://"));
        if (urls.length <= 1) {
            files.add(new MultiDownFile(url, info.getPair().localPath, info.getPair().localName));
            return files;
        }
        for (int i = 0; i < urls.length; i++) {
            // The split dropped the scheme; recover it by looking the fragment up in the input.
            String urltemp = "";
            if (url.contains("http://" + urls[i])) {
                urltemp = "http://" + urls[i];
            }
            if (url.contains("https://" + urls[i])) {
                urltemp = "https://" + urls[i];
            }
            if (this.info.getPair().separators != null) {
                // Each separator is applied as a regular expression anchored at the end.
                for (String o : this.info.getPair().separators) {
                    urltemp = urltemp.replaceAll(o + "$", "");
                }
            }
            files.add(new MultiDownFile(urltemp, info.getPair().localPath,
                    "docustemp_" + i + "_" + info.getPair().localName));
        }
        return files;
    }

    /**
     * Rewrites the URL of every entry matching one of the configured proxy patterns.
     */
    private void applyProxyRewrite(List<MultiDownFile> files) {
        if (info.getPair().proxyurls == null) {
            return;
        }
        Pattern p = Pattern.compile(String.join("|", info.getPair().proxyurls));
        for (MultiDownFile o : files) {
            Matcher matcher = p.matcher(o.getRemoteUrl());
            if (matcher.find()) {
                o.setRemoteUrl(String.format(info.getPair().failurl, o.getRemoteUrl()));
            }
        }
    }

    /**
     * Short pause between files so the remote side is not hit too quickly.
     */
    private void pauseBetweenFiles() {
        try {
            Thread.sleep(PAUSE_BETWEEN_FILES_MILLIS);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever owns this thread.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Percent-encodes (UTF-8) the Chinese characters of {@code url}; all other characters are
     * copied unchanged.
     *
     * @param url text to encode
     * @return {@code url} with every character accepted by {@link #isChineseChar(char)} encoded
     * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
     */
    public String EncoderUrl(String url) throws UnsupportedEncodingException {
        StringBuilder resultURL = new StringBuilder(url.length());
        for (int i = 0; i < url.length(); i++) {
            char charAt = url.charAt(i);
            // Only Chinese characters are encoded.
            if (isChineseChar(charAt)) {
                resultURL.append(URLEncoder.encode(String.valueOf(charAt), "UTF-8"));
            } else {
                resultURL.append(charAt);
            }
        }
        return resultURL.toString();
    }

    /**
     * Tells whether {@code c} lies in the range U+4E00..U+9FA5.
     *
     * @param c character to test
     * @return {@code true} when {@code c} is inside that range
     */
    public boolean isChineseChar(char c) {
        return c >= '\u4e00' && c <= '\u9fa5';
    }

    /**
     * Opens a connection to {@code url} with the request settings every download uses.
     */
    private HttpURLConnection openConfiguredConnection(URL url) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        if (conn instanceof HttpsURLConnection) {
            HttpsURLConnection https = (HttpsURLConnection) conn;
            // NOTE(security): certificate and host name validation are disabled on purpose.
            trustAllHosts(https);
            https.setHostnameVerifier(DO_NOT_VERIFY);
        }
        // A browser User-Agent keeps sites that block crawlers from answering 403.
        conn.setRequestProperty("User-Agent", userAgent);
        conn.setRequestProperty("Accept-Encoding", "identity");
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        return conn;
    }

    /**
     * Tells whether {@code status} is a redirect this class follows by hand.
     */
    private static boolean isRedirect(int status) {
        return status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_SEE_OTHER
                || status == HTTP_TEMPORARY_REDIRECT
                || status == HTTP_PERMANENT_REDIRECT;
    }

    /**
     * Follows chained redirects (301, 302, 303, 307, 308) starting at {@code uc}.
     *
     * <p>Every hop is opened with the same User-Agent, timeouts and TLS settings as the first
     * request, the superseded connection is disconnected, a relative {@code Location} is resolved
     * against the current URL, and the chain is cut after {@link #MAX_REDIRECTS} hops.
     *
     * @param uc connection to start from
     * @return {@code uc} when it is not a redirect, otherwise the connection of the final target
     * @throws Exception when a hop fails, has no {@code Location} header or the chain is too long
     */
    private HttpURLConnection reload(HttpURLConnection uc) throws Exception {
        HttpURLConnection current = uc;
        try {
            for (int hops = 0; isRedirect(current.getResponseCode()); hops++) {
                if (hops >= MAX_REDIRECTS) {
                    throw new IOException("Too many redirects, last URL: " + current.getURL());
                }
                String location = current.getHeaderField("Location");
                if (location == null || location.isEmpty()) {
                    throw new IOException("Redirect without Location header: " + current.getURL());
                }
                URL target = new URL(current.getURL(), location.replace("\\", "/"));
                current.disconnect();
                current = openConfiguredConnection(target);
            }
            return current;
        } catch (Exception e) {
            // The caller only knows about uc; a connection opened here must be released here.
            if (current != uc) {
                current.disconnect();
            }
            throw e;
        }
    }

    /**
     * Downloads {@code urlStr} and stores it as {@code savePath/fileName}.
     *
     * <p>The response body is read completely into memory before the file is opened, so a
     * transfer that fails while reading does not create or overwrite the target file.
     *
     * @param urlStr   URL to download; backslashes are replaced by slashes and the path and query
     *                 are encoded with {@code URIUtil.encodePathQuery}
     * @param fileName name of the file to write
     * @param savePath directory to write into; created when missing
     * @throws Exception when the server answers with a status of 400 or above, or on any I/O error
     */
    public void downLoadFromUrl(String urlStr, String fileName, String savePath) throws Exception {
        long start = System.currentTimeMillis();
        urlStr = urlStr.replace("\\", "/");
        urlStr = URIUtil.encodePathQuery(urlStr);
        URL url = new URL(urlStr);
        HttpURLConnection conn = openConfiguredConnection(url);
        try {
            conn = reload(conn);
            if (conn.getResponseCode() >= 400) {
                throw new Exception("文件不存在");
            }
            byte[] getData;
            try (InputStream inputStream = conn.getInputStream()) {
                getData = readInputStream(inputStream);
            }
            File saveDir = new File(savePath);
            if (!saveDir.exists() && !saveDir.mkdirs() && !saveDir.isDirectory()) {
                throw new IOException("Cannot create directory " + saveDir);
            }
            File file = new File(saveDir, fileName);
            try (FileOutputStream fos = new FileOutputStream(file)) {
                fos.write(getData);
            }
            long end = System.currentTimeMillis();
            logger.info("info:" + url + " download success;用时:" + (end - start) + "ms");
        } finally {
            // Drop the connection and release its resources.
            conn.disconnect();
        }
    }

    /**
     * Reads {@code inputStream} until end of stream and returns everything that was read.
     * The stream is not closed by this method.
     *
     * @param inputStream stream to drain
     * @return all bytes read from the stream
     * @throws IOException when reading fails
     */
    public byte[] readInputStream(InputStream inputStream) throws IOException {
        byte[] buffer = new byte[1024];
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int len;
        while ((len = inputStream.read(buffer)) != -1) {
            bos.write(buffer, 0, len);
        }
        return bos.toByteArray();
    }

    /**
     * Splits {@code totalSize} bytes into {@code splitNum} consecutive ranges.
     *
     * <p>Every range is {@code totalSize / splitNum} bytes long, except the last one, which ends
     * at {@code totalSize} and therefore also receives the remainder.
     *
     * @param splitNum  number of ranges
     * @param totalSize total number of bytes
     * @param timeStamp value stored in the check point's {@code timestamp}
     * @return check point holding the start and end positions of every range
     */
    public static FileCheckPoints initCheckPoint(int splitNum, long totalSize, long timeStamp) {
        long[] startPos = new long[splitNum];
        long[] endPos = new long[splitNum];
        int len = startPos.length;
        for (int i = 0; i < len; i++) {
            long chunk = totalSize / len;
            startPos[i] = i * chunk;
            // The last range ends at the end of the file.
            endPos[i] = (i == len - 1) ? totalSize : (i + 1) * chunk;
        }
        FileCheckPoints chp = new FileCheckPoints();
        chp.setEndPos(endPos);
        chp.setStartPos(startPos);
        chp.totalSize = totalSize;
        chp.timestamp = timeStamp;
        return chp;
    }

    /**
     * Requests the remote URL once to learn its length and last-modified time and builds the
     * initial check points from them.
     *
     * @return split check points when the length is positive; otherwise an empty check point
     *         carrying the HTTP status code that was received (0 when no response arrived)
     */
    private FileCheckPoints getInitedCheckPoint() {
        long fileLength = -1;
        long timeStamp = -1;
        HttpURLConnection conn = null;
        int stateCode = 0;
        try {
            URL url = new URL(this.info.getPair().remoteUrl);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestProperty("Accept-Encoding", "identity");
            HttpDownloader.RetriveSingleStream.setHeader(conn);
            stateCode = conn.getResponseCode();
            // Only 200 OK and 206 Partial Content are usable answers.
            if (stateCode != HttpURLConnection.HTTP_OK && stateCode != HttpURLConnection.HTTP_PARTIAL) {
                logger.warn(info.getPair().remoteUrl + " #Error Code:# " + stateCode);
                fileLength = -2;
            } else {
                fileLength = conn.getContentLengthLong();
                timeStamp = conn.getLastModified();
                logger.info(info.getPair().remoteUrl + " #FileLength:# " + fileLength);
            }
        } catch (IOException e) {
            // Covers MalformedURLException as well; the caller sees it as an unknown length.
            logger.warn(info.getPair().remoteUrl + " could not be probed", e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        FileCheckPoints chp;
        if (fileLength > 0) {
            chp = initCheckPoint(info.getSplitNum(), fileLength, timeStamp);
        } else {
            chp = new FileCheckPoints();
            chp.statecode = stateCode;
        }
        return chp;
    }

    /**
     * Downloads one byte range (block) of a file, resuming from the block's current start
     * position on every retry.
     *
     * @author burkun
     */
    protected static class RetriveSingleStream implements Runnable {

        private static final Logger logger = LoggerFactory.getLogger(RetriveSingleStream.class);

        // volatile: written by the downloading thread and exposed through isDone(), which is
        // presumably polled from another thread.
        private volatile boolean isDone = false;
        private FileCheckPoints chp;
        private int curIndex;
        private SaveFileItem file;
        private long startPos;
        private long endPos;
        byte[] buffer = new byte[1024 * 12];
        private IDownloadInfo __info;
        private int maxRetry;

        /**
         * @return {@code true} once {@link #run()} has finished the block or used up its retries
         */
        public boolean isDone() {
            return isDone;
        }

        /**
         * @param info     download description
         * @param chp      check points shared by all blocks of the file
         * @param curIndex index of the block handled by this instance
         * @param maxRetry maximum number of attempts
         */
        public RetriveSingleStream(IDownloadInfo info, FileCheckPoints chp, int curIndex, int maxRetry) {
            this.__info = info;
            this.chp = chp;
            this.curIndex = curIndex;
            this.startPos = chp.getStartPos()[curIndex];
            this.endPos = chp.getEndPos()[curIndex];
            this.maxRetry = maxRetry;
        }

        /**
         * Downloads the block, retrying up to {@code maxRetry} times on {@link IOException}.
         *
         * <p>Progress is written through {@code writeInfo} every 20 reads and once at the end.
         * Note that {@code isDone} is also set to {@code true} when the retries are exhausted,
         * so it signals "finished", not "succeeded".
         */
        @Override
        public void run() {
            int curRetry = 0;
            while (curRetry < maxRetry && !isDone) {
                InputStream in = null;
                HttpURLConnection conn = null;
                try {
                    URL url = new URL(__info.getPair().remoteUrl);
                    conn = (HttpURLConnection) url.openConnection();
                    conn.setConnectTimeout(10000);
                    conn.setReadTimeout(30000);
                    setHeader(conn);
                    String property = "bytes=" + startPos + "-";
                    conn.setRequestProperty("RANGE", property);
                    logger.info(__info.getPair().localName + " #Block" + (curIndex + 1) + "# begin downloading...");
                    int length;
                    long counter = 0;
                    in = conn.getInputStream();
                    file = new SaveFileItem(__info.getPair().getLocalFullPath(), startPos);
                    file.setLength(__info.getCurCheckPoints().totalSize);
                    while (!isDone && startPos < endPos && (length = in.read(buffer)) > 0) {
                        startPos += file.write(buffer, 0, length);
                        counter += 1;
                        chp.getStartPos()[curIndex] = Math.min(startPos, endPos);
                        if (counter % 20 == 0) {
                            __info.writeInfo(chp);
                            logger.info(__info.getPair().remoteUrl + " #Block" + (curIndex + 1) + "# download " + getPercentage() + "%...");
                            Thread.yield();
                        }
                    }
                    __info.writeInfo(chp);
                    isDone = true;
                } catch (IOException e) {
                    isDone = false;
                    logger.debug(__info.getPair().remoteUrl, e);
                } finally {
                    if (!isDone) {
                        curRetry++;
                        logger.debug(__info.getPair().remoteUrl + " download failed, retry again!");
                        if (curRetry >= maxRetry) {
                            // Guarantees that the loop terminates.
                            isDone = true;
                        }
                    } else {
                        curRetry = maxRetry;
                    }
                    // Nested finally blocks: a failing close() must not skip the other releases.
                    try {
                        try {
                            if (in != null) {
                                in.close();
                            }
                        } finally {
                            try {
                                if (file != null) {
                                    file.close();
                                }
                            } finally {
                                file = null;
                                if (conn != null) {
                                    conn.disconnect();
                                }
                            }
                        }
                    } catch (IOException e) {
                        logger.debug(__info.getPair().remoteUrl, e);
                    }
                }
            }
        }

        /**
         * Sets the browser-like request headers used for block downloads.
         *
         * @param conn connection to configure; must not be connected yet
         */
        public static void setHeader(URLConnection conn) {
            conn.setRequestProperty(
                    "User-Agent",
                    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 BIDUBrowser/7.0 Safari/537.36");
            conn.setRequestProperty("Accept-Language", "en-us,en;q=0.7,zh-cn;q=0.3");
            // "identity" asks for an unencoded body; the former value "utf-8" is a charset,
            // not a content coding.
            conn.setRequestProperty("Accept-Encoding", "identity");
            conn.setRequestProperty("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
            conn.setRequestProperty("Keep-Alive", "300");
            conn.setRequestProperty("Connection", "keep-alive");
            conn.setRequestProperty("Cache-Control", "max-age=0");
            conn.setRequestProperty("Referer", "http://www.baidu.com");
        }

        /**
         * @return downloaded share of the whole file in percent, computed from the bytes still
         *         missing in all blocks; 0 when the total size is not positive
         */
        private int getPercentage() {
            if (chp.totalSize <= 0) {
                return 0;
            }
            long total = 0;
            for (int i = 0; i < chp.getSplit(); i++) {
                total += chp.getEndPos()[i] - chp.getStartPos()[i];
            }
            return (int) ((chp.totalSize - total) * 100 / chp.totalSize);
        }
    }

    /**
     * @return the current time as a {@link Timestamp}
     */
    private Timestamp getNewtime() {
        return new Timestamp(System.currentTimeMillis());
    }

    /**
     * Makes {@code connection} trust every certificate.
     *
     * <p>NOTE(security): this disables certificate validation for the connection.
     *
     * @param connection HTTPS connection to reconfigure
     * @return the socket factory the connection used before this call
     */
    private SSLSocketFactory trustAllHosts(HttpsURLConnection connection) {
        SSLSocketFactory oldFactory = connection.getSSLSocketFactory();
        try {
            SSLContext sc = SSLContext.getInstance("TLS");
            sc.init(null, trustAllCerts, new java.security.SecureRandom());
            SSLSocketFactory newFactory = sc.getSocketFactory();
            connection.setSSLSocketFactory(newFactory);
        } catch (Exception e) {
            // Best effort: the connection keeps its previous socket factory.
            logger.warn("Could not install the trust-all SSL socket factory", e);
        }
        return oldFactory;
    }
}