node使用代理ip抓取
emer 发布于 2019-2-3 17:31 2332 次阅读
var request = require("request"); var iconv = require('iconv-lite'); var Promise = require("bluebird"); function getProxyList() { var apiURL = 'http://www.66ip.cn/mo.php?sxb=&tqsl=100&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=http%3A%2F%2Fwww.66ip.cn%2F%3Fsxb%3D%26tqsl%3D100%26ports%255B%255D2%3D%26ktip%3D%26sxa%3D%26radio%3Dradio%26submit%3D%25CC%25E1%2B%2B%25C8%25A1'; return new Promise((resolve, reject) => { var options = { method: 'GET', url: apiURL, gzip: true, encoding: null, headers: { // 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', //'Accept-Encoding': 'gzip, deflate', //'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4', // 'User-Agent': 'Mozilla/8.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36', // 'referer': 'http://www.66ip.cn/', //'cookie':'yd_cookie=05e6639e-848d-48585a8dd83b82f4a8c7d2cf196d74d83ee8; Hm_lvt_1761fabf3c988e7f04bec51acd4073f4=1549174243; Hm_lpvt_1761fabf3c988e7f04bec51acd4073f4=1549174380; _ydclearance=562d0b4456ca1cc873e260da-4915-47f9-9b27-ae7cecbbce3d-1549191147' 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Upgrade-Insecure-Requests': 1, 'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3423.2 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Referer': 'http://www.66daili.cn/', 'Accept-Encoding':'gzip, deflate', 'Accept-Language':'zh-CN,zh;q=0.9', 'Cookie': 'yd_cookie=05e6639e-848d-48585a8dd83b82f4a8c7d2cf196d74d83ee8; Hm_lvt_1761fabf3c988e7f04bec51acd4073f4=1549174243; Hm_lpvt_1761fabf3c988e7f04bec51acd4073f4=1549174380; _ydclearance=562d0b4456ca1cc873e260da-4915-47f9-9b27-ae7cecbbce3d-1549191147' }, }; request(options, function (error, response, body) { try { if (error) throw error; if (/meta.*charset=gb2312/.test(body)) { body = iconv.decode(body, 'gb2312'); } var ret = body.match(/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,4}/g); resolve(ret); } catch (e) { return reject(e); } }); }) } getProxyList().then(function (proxyList) { var targetOptions = { method: 'GET', url: 'http://lininn.cn', timeout: 8000, encoding: null, }; proxyList.forEach(function (proxyurl) { console.log(`testing ${proxyurl}`); targetOptions.proxy = 'http://' + proxyurl; request(targetOptions, function (error, response, body) { try { if (error) throw error; body = body.toString(); console.log(body); eval(`var ret = ${body}`); if (ret) { console.log(` ${ret.address}`); } } catch (e) { console.error('error'+e); } }); }); }).catch(e => { console.log(e); })