Node.js 抓取数据
emer 发布于 2018-4-10 18:06 2393 次阅读
// Minimal Express server: GET / fetches the weibo.com front page with
// superagent, parses it with cheerio, and responds with the extracted
// headline entries.
var express = require('express'); // HTTP server framework
var cheerio = require('cheerio'); // jQuery-like HTML parsing
var superagent = require('superagent'); // outbound HTTP client

var app = express();

// NOTE(review): the fallback literal below is the session cookie that was
// hard-coded in the original script. Credentials should not live in source;
// supply WEIBO_COOKIE through the environment and remove the literal once
// every deployment does so. It is kept only for backward compatibility.
var WEIBO_COOKIE = process.env.WEIBO_COOKIE || "SINA90.109.1522659071912; login_sid_t=01a3d08aa6089968c7750ef2a6e38c04; cross_origin_proto=SSL; _s_tentry=passport.weibo.com; Apache=6641918621807.266.1523346097373; ULV=1523346097379:2:2:1:6641918621807.266.1523346097373:1522659072853; UOR=www.techweb.com.cn,widget.weibo.com,www.baidu.com; SSOLoginState=1523351767; un=liniaa@163.com; wvr=6; ALF=1554889803; SCF=AoHqnKOeWcLsQEwaxTXfQF_YygLUZhacT1TwPe4ViI4RbdZZKZOn4DpEgrZvQWohP47mQFysFHqEhPC58bjHLoM.; SUB=_2A253yPicDeRhGeNL7VAS9SrJzzSIHXVUvG1UrDV8PUNbmtBeLWXgkW9NSOLl0AbuhaVab2o7QYleQBeBoYbWK6aE; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhVFxvDs7nChvqxTlEXMORv5JpX5KzhUgL.Fo-fSoz0SKBfShn2dJLoI7LjIg4DdJ4kqcRt; SUHB=0d4ug0wyra6Tpg";

/**
 * Parses an HTML document and collects one entry per element matching
 * `.ulist.focuslistnews`.
 *
 * @param {string} html - Raw HTML text of the fetched page.
 * @returns {Array<{title: string, href: (string|undefined),
 *   item: {title: string, href: (string|undefined)}}>} One object per
 *   matched list: the `.bold-item` link plus the links found in the `li`
 *   siblings that follow it.
 */
function extractFocusNews(html) {
  var $ = cheerio.load(html); // parse the page with cheerio
  var arr = [];

  // jQuery-style traversal over every matched list.
  $(".ulist.focuslistnews").each(function (index, element) {
    console.log(index);

    var $eleItem = $(element).find('.bold-item a');
    // `~` is the general-sibling combinator: links inside any <li> that
    // follows `.bold-item` under the same parent.
    var $eleItemSon = $(element).find('.bold-item ~ li a');

    // .text() joins the text of all matched nodes; .attr() reads the
    // attribute of the first matched node only.
    arr.push({
      title: $eleItem.text(),
      href: $eleItem.attr('href'),
      item: {
        title: $eleItemSon.text(),
        href: $eleItemSon.attr('href')
      }
    });
  });

  return arr;
}

app.get('/', function (req, res, next) {
  superagent.get('http://weibo.com')
    .set("Cookie", WEIBO_COOKIE)
    .end(function (err, sres) { // sres: response for the fetched page
      if (err) return next(err); // delegate fetch failures to Express

      // Reply with the scraped entries. The original built this array and
      // then sent the raw superagent response object instead.
      res.send(extractFocusNews(sres.text));
    });
});

// The message below is printed once the server is listening on port 8888.
app.listen(8888, function () {
  console.log('抓取成功~~~');
});