«

Node.js 抓取数据

emer 发布于 2018-4-10 18:06   2393 次阅读     


var express=require('express');//引入模块
var cheerio=require('cheerio');
var superagent=require('superagent');
var app=express();

// NOTE(review): a logged-in session cookie is hard-coded below as the default value.
// Credentials should not live in source; set the WEIBO_COOKIE environment variable
// to supply a cookie without editing this file.
var WEIBO_COOKIE=process.env.WEIBO_COOKIE||"SINA90.109.1522659071912; login_sid_t=01a3d08aa6089968c7750ef2a6e38c04; cross_origin_proto=SSL; _s_tentry=passport.weibo.com; Apache=6641918621807.266.1523346097373; ULV=1523346097379:2:2:1:6641918621807.266.1523346097373:1522659072853; UOR=www.techweb.com.cn,widget.weibo.com,www.baidu.com; SSOLoginState=1523351767; un=liniaa@163.com; wvr=6; ALF=1554889803; SCF=AoHqnKOeWcLsQEwaxTXfQF_YygLUZhacT1TwPe4ViI4RbdZZKZOn4DpEgrZvQWohP47mQFysFHqEhPC58bjHLoM.; SUB=_2A253yPicDeRhGeNL7VAS9SrJzzSIHXVUvG1UrDV8PUNbmtBeLWXgkW9NSOLl0AbuhaVab2o7QYleQBeBoYbWK6aE; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhVFxvDs7nChvqxTlEXMORv5JpX5KzhUgL.Fo-fSoz0SKBfShn2dJLoI7LjIg4DdJ4kqcRt; SUHB=0d4ug0wyra6Tpg";

/**
 * Build one record per `.ulist.focuslistnews` element of a parsed page.
 *
 * @param {Function} $ - cheerio instance returned by cheerio.load().
 * @returns {Array<Object>} Records shaped as
 *     { title, href, item: { title, href } }, in document order.
 */
function collectFocusNews($){
    var arr=[];

    $(".ulist.focuslistnews").each(function(index,element){
        console.log(index);
        // Headline link of the list.
        var $eleItem=$(element).find('.bold-item a');
        // Links inside the <li> siblings that follow the headline item.
        var $eleItemSon=$(element).find('.bold-item ~ li a');
        // If several links match, cheerio's .text() joins the text of all of them,
        // while .attr('href') reads only the first match.
        arr.push(
            {
                title: $eleItem.text(),
                href: $eleItem.attr('href'),
                item:{
                    title: $eleItemSon.text(),
                    href: $eleItemSon.attr('href')
                }
            }
        );
    });

    return arr;
}

/**
 * GET / — fetch http://weibo.com with the session cookie, parse the returned
 * HTML with cheerio and respond with the extracted records.
 *
 * A request error is forwarded to Express through next(err).
 */
app.get('/',function(req,res,next){
    superagent.get('http://weibo.com').set("Cookie",WEIBO_COOKIE)
        .end(function(err,sres){
            if(err) return next(err);

            var $=cheerio.load(sres.text);
            // Send the scraped records; previously the raw superagent response
            // was sent and the extracted array was thrown away.
            res.send(collectFocusNews($));
        });
});
// Start the HTTP server on the fixed port; the callback handed to listen()
// prints the startup message.
var PORT=8888;

function onListening() {
    console.log('抓取成功~~~');
}

app.listen(PORT, onListening);