Node.js 网页抓取与数据采集

 3个月前     2  

文章目录

var request = require('request');
var cheerio = require('cheerio');
var Promise = require("bluebird");

// Search results page that is scraped for torrent detail links.
var url = 'http://kickass.so/search/yify/';

/**
 * Promise for the HTML body of the search results page at `url`.
 *
 * The request is issued once, when this statement runs. The promise resolves
 * with the response body on HTTP 200 and rejects with an Error otherwise.
 * On a transport failure the underlying error is attached as `cause`, so the
 * real reason (DNS, timeout, reset, ...) is not lost.
 */
var getData = new Promise(function(resolve, reject) {
    // Use the `url` variable so the address is defined in exactly one place.
    request(url, function(error, response, body) {
        if (error) {
            var failure = new Error('fetch failed for ' + url + ': ' + error.message);
            failure.cause = error;
            reject(failure);
            return;
        }
        if (response.statusCode !== 200) {
            reject(new Error('fetch failed for ' + url + ': HTTP ' + response.statusCode));
            return;
        }
        resolve(body);
    });
});

/**
 * Fetch a page and resolve with its raw body.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<string>} Resolves with the response body when the
 *     request succeeds with HTTP 200. Rejects with an Error otherwise: on a
 *     transport failure the original error is attached as `cause`; on a
 *     non-200 response the message carries the status code.
 */
function getMagnet(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, body) {
            if (error) {
                // Keep the underlying reason instead of a generic message.
                var failure = new Error('fetch failed for ' + url + ': ' + error.message);
                failure.cause = error;
                reject(failure);
                return;
            }
            if (response.statusCode !== 200) {
                reject(new Error('fetch failed for ' + url + ': HTTP ' + response.statusCode));
                return;
            }
            resolve(body);
        });
    });
}

// Pipeline: parse the search page into {title, src} rows, then download each
// row's detail page and print the magnet link found on it.
getData.then(function(data) {
    var $ = cheerio.load(data);
    var arr = [];
    $('.torrentname').each(function() {
        var $link = $(this).find('.cellMainLink');
        arr.push({'title': $link.text(), 'src': $link.attr('href')});
    });
    return arr;
}).then(function(data) {
    var pending = data.filter(function(row) {
        // A row without an href would otherwise yield ".../undefined".
        return Boolean(row.src);
    }).map(function(row) {
        var src = 'http://kickass.so' + row.src;
        return getMagnet(src).then(function(result) {
            var $ = cheerio.load(result);
            return $('.magnetlinkButton').attr('href');
        }).then(function(result) {
            console.log('fetch:' + result + '\n');
        }).catch(function(error) {
            // Handle failures per row so one bad page does not hide the rest.
            console.error('failed to fetch ' + src + ': ' + error.message);
        });
    });
    // Return the combined promise so no per-row promise is left floating.
    return Promise.all(pending);
}).catch(function(error) {
    // Covers a failed search-page download or a parsing error above.
    console.error('failed to load search results: ' + error.message);
});

暂无评论

暂无评论...