Finished web scraping example with the Hacker News top 30 links

This commit is contained in:
Sahat Yalkabov
2013-12-07 18:53:00 -05:00
parent 5b99baa007
commit 44c90b7156
5 changed files with 32 additions and 4 deletions

2
app.js
View File

@@ -64,7 +64,7 @@ app.get('/api', api.getApi);
app.get('/api/foursquare', passportConf.ensureAuthenticated, api.getFoursquare); app.get('/api/foursquare', passportConf.ensureAuthenticated, api.getFoursquare);
app.get('/api/tumblr', passportConf.ensureAuthenticated, api.getTumblr); app.get('/api/tumblr', passportConf.ensureAuthenticated, api.getTumblr);
app.get('/api/facebook', passportConf.ensureAuthenticated, api.getFacebook); app.get('/api/facebook', passportConf.ensureAuthenticated, api.getFacebook);
app.get('/api/scraping', passportConf.ensureAuthenticated, api.getScraping); app.get('/api/scraping', api.getScraping);
app.get('/contact', contact.getContact); app.get('/contact', contact.getContact);
app.post('/contact', contact.postContact); app.post('/contact', contact.postContact);

View File

@@ -2,6 +2,7 @@ var config = require('../config/config');
var User = require('../models/User'); var User = require('../models/User');
var async = require('async'); var async = require('async');
var cheerio = require('cheerio'); var cheerio = require('cheerio');
var request = require('request');
var _ = require('underscore'); var _ = require('underscore');
var geoip = require('geoip-lite'); var geoip = require('geoip-lite');
var FB = require('fb'); var FB = require('fb');
@@ -105,8 +106,16 @@ exports.getFacebook = function(req, res) {
}; };
exports.getScraping = function(req, res) { exports.getScraping = function(req, res) {
res.render('api/scraping', { request.get('https://news.ycombinator.com/', function(error, request, body) {
title: 'Web Scraping', var $ = cheerio.load(body);
user: req.user var links = [];
$('.title').find('a').slice(0,30).each(function(i, elem) {
links.push($(elem));
});
res.render('api/scraping', {
title: 'Web Scraping',
links: links,
user: req.user
});
}); });
}; };

View File

@@ -540,6 +540,10 @@ label-info {
margin-bottom: 0; margin-bottom: 0;
} }
span {
vertical-align: middle;
}
// Datatables ================================================================ // Datatables ================================================================
// Search field // Search field

BIN
public/img/hacker_news.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 762 B

View File

@@ -12,3 +12,18 @@ block content
a.btn.btn-lg.btn-primary(href='https://developer.foursquare.com/docs/explore', target='_blank') a.btn.btn-lg.btn-primary(href='https://developer.foursquare.com/docs/explore', target='_blank')
i.fa.fa-film i.fa.fa-film
| Cheerio Screencast | Cheerio Screencast
h3
img(src='/img/hacker_news.png', width=50, height=50)
span Hacker News Frontpage
table.table.table-condensed
thead
tr
th №
th Title
tbody
each link, index in links
tr
td= index + 1
td!= link