Finished web scraping example with the Hacker News top 30 links
This commit is contained in:
2
app.js
2
app.js
@ -64,7 +64,7 @@ app.get('/api', api.getApi);
|
|||||||
app.get('/api/foursquare', passportConf.ensureAuthenticated, api.getFoursquare);
|
app.get('/api/foursquare', passportConf.ensureAuthenticated, api.getFoursquare);
|
||||||
app.get('/api/tumblr', passportConf.ensureAuthenticated, api.getTumblr);
|
app.get('/api/tumblr', passportConf.ensureAuthenticated, api.getTumblr);
|
||||||
app.get('/api/facebook', passportConf.ensureAuthenticated, api.getFacebook);
|
app.get('/api/facebook', passportConf.ensureAuthenticated, api.getFacebook);
|
||||||
app.get('/api/scraping', passportConf.ensureAuthenticated, api.getScraping);
|
app.get('/api/scraping', api.getScraping);
|
||||||
|
|
||||||
app.get('/contact', contact.getContact);
|
app.get('/contact', contact.getContact);
|
||||||
app.post('/contact', contact.postContact);
|
app.post('/contact', contact.postContact);
|
||||||
|
@ -2,6 +2,7 @@ var config = require('../config/config');
|
|||||||
var User = require('../models/User');
|
var User = require('../models/User');
|
||||||
var async = require('async');
|
var async = require('async');
|
||||||
var cheerio = require('cheerio');
|
var cheerio = require('cheerio');
|
||||||
|
var request = require('request');
|
||||||
var _ = require('underscore');
|
var _ = require('underscore');
|
||||||
var geoip = require('geoip-lite');
|
var geoip = require('geoip-lite');
|
||||||
var FB = require('fb');
|
var FB = require('fb');
|
||||||
@ -105,8 +106,16 @@ exports.getFacebook = function(req, res) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
exports.getScraping = function(req, res) {
|
exports.getScraping = function(req, res) {
|
||||||
|
request.get('https://news.ycombinator.com/', function(error, request, body) {
|
||||||
|
var $ = cheerio.load(body);
|
||||||
|
var links = [];
|
||||||
|
$('.title').find('a').slice(0,30).each(function(i, elem) {
|
||||||
|
links.push($(elem));
|
||||||
|
});
|
||||||
res.render('api/scraping', {
|
res.render('api/scraping', {
|
||||||
title: 'Web Scraping',
|
title: 'Web Scraping',
|
||||||
|
links: links,
|
||||||
user: req.user
|
user: req.user
|
||||||
});
|
});
|
||||||
|
});
|
||||||
};
|
};
|
@ -540,6 +540,10 @@ label-info {
|
|||||||
margin-bottom: 0;
|
margin-bottom: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
span {
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
|
||||||
// Datatables ================================================================
|
// Datatables ================================================================
|
||||||
|
|
||||||
// Search field
|
// Search field
|
||||||
|
BIN
public/img/hacker_news.png
Normal file
BIN
public/img/hacker_news.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 762 B |
@ -12,3 +12,18 @@ block content
|
|||||||
a.btn.btn-lg.btn-primary(href='https://developer.foursquare.com/docs/explore', target='_blank')
|
a.btn.btn-lg.btn-primary(href='https://developer.foursquare.com/docs/explore', target='_blank')
|
||||||
i.fa.fa-film
|
i.fa.fa-film
|
||||||
| Cheerio Screencast
|
| Cheerio Screencast
|
||||||
|
|
||||||
|
h3
|
||||||
|
img(src='/img/hacker_news.png', width=50, height=50)
|
||||||
|
span Hacker News Frontpage
|
||||||
|
|
||||||
|
table.table.table-condensed
|
||||||
|
thead
|
||||||
|
tr
|
||||||
|
th №
|
||||||
|
th Title
|
||||||
|
tbody
|
||||||
|
each link, index in links
|
||||||
|
tr
|
||||||
|
td= index + 1
|
||||||
|
td!= link
|
Reference in New Issue
Block a user