From 44c90b715675e5b990214bd5eeb817f7ddb7501f Mon Sep 17 00:00:00 2001 From: Sahat Yalkabov Date: Sat, 7 Dec 2013 18:53:00 -0500 Subject: [PATCH] Finished web scraping example with a hacker news top 30 links --- app.js | 2 +- controllers/api.js | 15 ++++++++++++--- public/css/ios7.less | 4 ++++ public/img/hacker_news.png | Bin 0 -> 762 bytes views/api/scraping.jade | 15 +++++++++++++++ 5 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 public/img/hacker_news.png diff --git a/app.js b/app.js index 58bea2d12a..0f461ac0f8 100755 --- a/app.js +++ b/app.js @@ -64,7 +64,7 @@ app.get('/api', api.getApi); app.get('/api/foursquare', passportConf.ensureAuthenticated, api.getFoursquare); app.get('/api/tumblr', passportConf.ensureAuthenticated, api.getTumblr); app.get('/api/facebook', passportConf.ensureAuthenticated, api.getFacebook); -app.get('/api/scraping', passportConf.ensureAuthenticated, api.getScraping); +app.get('/api/scraping', api.getScraping); app.get('/contact', contact.getContact); app.post('/contact', contact.postContact); diff --git a/controllers/api.js b/controllers/api.js index 75ac6c670b..16357e8943 100644 --- a/controllers/api.js +++ b/controllers/api.js @@ -2,6 +2,7 @@ var config = require('../config/config'); var User = require('../models/User'); var async = require('async'); var cheerio = require('cheerio'); +var request = require('request'); var _ = require('underscore'); var geoip = require('geoip-lite'); var FB = require('fb'); @@ -105,8 +106,16 @@ exports.getFacebook = function(req, res) { }; exports.getScraping = function(req, res) { - res.render('api/scraping', { - title: 'Web Scraping', - user: req.user + request.get('https://news.ycombinator.com/', function(error, request, body) { + var $ = cheerio.load(body); + var links = []; + $('.title').find('a').slice(0,30).each(function(i, elem) { + links.push($(elem)); + }); + res.render('api/scraping', { + title: 'Web Scraping', + links: links, + user: req.user + }); }); }; \ No newline at end of file diff --git a/public/css/ios7.less b/public/css/ios7.less index 5fccf63563..51b4e7b1cc 100644 --- a/public/css/ios7.less +++ b/public/css/ios7.less @@ -540,6 +540,10 @@ label-info { margin-bottom: 0; } +span { + vertical-align: middle; +} + // Datatables ================================================================ // Search field diff --git a/public/img/hacker_news.png b/public/img/hacker_news.png new file mode 100644 index 0000000000000000000000000000000000000000..45802e1dd65d617e01dbff296aeecb3054489b95 GIT binary patch literal 762 zcmXw%eJs>*0LH(+dvjf#Zn5DeuJ!)9Qj}7=lb55&YspI~HX_wJ)L8m;DaTnyFV+@o zm6E)~mX1rTQYn_aw7A{~aX7=M)OAm1t!I0l=a1*V&z?*_Ur#_(1*Rr&i$+B*rhg)QJ_=4i zeGQ|7IIs|TNAS21K4uvD3RN`vKOi#<-5rqoL)(m0DcYYcoZZxKVs*vKO%ilctAGD8 zz*QnR*JF2_^NP#0lD%;r$d{!-T~T`I!AFH(UQEyQMs-ofiaNbw?F;Lu>~;TQn?q=P zy~FjIP=j(mv$~b^N|?2=^!N?RNUS5}vEpTOI9IuI1Xb5VY}wL5HJ>fLGqSOOq862G zH)l6mjlA8*q6!NGZPC63||Gp0O!-%kIq*vV1t)mCA}pUehNbuXGEPjOe!S zIo@*8R#wE~2~=Hy56oCO3cJ)WVkqU%$D}Q%L^Nyaql&>YXsij