chore(cleanup): remove search indexing tools (#40070)
committed by GitHub
parent 6b45967997
commit 53ad373e47
@@ -4,7 +4,6 @@
    "client",
    "client/plugins/*",
    "curriculum",
    "tools/search-indexing",
    "tools/challenge-md-parser",
    "tools/scripts/seed",
    "tools/scripts/build",
@@ -21,9 +21,6 @@ JWT_SECRET=a_jwt_secret
# ---------------------
# Search
# ---------------------
# Indexing
ALGOLIA_ADMIN_KEY=admin_key_from_algolia_dashboard
GHOST_CLIENT_KEY=client_key_from_ghost_dashboard
# Client Search Bar
ALGOLIA_APP_ID=app_id_from_algolia_dashboard
ALGOLIA_API_KEY=api_key_from_algolia_dashboard
@@ -1,9 +0,0 @@
const algoliasearch = require('algoliasearch');
const path = require('path');

const envPath = path.resolve(__dirname, '../.env');
require('dotenv').config({ path: envPath });

const { ALGOLIA_ADMIN_KEY, ALGOLIA_APP_ID } = process.env;

exports.client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_ADMIN_KEY);
@@ -1,40 +0,0 @@
const { client } = require('../../algolia');
const debug = require('debug');
const getChallengeData = require('../../data-sources/challenges');

const log = debug('fcc:search:init:challenge');

const index = client.initIndex('challenge');

index.setSettings(
  {
    searchableAttributes: ['title', 'description', 'blockName'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['blockName']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      throw new Error(err);
    }
    log('setSettings\n\n' + JSON.stringify(response, null, 2));
  }
);

exports.insertChallenges = function insertChallenges() {
  return getChallengeData().subscribe(
    challenges => {
      index.addObjects(challenges, err => {
        if (err) {
          throw new Error(err);
        }
      });
    },
    err => {
      throw new Error(err);
    },
    () => log('complete')
  );
};
@@ -1,51 +0,0 @@
const { from } = require('rxjs');
const { toArray, switchMap } = require('rxjs/operators');
const _ = require('lodash');
const { client } = require('../../algolia');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');

const log = debug('fcc:search:init:guides');

const index = client.initIndex('guide');

index.setSettings(
  {
    searchableAttributes: ['title', 'content', 'category'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['category']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      throw new Error(err);
    }
    log('setSettings\n' + JSON.stringify(response, null, 2));
  }
);

exports.insertGuides = function insertGuides() {
  return getGuideArticleData()
    .pipe(
      toArray(),
      switchMap(articles => {
        const chunks = _.chunk(articles, 200).map(_.flatten);
        return from(chunks);
      })
    )
    .subscribe(
      articles => {
        index.addObjects(articles, err => {
          if (err) {
            throw new Error(err);
          }
        });
      },
      err => {
        throw new Error(err);
      },
      () => log('complete')
    );
};
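For context: insertGuides (above) batches the chunked guide articles with lodash before handing them to Algolia. A minimal sketch of that batching step, with hypothetical sample data:

const _ = require('lodash');

// Each guide article arrives from getGuideArticleData as an array of chunked
// records, so the 200-item batches are flattened before index.addObjects.
const articles = [[{ id: 'a-0' }, { id: 'a-1' }], [{ id: 'b-0' }]];
const batches = _.chunk(articles, 200).map(_.flatten);
console.log(batches); // [ [ { id: 'a-0' }, { id: 'a-1' }, { id: 'b-0' } ] ]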
@@ -1,35 +0,0 @@
const envPath = require('path').resolve(__dirname, '../../../.env');
require('dotenv').config({ path: envPath });

const { zip, timer, from } = require('rxjs');

/*
 * The below has been commented out to avoid inadvertent
 * ops usage with algolia
 */

// const { getStoryData } = require('./news');
// const { insertYoutube } = require('./youtube');
// const { insertChallenges } = require('./challenges');
// const { insertGuides } = require('./guides');

const dataSources = [
  // insertGuides,
  // insertChallenges
  // insertYoutube,
  // disable this until the roll out of news
  // getStoryData
];

function init() {
  return zip(timer(0, 5000), from(dataSources), (a, b) => b).subscribe(
    fn => {
      fn();
    },
    err => {
      throw new Error(err);
    }
  );
}

init();
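For context: the init script above spaces out its (currently commented-out) data-source calls by zipping a timer with the list of insert functions. A minimal sketch of that throttling pattern, assuming rxjs@6 as pinned in the removed package.json; the task list here is illustrative:

const { zip, timer, from } = require('rxjs');

// Emit one task every five seconds: zip pairs each timer tick with the next
// task, and the selector keeps only the task.
const tasks = [() => console.log('first insert'), () => console.log('second insert')];

zip(timer(0, 5000), from(tasks), (tick, task) => task).subscribe(task => task());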
@@ -1,37 +0,0 @@
const { client } = require('../../algolia');
const debug = require('debug');
const { getYoutubeData } = require('../../data-sources/youtube');

const log = debug('fcc:search:init:youtube');

const index = client.initIndex('youtube');

index.setSettings(
  {
    searchableAttributes: ['title', 'description', 'playlistTitle'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['playlistTitle']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      throw new Error(err);
    }
    log('setSettings\n\n' + JSON.stringify(response, null, 2));
  }
);

exports.insertYoutube = function insertYoutube() {
  getYoutubeData().subscribe(
    videos =>
      index.addObjects(videos, err => {
        if (err) {
          throw new Error(err);
        }
      }),
    err => log(err, 'red'),
    () => log('Complete', 'blue')
  );
};
@@ -1,55 +0,0 @@
const { client } = require('../../algolia');
const _ = require('lodash');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');

const log = debug('fcc:search:update:guides');
const index = client.initIndex('guides');

const concatContents = (doc, current) => ({
  ...current,
  content: doc.content.concat([current.content])
});

exports.updateGuides = async function updateGuides() {
  const newDocs = await getGuideArticleData().toPromise();
  let hits = [];
  const browseAll = index.browseAll();
  browseAll.on('result', function onResult(content) {
    hits = hits.concat(content.hits);
  });
  browseAll.on('end', function onEnd() {
    log('Finished browsing this index');
    log(`We got ${hits.length} records`);
    const docIds = _.uniq(hits.map(doc => doc.id));
    docIds.map(id => {
      const allCurrentForId = hits.filter(doc => doc.id === id);
      const allNewForId = newDocs.filter(doc => doc.id === id);
      const newForId = allNewForId.reduce(concatContents, { content: [] });
      const currentForId = allCurrentForId.reduce(concatContents, {
        content: []
      });
      const isDiff = newForId.content.some(
        snippet => !currentForId.content.includes(snippet)
      );
      if (isDiff) {
        log(id, 'cyan');
        const objectIDs = allCurrentForId.map(doc => doc.objectID);
        index.addObjects(allNewForId, err => {
          if (err) {
            throw new Error(err);
          }
          index.deleteObjects(objectIDs, err => {
            if (err) {
              throw new Error(err);
            }
            log(`purge of stale data for ${id} complete`, 'magenta');
          });
        });
      }
    });
  });
  browseAll.on('error', function onError(err) {
    throw err;
  });
};
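For context: updateGuides (above) folds all chunks that share an id into a single { content: [...] } record before comparing old and new content. A small illustration of that concatContents reduce, with hypothetical chunks:

// Reuses the reducer shape from the removed update script.
const concatContents = (doc, current) => ({
  ...current,
  content: doc.content.concat([current.content])
});

const chunks = [
  { id: 'guide-1', content: 'first chunk' },
  { id: 'guide-1', content: 'second chunk' }
];
const merged = chunks.reduce(concatContents, { content: [] });
console.log(merged.content); // [ 'first chunk', 'second chunk' ]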
@@ -1,67 +0,0 @@
const { from, of } = require('rxjs');
const { switchMap, tap } = require('rxjs/operators');
const debug = require('debug');

const { getChallengesForLang } = require('../../../curriculum/getChallenges');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');

const log = debug('fcc:search:data-source:challenges');

const { LOCALE: lang } = process.env;

module.exports = function getChallenges() {
  log('sourcing challenges');
  return from(getChallengesForLang(lang)).pipe(
    tap(() => log('parsing curriculum')),
    switchMap(curriculum => {
      const superBlocks = Object.keys(curriculum).filter(
        x => x !== 'certificates'
      );
      return from(superBlocks.map(superBlock => curriculum[superBlock]));
    }),
    switchMap(superBlock => {
      const { blocks } = superBlock;
      return from(Object.keys(blocks).map(block => blocks[block]));
    }),
    switchMap(block => {
      const { meta, challenges } = block;
      const { dashedName: blockDashedName } = meta;
      return of(
        challenges.map(challenge => ({ ...challenge, blockDashedName }))
      );
    }),
    switchMap(challenges => {
      const formattedChallenges = challenges
        .filter(({ isPrivate }) => !isPrivate)
        .reduce((acc, current) => {
          const {
            id,
            title,
            description,
            instructions,
            dashedName,
            superBlock,
            blockDashedName,
            block
          } = current;
          const formattedChallenge = {
            blockName: block,
            id,
            title,
            description: stripURLs(stripHTML(description.concat(instructions))),
            url: `/${superBlock}/${blockDashedName}/${dashedName}`
          };
          return [
            ...acc,
            ...chunkDocument(
              formattedChallenge,
              ['title', 'id', 'blockName', 'url'],
              'description'
            )
          ];
        }, []);

      return of(formattedChallenges);
    })
  );
};
@@ -1,57 +0,0 @@
const path = require('path');
const fs = require('fs-extra');
const { Observable } = require('rxjs');
const { map, filter } = require('rxjs/operators');
const readdirp = require('readdirp-walk');
const matter = require('gray-matter');

const { chunkDocument, stripHTML, stripURLs } = require('../../utils');

const { LOCALE: lang = 'english' } = process.env;

const selectedGuideDir = `../../../guide/${lang}`;
const guideRoot = path.resolve(__dirname, selectedGuideDir);

function fileStream(root) {
  return Observable.create(observer =>
    readdirp({ root, fileFilter: '*.md' })
      .on('data', file => (file.stat.isFile() ? observer.next(file) : null))
      .on('error', e => observer.error(e))
      .on('end', () => observer.complete())
  );
}

function parseFile(file) {
  const fileContents = fs.readFileSync(file.fullPath);
  return { ...file, ...matter(fileContents) };
}

function buildArticle(file) {
  const {
    path,
    content,
    data: { title }
  } = file;
  const url = path.replace(/\/index.md$/, '');
  const article = {
    content: stripURLs(stripHTML(content)),
    category: url.split('/').filter(Boolean)[0],
    title,
    url: `/${url}`,
    id: url.replace('/', '-')
  };
  return chunkDocument(article, ['title', 'url', 'id', 'category'], 'content');
}

function filterStubs(articleChunks) {
  return !articleChunks.some(chunk =>
    chunk.content.includes('This is a stub. Help our community expand it')
  );
}

exports.getGuideArticleData = () =>
  fileStream(guideRoot).pipe(
    map(file => parseFile(file)),
    map(file => buildArticle(file)),
    filter(article => filterStubs(article))
  );
@@ -1,62 +0,0 @@
const path = require('path');

const envPath = path.resolve(__dirname, '../../../.env');
require('dotenv').config({ path: envPath });

const { GHOST_CLIENT_KEY } = process.env;

const axios = require('axios');
const fs = require('fs');

const getJson = async url => {
  return axios
    .get(url)
    .then(res => res.data)
    .catch(err => console.log(err));
};

const constructIndex = async () => {
  let currPage = 1;
  let lastPage = 5;
  const delay = m => new Promise(resolve => setTimeout(resolve, m));
  const posts = [];

  while (currPage && currPage <= lastPage) {
    const data = await getJson(
      `https://www.freecodecamp.org/news/ghost/api/v2/content/posts/?key=${GHOST_CLIENT_KEY}&include=tags,authors&page=${currPage}`
    );

    data.posts.forEach(post => {
      const thisPost = {
        title: post.title,
        author: {
          name: post.primary_author.name,
          url: post.primary_author.url,
          profileImage: post.primary_author.profile_image
        },
        tags: post.tags.map(obj => {
          return {
            name: obj.name,
            url: obj.url
          };
        }),
        url: post.url,
        featureImage: post.feature_image,
        ghostId: post.id,
        publishedAt: post.published_at
      };

      posts.push(thisPost);
    });

    currPage = data.meta.pagination.next;
    lastPage = data.meta.pagination.pages;

    console.log(posts);

    fs.writeFileSync('posts.json', JSON.stringify(posts, null, 2));
    await delay(1000);
  }
};

constructIndex();
@@ -1,124 +0,0 @@
const { timer, from, zip, iif, of } = require('rxjs');
const { switchMap, concatMap } = require('rxjs/operators');
const { google } = require('googleapis');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');

const { YOUTUBE_SECRET } = process.env;
const youtube = google.youtube({ version: 'v3', auth: YOUTUBE_SECRET });

function getPlaylistItems(
  { playlistId, playlistTitle },
  nextPage,
  currentItems = []
) {
  return zip(
    timer(2000),
    from(
      new Promise((resolve, reject) => {
        youtube.playlistItems.list(
          {
            part: 'snippet',
            playlistId,
            pageToken: nextPage ? nextPage : ''
          },
          (err, data) => {
            if (err) {
              return reject(err);
            }
            return resolve({ ...data.data, playlistTitle });
          }
        );
      })
    ),
    (a, b) => b
  ).pipe(
    switchMap(({ nextPageToken, items, playlistTitle }) => {
      const allItems = currentItems.concat(items);
      return iif(
        () => !!nextPageToken,
        getPlaylistItems(
          { playlistId, playlistTitle },
          nextPageToken,
          allItems
        ),
        of({ videos: allItems, playlistTitle })
      );
    })
  );
}

function getPlayLists(nextPage, currentItems = []) {
  return from(
    new Promise((resolve, reject) => {
      youtube.playlists.list(
        {
          auth: YOUTUBE_SECRET,
          part: 'snippet',
          channelId: 'UC8butISFwT-Wl7EV0hUK0BQ',
          pageToken: nextPage ? nextPage : ''
        },
        (err, data) => {
          if (err) {
            return reject(err);
          }
          return resolve(data.data);
        }
      );
    })
  ).pipe(
    switchMap(({ nextPageToken, items }) => {
      const allItems = items.concat(currentItems);
      return iif(
        () => !!nextPageToken,
        getPlayLists(nextPageToken, allItems),
        of(allItems)
      );
    })
  );
}

exports.getYoutubeData = function getYoutubeData() {
  return getPlayLists().pipe(
    switchMap(playlists => {
      return from(playlists).pipe(
        concatMap(({ id, snippet: { title } }) =>
          getPlaylistItems({ playlistId: id, playlistTitle: title })
        )
      );
    }),
    switchMap(({ videos, playlistTitle }) => {
      const formattedVideos = videos
        .map(video => {
          const {
            id,
            snippet: {
              title,
              description,
              resourceId: { videoId },
              thumbnails
            }
          } = video;
          return {
            id,
            videoId,
            title: stripHTML(title),
            description: stripURLs(stripHTML(description)),
            thumbnails,
            playlistTitle
          };
        })
        .reduce(
          (chunked, current) => [
            ...chunked,
            ...chunkDocument(
              current,
              ['id', 'videoId', 'title', 'thumbnail', 'playlistTitle'],
              'description'
            )
          ],
          []
        );
      return of(formattedVideos);
    })
  );
};
5263 tools/search-indexing/package-lock.json (generated)
File diff suppressed because it is too large
@@ -1,50 +0,0 @@
{
  "name": "@freecodecamp/search-indexing",
  "private": true,
  "version": "0.0.1",
  "description": "A repository to hold all of our search assets",
  "main": "N/A",
  "scripts": {
    "dev": "nodemon server",
    "init": "node init",
    "build:server": "npx babel server --out-dir lib --ignore spec.js,test.js",
    "format": "prettier --single-quote es5 --write './!(packages|www)/**/*.js'",
    "test": "jest"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/freeCodeCamp/freeCodeCamp.git"
  },
  "keywords": [
    "search",
    "algolia",
    "freeCodecamp"
  ],
  "author": "freeCodeCamp",
  "license": "BSD-3-Clause",
  "bugs": {
    "url": "https://github.com/freeCodeCamp/freeCodeCamp/issues"
  },
  "homepage": "https://github.com/freeCodeCamp/freeCodeCamp#readme",
  "dependencies": {
    "algoliasearch": "^3.25.1",
    "axios": "^0.19.0",
    "chalk": "^2.3.2",
    "debug": "^4.1.1",
    "dotenv": "^6.2.0",
    "file": "^0.2.2",
    "fs-extra": "^7.0.1",
    "googleapis": "^37.2.0",
    "gray-matter": "^4.0.2",
    "lodash": "^4.17.19",
    "readdirp-walk": "^1.7.0",
    "rxjs": "^6.4.0",
    "secure-compare": "^3.0.1",
    "validator": "^10.11.0"
  },
  "devDependencies": {
    "html-entities": "^1.2.1",
    "jest": "^24.9.0",
    "striptags": "^3.1.1"
  }
}
@@ -1,105 +0,0 @@
const { Observable } = require('rxjs');
const fse = require('fs-extra');
const file = require('file');
const _ = require('lodash');
const { isURL } = require('validator');
const stripTags = require('striptags');
const Entities = require('html-entities').AllHtmlEntities;

const entities = new Entities();

const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
const isJSRE = /\.jsx?$/;
const shouldBeIgnoredRE = /^(\_|\.)/;
const excludedDirs = ['search'];
const guideSvnRE = /guides\/svn$/;

exports.isAFileRE = isAFileRE;
exports.isJSRE = isJSRE;
exports.shouldBeIgnoredRE = shouldBeIgnoredRE;
exports.excludedDirs = excludedDirs;

/*
 * *
 * Directory Helpers *
 * *
 */

exports.listDirectory = function listDirectory(start) {
  let allDirs = [];
  file.walkSync(start, dirPath => {
    if (dirPath.includes('.svn')) {
      return;
    }
    allDirs = [...allDirs, dirPath];
  });
  return allDirs.filter(name => !guideSvnRE.test(name));
};

function readDir(dir = __dirname, returnFiles = false) {
  const dirContent = fse
    .readdirSync(dir)
    .filter(dir => !excludedDirs.includes(dir))
    .filter(file => !(shouldBeIgnoredRE.test(file) || isJSRE.test(file)))
    .filter(file => file !== 'LICENSE.md');
  return returnFiles
    ? dirContent
    : dirContent.filter(item => !isAFileRE.test(item));
}

exports.readDir = readDir;

exports.parseDirectory = function parseDirectory(dirLevel, cb) {
  return Observable.from(readDir(dirLevel)).flatMap(dir => {
    const dirPath = `${dirLevel}/${dir}`;
    const subDirs = readDir(dirPath);
    if (!subDirs) {
      cb(dirPath);
      return Observable.of(null);
    }
    cb(dirPath);
    return parseDirectory(dirPath, cb);
  });
};

/*
 * *
 * Document Helpers *
 * *
 */

exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) {
  const baseDoc = _.pick(doc, pickFields);
  const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g);
  if (!chunks) {
    return [doc];
  }
  return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk }));
};

function stripURLs(str) {
  return str
    .split(/\s/)
    .filter(subStr => !_.isEmpty(subStr))
    .filter(subStr => !isURL(subStr))
    .join(' ');
}

function fixEntities(str) {
  let newStr = str.slice(0);
  function entitiesFixer(match) {
    const tmpArr = match.split('');
    const fixed =
      tmpArr.slice(0, -1).join('') + ';'.concat(tmpArr[tmpArr.length - 1]);
    newStr = newStr.split(match).join(fixed);
  }
  str.replace(/&#\d\d[^(!?;)]/g, entitiesFixer);
  return newStr;
}

exports.stripURLs = stripURLs;

exports.stripHTML = function stripHTML(text) {
  const unescapedStr = entities.decode(fixEntities(text));
  return stripTags(unescapedStr);
};
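For context: chunkDocument (above) splits one long text field into chunks of up to 200 whitespace-delimited words and copies the picked fields onto every chunk. A hedged usage sketch — the require path and sample document are assumptions:

const { chunkDocument } = require('./utils');

// A long description is split into several records that each carry id and
// title plus one slice of the description.
const doc = {
  id: 'sample-id',
  title: 'Sample title',
  description: 'word '.repeat(450)
};
const records = chunkDocument(doc, ['id', 'title'], 'description');
console.log(records.length); // a few chunks of at most 200 words each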
@@ -1,128 +0,0 @@
/* global expect */
const _ = require('lodash');

const { stripURLs, stripHTML } = require('../');

describe('utils', () => {
  describe('stripURLs', () => {
    it('should return a string', () => {
      const value = stripURLs('some string');

      expect(_.isString(value)).toBe(true);
    });

    it('should remove a url from the input', () => {
      const value = stripURLs('https://freecodecamp.org/en/challenges');
      expect(value).toEqual('');
    });

    it('should leave non-urls intact', () => {
      const value = stripURLs(
        'Some text before https://freecodecamp.org/en/challenges some text ' +
          'after'
      );
      const expected = 'Some text before some text after';
      expect(value).toEqual(expected);
    });

    it('should preserve punctuation, but remove new line chars', () => {
      const value = stripURLs(
        "http://search.freecodecamp.org\nI'm inclined to say, is just amazing!"
      );
      const expected = "I'm inclined to say, is just amazing!";
      expect(value).toEqual(expected);
    });

    it('should remove urls from a block of text', () => {
      const value = stripURLs(
        'Learn how Symbols work in JavaScript ES6!\n\nCode:\n<> ' +
          'http://codepen.io/beaucarnes/pen/ZLQEWx?editors=0011\n\nMore ' +
          'info:\n<> http://www.2ality.com/2014/12/es6-symbols.html\n<> ' +
          'http://exploringjs.com/es6/ch_symbols.html\n\nBeau Carnes on ' +
          'Twitter: https://twitter.com/carnesbeau\n\n⭐JavaScript Playlists⭐' +
          '\n▶JavaScript Basics: https://www.youtube.com/playlist?list=' +
          'PLWKjhJtqVAbk2qRZtWSzCIN38JC_NdhW5\n▶ES6: https://www.youtube.com' +
          '/playlist?list=PLWKjhJtqVAbljtmmeS0c-CEl2LdE-eR_F\n▶Design ' +
          'Patterns: https://www.youtube.com/playlist?list=PLWKjhJtqVAbnZtkA' +
          'I3BqcYxKnfWn_C704\n▶Data Structures and Algorithms: https://www.' +
          'youtube.com/playlist?list=PLWKjhJtqVAbkso-IbgiiP48n-O-JQA9PJ\n▶' +
          'Clean Code: https://www.youtube.com/playlist?list=PLWKjhJtqVAbkK2' +
          "4EaPurzMq0-kw5U9pJh\n\n-\nWe're busy people who learn to code, " +
          'then practice by building projects for nonprofits. Learn ' +
          'Full-stack JavaScript, build a portfolio, and get great ' +
          'references with our open source community.\n\nJoin our community ' +
          'at https://freecodecamp.com\nFollow us on twitter: ' +
          'https://twitter.com/freecodecamp\nLike us on Facebook: https://' +
          'www.facebook.com/freecodecamp\nFollow Quincy on Quora: https://' +
          'www.quora.com/Quincy-Larson'
      );
      const expected =
        'Learn how Symbols work in JavaScript ES6! Code: <20> ' +
        'More info: <20> <20> Beau Carnes on Twitter: ⭐JavaScript Playlists⭐ ▶' +
        'JavaScript Basics: ▶ES6: ▶Design Patterns: ▶Data Structures and ' +
        "Algorithms: ▶Clean Code: - We're busy people who learn to code, " +
        'then practice by building projects for nonprofits. Learn Full-stack ' +
        'JavaScript, build a portfolio, and get great references with our ' +
        'open source community. Join our community at Follow us on twitter: ' +
        'Like us on Facebook: Follow Quincy on Quora:';
      expect(value).toEqual(expected);
    });
  });

  describe('stripHTML', () => {
    it('should remove simple html tags from a block of text', () => {
      const value = stripHTML(
        "Now we've proven that every HTML page has a <code>body</code> " +
          'element, and that its <code>body</code> element can also be ' +
          'styled with CSS.Remember, you can style your <code>body</code> ' +
          'element just like any other HTML element, and all your other ' +
          "elements will inherit your <code>body</code> element's styles." +
          'First, create a <code>h1</code> element with the text <code>' +
          "Hello World</code>Then, let's give all elements on your page the " +
          'color of <code>green</code> by adding <code>color: green;</code> ' +
          'to...'
      );
      const expected =
        "Now we've proven that every HTML page has a body " +
        'element, and that its body element can also be styled with CSS.' +
        'Remember, you can style your body element just like any other HTML ' +
        'element, and all your other elements will inherit your body ' +
        "element's styles.First, create a h1 element with the text Hello " +
        "WorldThen, let's give all elements on your page the color of green " +
        'by adding color: green; to...';
      expect(value).toEqual(expected);
    });

    it('should remove escaped tags from a block of text', () => {
      const value = stripHTML(
        'You can add images to your website by using the <code>img</code> ' +
          "element, and point to a specific image's URL using the " +
          '<code>src</code> attribute.An example of this would be:<code>' +
          '<img src="https://www.your-image-source.com/your-image.jpg"' +
          '></code>All <code>img</code> elements <strong>must</strong> ' +
          'have an <code>alt</code> attribute. The text inside an ' +
          '<code>alt</code> attribute is used for screen readers to improve ' +
          "accessibility and is displayed if the image fails to load.Let's " +
          'add an <code>alt</code> attribute to our <code>img</code> example ' +
          'above:<code><img src="https://www.your-image-source.com/your-' +
          'image.jpg" alt="Author standing on a beach with two thumbs up. ' +
          '"></code>Note that in most cases, <code>img</code> elements ' +
          'are self-closing.Try it with this image:<code>https://bit.ly/fcc-' +
          'relaxing-cat</code>'
      );
      // the best I could do here is allow things like
      // image:https://bit.ly/fcc-relaxing-cat
      // trying to strip this further could invalidate other entries
      const expected =
        'You can add images to your website by using the img ' +
        "element, and point to a specific image's URL using the src " +
        'attribute.An example of this would be:All img elements must have ' +
        'an alt attribute. The text inside an alt attribute is used for ' +
        'screen readers to improve accessibility and is displayed if the ' +
        "image fails to load.Let's add an alt attribute to our img example " +
        'above:Note that in most cases, img elements are self-closing.Try ' +
        'it with this image:https://bit.ly/fcc-relaxing-cat';
      expect(value).toEqual(expected);
    });
  });
});