chore(cleanup): remove search indexing tools (#40070)
This commit is contained in:
committed by
GitHub
parent
6b45967997
commit
53ad373e47
@ -4,7 +4,6 @@
|
|||||||
"client",
|
"client",
|
||||||
"client/plugins/*",
|
"client/plugins/*",
|
||||||
"curriculum",
|
"curriculum",
|
||||||
"tools/search-indexing",
|
|
||||||
"tools/challenge-md-parser",
|
"tools/challenge-md-parser",
|
||||||
"tools/scripts/seed",
|
"tools/scripts/seed",
|
||||||
"tools/scripts/build",
|
"tools/scripts/build",
|
||||||
|
@ -21,9 +21,6 @@ JWT_SECRET=a_jwt_secret
|
|||||||
# ---------------------
|
# ---------------------
|
||||||
# Search
|
# Search
|
||||||
# ---------------------
|
# ---------------------
|
||||||
# Indexing
|
|
||||||
ALGOLIA_ADMIN_KEY=admin_key_from_algolia_dashboard
|
|
||||||
GHOST_CLIENT_KEY=client_key_from_ghost_dashboard
|
|
||||||
# Client Search Bar
|
# Client Search Bar
|
||||||
ALGOLIA_APP_ID=app_id_from_algolia_dashboard
|
ALGOLIA_APP_ID=app_id_from_algolia_dashboard
|
||||||
ALGOLIA_API_KEY=api_key_from_algolia_dashboard
|
ALGOLIA_API_KEY=api_key_from_algolia_dashboard
|
||||||
|
@ -1,9 +0,0 @@
|
|||||||
const algoliasearch = require('algoliasearch');
|
|
||||||
const path = require('path');
|
|
||||||
|
|
||||||
const envPath = path.resolve(__dirname, '../.env');
|
|
||||||
require('dotenv').config({ path: envPath });
|
|
||||||
|
|
||||||
const { ALGOLIA_ADMIN_KEY, ALGOLIA_APP_ID } = process.env;
|
|
||||||
|
|
||||||
exports.client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_ADMIN_KEY);
|
|
@ -1,40 +0,0 @@
|
|||||||
const { client } = require('../../algolia');
|
|
||||||
const debug = require('debug');
|
|
||||||
const getChallengeData = require('../../data-sources/challenges');
|
|
||||||
|
|
||||||
const log = debug('fcc:search:init:challenge');
|
|
||||||
|
|
||||||
const index = client.initIndex('challenge');
|
|
||||||
|
|
||||||
index.setSettings(
|
|
||||||
{
|
|
||||||
searchableAttributes: ['title', 'description', 'blockName'],
|
|
||||||
distinct: true,
|
|
||||||
attributeForDistinct: 'id',
|
|
||||||
attributesForFaceting: ['blockName']
|
|
||||||
},
|
|
||||||
(err, response) => {
|
|
||||||
if (err) {
|
|
||||||
log(err.message);
|
|
||||||
log(err.debugData);
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
log('setSettings\n\n' + JSON.stringify(response, null, 2));
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
exports.insertChallenges = function insertChallenges() {
|
|
||||||
return getChallengeData().subscribe(
|
|
||||||
challenges => {
|
|
||||||
index.addObjects(challenges, err => {
|
|
||||||
if (err) {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
},
|
|
||||||
err => {
|
|
||||||
throw new Error(err);
|
|
||||||
},
|
|
||||||
() => log('complete')
|
|
||||||
);
|
|
||||||
};
|
|
@ -1,51 +0,0 @@
|
|||||||
const { from } = require('rxjs');
|
|
||||||
const { toArray, switchMap } = require('rxjs/operators');
|
|
||||||
const _ = require('lodash');
|
|
||||||
const { client } = require('../../algolia');
|
|
||||||
const { getGuideArticleData } = require('../../data-sources/guides');
|
|
||||||
const debug = require('debug');
|
|
||||||
|
|
||||||
const log = debug('fcc:search:init:guides');
|
|
||||||
|
|
||||||
const index = client.initIndex('guide');
|
|
||||||
|
|
||||||
index.setSettings(
|
|
||||||
{
|
|
||||||
searchableAttributes: ['title', 'content', 'category'],
|
|
||||||
distinct: true,
|
|
||||||
attributeForDistinct: 'id',
|
|
||||||
attributesForFaceting: ['category']
|
|
||||||
},
|
|
||||||
(err, response) => {
|
|
||||||
if (err) {
|
|
||||||
log(err.message);
|
|
||||||
log(err.debugData);
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
log('setSettings\n' + JSON.stringify(response, null, 2));
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
exports.insertGuides = function insertGuides() {
|
|
||||||
return getGuideArticleData()
|
|
||||||
.pipe(
|
|
||||||
toArray(),
|
|
||||||
switchMap(articles => {
|
|
||||||
const chunks = _.chunk(articles, 200).map(_.flatten);
|
|
||||||
return from(chunks);
|
|
||||||
})
|
|
||||||
)
|
|
||||||
.subscribe(
|
|
||||||
articles => {
|
|
||||||
index.addObjects(articles, err => {
|
|
||||||
if (err) {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
},
|
|
||||||
err => {
|
|
||||||
throw new Error(err);
|
|
||||||
},
|
|
||||||
() => log('complete')
|
|
||||||
);
|
|
||||||
};
|
|
@ -1,35 +0,0 @@
|
|||||||
const envPath = require('path').resolve(__dirname, '../../../.env');
|
|
||||||
require('dotenv').config({ path: envPath });
|
|
||||||
|
|
||||||
const { zip, timer, from } = require('rxjs');
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The below has been commented out to avoid inadvertent
|
|
||||||
* ops usage with algolia
|
|
||||||
*/
|
|
||||||
|
|
||||||
// const { getStoryData } = require('./news');
|
|
||||||
// const { insertYoutube } = require('./youtube');
|
|
||||||
// const { insertChallenges } = require('./challenges');
|
|
||||||
// const { insertGuides } = require('./guides');
|
|
||||||
|
|
||||||
const dataSources = [
|
|
||||||
// insertGuides,
|
|
||||||
// insertChallenges
|
|
||||||
// insertYoutube,
|
|
||||||
// disable this until the roll out of news
|
|
||||||
// getStoryData
|
|
||||||
];
|
|
||||||
|
|
||||||
function init() {
|
|
||||||
return zip(timer(0, 5000), from(dataSources), (a, b) => b).subscribe(
|
|
||||||
fn => {
|
|
||||||
fn();
|
|
||||||
},
|
|
||||||
err => {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
init();
|
|
@ -1,37 +0,0 @@
|
|||||||
const { client } = require('../../algolia');
|
|
||||||
const debug = require('debug');
|
|
||||||
const { getYoutubeData } = require('../../data-sources/youtube');
|
|
||||||
|
|
||||||
const log = debug('fcc:search:init:youtube');
|
|
||||||
|
|
||||||
const index = client.initIndex('youtube');
|
|
||||||
|
|
||||||
index.setSettings(
|
|
||||||
{
|
|
||||||
searchableAttributes: ['title', 'description', 'playlistTitle'],
|
|
||||||
distinct: true,
|
|
||||||
attributeForDistinct: 'id',
|
|
||||||
attributesForFaceting: ['playlistTitle']
|
|
||||||
},
|
|
||||||
(err, response) => {
|
|
||||||
if (err) {
|
|
||||||
log(err.message);
|
|
||||||
log(err.debugData);
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
log('setSettings\n\n' + JSON.stringify(response, null, 2));
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
exports.insertYoutube = function insertYoutube() {
|
|
||||||
getYoutubeData().subscribe(
|
|
||||||
videos =>
|
|
||||||
index.addObjects(videos, err => {
|
|
||||||
if (err) {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
err => log(err, 'red'),
|
|
||||||
() => log('Complete', 'blue')
|
|
||||||
);
|
|
||||||
};
|
|
@ -1,55 +0,0 @@
|
|||||||
const { client } = require('../../algolia');
|
|
||||||
const _ = require('lodash');
|
|
||||||
const { getGuideArticleData } = require('../../data-sources/guides');
|
|
||||||
const debug = require('debug');
|
|
||||||
|
|
||||||
const log = debug('fcc:search:update:guides');
|
|
||||||
const index = client.initIndex('guides');
|
|
||||||
|
|
||||||
const concatContents = (doc, current) => ({
|
|
||||||
...current,
|
|
||||||
content: doc.content.concat([current.content])
|
|
||||||
});
|
|
||||||
|
|
||||||
exports.updateGuides = async function updateGuides() {
|
|
||||||
const newDocs = await getGuideArticleData().toPromise();
|
|
||||||
let hits = [];
|
|
||||||
const browseAll = index.browseAll();
|
|
||||||
browseAll.on('result', function onResult(content) {
|
|
||||||
hits = hits.concat(content.hits);
|
|
||||||
});
|
|
||||||
browseAll.on('end', function onEnd() {
|
|
||||||
log('Finished browsing this index');
|
|
||||||
log(`We got ${hits.length} records`);
|
|
||||||
const docIds = _.uniq(hits.map(doc => doc.id));
|
|
||||||
docIds.map(id => {
|
|
||||||
const allCurrentForId = hits.filter(doc => doc.id === id);
|
|
||||||
const allNewForId = newDocs.filter(doc => doc.id === id);
|
|
||||||
const newForId = allNewForId.reduce(concatContents, { content: [] });
|
|
||||||
const currentForId = allCurrentForId.reduce(concatContents, {
|
|
||||||
content: []
|
|
||||||
});
|
|
||||||
const isDiff = newForId.content.some(
|
|
||||||
snippet => !currentForId.content.includes(snippet)
|
|
||||||
);
|
|
||||||
if (isDiff) {
|
|
||||||
log(id, 'cyan');
|
|
||||||
const objectIDs = allCurrentForId.map(doc => doc.objectID);
|
|
||||||
index.addObjects(allNewForId, err => {
|
|
||||||
if (err) {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
index.deleteObjects(objectIDs, err => {
|
|
||||||
if (err) {
|
|
||||||
throw new Error(err);
|
|
||||||
}
|
|
||||||
log(`purge of stale data for ${id} complete`, 'magenta');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
browseAll.on('error', function onError(err) {
|
|
||||||
throw err;
|
|
||||||
});
|
|
||||||
};
|
|
@ -1,67 +0,0 @@
|
|||||||
const { from, of } = require('rxjs');
|
|
||||||
const { switchMap, tap } = require('rxjs/operators');
|
|
||||||
const debug = require('debug');
|
|
||||||
|
|
||||||
const { getChallengesForLang } = require('../../../curriculum/getChallenges');
|
|
||||||
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
|
|
||||||
|
|
||||||
const log = debug('fcc:search:data-source:challenges');
|
|
||||||
|
|
||||||
const { LOCALE: lang } = process.env;
|
|
||||||
|
|
||||||
module.exports = function getChallenges() {
|
|
||||||
log('sourcing challenges');
|
|
||||||
return from(getChallengesForLang(lang)).pipe(
|
|
||||||
tap(() => log('parsing curriculum')),
|
|
||||||
switchMap(curriculum => {
|
|
||||||
const superBlocks = Object.keys(curriculum).filter(
|
|
||||||
x => x !== 'certificates'
|
|
||||||
);
|
|
||||||
return from(superBlocks.map(superBlock => curriculum[superBlock]));
|
|
||||||
}),
|
|
||||||
switchMap(superBlock => {
|
|
||||||
const { blocks } = superBlock;
|
|
||||||
return from(Object.keys(blocks).map(block => blocks[block]));
|
|
||||||
}),
|
|
||||||
switchMap(block => {
|
|
||||||
const { meta, challenges } = block;
|
|
||||||
const { dashedName: blockDashedName } = meta;
|
|
||||||
return of(
|
|
||||||
challenges.map(challenge => ({ ...challenge, blockDashedName }))
|
|
||||||
);
|
|
||||||
}),
|
|
||||||
switchMap(challenges => {
|
|
||||||
const formattedChallenges = challenges
|
|
||||||
.filter(({ isPrivate }) => !isPrivate)
|
|
||||||
.reduce((acc, current) => {
|
|
||||||
const {
|
|
||||||
id,
|
|
||||||
title,
|
|
||||||
description,
|
|
||||||
instructions,
|
|
||||||
dashedName,
|
|
||||||
superBlock,
|
|
||||||
blockDashedName,
|
|
||||||
block
|
|
||||||
} = current;
|
|
||||||
const formattedChallenge = {
|
|
||||||
blockName: block,
|
|
||||||
id,
|
|
||||||
title,
|
|
||||||
description: stripURLs(stripHTML(description.concat(instructions))),
|
|
||||||
url: `/${superBlock}/${blockDashedName}/${dashedName}`
|
|
||||||
};
|
|
||||||
return [
|
|
||||||
...acc,
|
|
||||||
...chunkDocument(
|
|
||||||
formattedChallenge,
|
|
||||||
['title', 'id', 'blockName', 'url'],
|
|
||||||
'description'
|
|
||||||
)
|
|
||||||
];
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
return of(formattedChallenges);
|
|
||||||
})
|
|
||||||
);
|
|
||||||
};
|
|
@ -1,57 +0,0 @@
|
|||||||
const path = require('path');
|
|
||||||
const fs = require('fs-extra');
|
|
||||||
const { Observable } = require('rxjs');
|
|
||||||
const { map, filter } = require('rxjs/operators');
|
|
||||||
const readdirp = require('readdirp-walk');
|
|
||||||
const matter = require('gray-matter');
|
|
||||||
|
|
||||||
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
|
|
||||||
|
|
||||||
const { LOCALE: lang = 'english' } = process.env;
|
|
||||||
|
|
||||||
const selectedGuideDir = `../../../guide/${lang}`;
|
|
||||||
const guideRoot = path.resolve(__dirname, selectedGuideDir);
|
|
||||||
|
|
||||||
function fileStream(root) {
|
|
||||||
return Observable.create(observer =>
|
|
||||||
readdirp({ root, fileFilter: '*.md' })
|
|
||||||
.on('data', file => (file.stat.isFile() ? observer.next(file) : null))
|
|
||||||
.on('error', e => observer.error(e))
|
|
||||||
.on('end', () => observer.complete())
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseFile(file) {
|
|
||||||
const fileContents = fs.readFileSync(file.fullPath);
|
|
||||||
return { ...file, ...matter(fileContents) };
|
|
||||||
}
|
|
||||||
|
|
||||||
function buildArticle(file) {
|
|
||||||
const {
|
|
||||||
path,
|
|
||||||
content,
|
|
||||||
data: { title }
|
|
||||||
} = file;
|
|
||||||
const url = path.replace(/\/index.md$/, '');
|
|
||||||
const article = {
|
|
||||||
content: stripURLs(stripHTML(content)),
|
|
||||||
category: url.split('/').filter(Boolean)[0],
|
|
||||||
title,
|
|
||||||
url: `/${url}`,
|
|
||||||
id: url.replace('/', '-')
|
|
||||||
};
|
|
||||||
return chunkDocument(article, ['title', 'url', 'id', 'category'], 'content');
|
|
||||||
}
|
|
||||||
|
|
||||||
function filterStubs(articleChunks) {
|
|
||||||
return !articleChunks.some(chunk =>
|
|
||||||
chunk.content.includes('This is a stub. Help our community expand it')
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
exports.getGuideArticleData = () =>
|
|
||||||
fileStream(guideRoot).pipe(
|
|
||||||
map(file => parseFile(file)),
|
|
||||||
map(file => buildArticle(file)),
|
|
||||||
filter(article => filterStubs(article))
|
|
||||||
);
|
|
@ -1,62 +0,0 @@
|
|||||||
const path = require('path');
|
|
||||||
|
|
||||||
const envPath = path.resolve(__dirname, '../../../.env');
|
|
||||||
require('dotenv').config({ path: envPath });
|
|
||||||
|
|
||||||
const { GHOST_CLIENT_KEY } = process.env;
|
|
||||||
|
|
||||||
const axios = require('axios');
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
const getJson = async url => {
|
|
||||||
return axios
|
|
||||||
.get(url)
|
|
||||||
.then(res => res.data)
|
|
||||||
.catch(err => console.log(err));
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
 * Pages through the Ghost content API and writes a trimmed-down list of posts
 * to `posts.json` in the current working directory.
 *
 * After every page the accumulated list is logged and the file is rewritten,
 * then the loop waits one second before requesting the next page. The loop
 * stops when the API reports no `next` page or the page count is exceeded.
 *
 * @returns {Promise<void>}
 * @throws {Error} when a page request yields no usable payload.
 */
const constructIndex = async () => {
  let currPage = 1;
  // Provisional upper bound; replaced by the API's page count after the
  // first response.
  let lastPage = 5;
  const delay = m => new Promise(resolve => setTimeout(resolve, m));
  const posts = [];

  while (currPage && currPage <= lastPage) {
    const data = await getJson(
      `https://www.freecodecamp.org/news/ghost/api/v2/content/posts/?key=${GHOST_CLIENT_KEY}&include=tags,authors&page=${currPage}`
    );

    // getJson logs request failures and resolves with undefined, which used
    // to surface here as "Cannot read property 'posts' of undefined".
    if (!data || !Array.isArray(data.posts) || !data.meta) {
      throw new Error(
        `Ghost API returned no usable data for page ${currPage}`
      );
    }

    data.posts.forEach(post => {
      const thisPost = {
        title: post.title,
        author: {
          name: post.primary_author.name,
          url: post.primary_author.url,
          profileImage: post.primary_author.profile_image
        },
        tags: post.tags.map(obj => {
          return {
            name: obj.name,
            url: obj.url
          };
        }),
        url: post.url,
        featureImage: post.feature_image,
        ghostId: post.id,
        publishedAt: post.published_at
      };

      posts.push(thisPost);
    });

    currPage = data.meta.pagination.next;
    lastPage = data.meta.pagination.pages;

    console.log(posts);

    fs.writeFileSync('posts.json', JSON.stringify(posts, null, 2));
    await delay(1000);
  }
};
|
|
||||||
|
|
||||||
constructIndex();
|
|
@ -1,124 +0,0 @@
|
|||||||
const { timer, from, zip, iif, of } = require('rxjs');
|
|
||||||
const { switchMap, concatMap } = require('rxjs/operators');
|
|
||||||
const { google } = require('googleapis');
|
|
||||||
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
|
|
||||||
|
|
||||||
const { YOUTUBE_SECRET } = process.env;
|
|
||||||
const youtube = google.youtube({ version: 'v3', auth: YOUTUBE_SECRET });
|
|
||||||
|
|
||||||
/**
 * Fetches every item of a YouTube playlist, following `nextPageToken` until
 * the last page has been read.
 *
 * Each request is zipped with `timer(2000)`, so a page's result is emitted no
 * sooner than two seconds after subscription.
 *
 * @param {{ playlistId: string, playlistTitle: string }} playlist
 * @param {string} [nextPage] page token to request; omitted for the first page
 * @param {Array} [currentItems=[]] items gathered from earlier pages
 * @returns {Observable<{ videos: Array, playlistTitle: string }>}
 */
function getPlaylistItems(
  { playlistId, playlistTitle },
  nextPage,
  currentItems = []
) {
  return zip(
    timer(2000),
    from(
      new Promise((resolve, reject) => {
        youtube.playlistItems.list(
          {
            part: 'snippet',
            playlistId,
            pageToken: nextPage ? nextPage : ''
          },
          (err, data) => {
            if (err) {
              return reject(err);
            }
            return resolve({ ...data.data, playlistTitle });
          }
        );
      })
    ),
    (a, b) => b
  ).pipe(
    switchMap(({ nextPageToken, items, playlistTitle }) => {
      const allItems = currentItems.concat(items);
      // Recurse only when there really is another page. The previous
      // `iif(cond, getPlaylistItems(...), of(...))` built the recursive
      // observable eagerly, and because the Promise executor runs on
      // construction it fired an extra API request on every final page.
      if (nextPageToken) {
        return getPlaylistItems(
          { playlistId, playlistTitle },
          nextPageToken,
          allItems
        );
      }
      return of({ videos: allItems, playlistTitle });
    })
  );
}
|
|
||||||
|
|
||||||
/**
 * Lists every playlist of the hard-coded channel, following `nextPageToken`
 * until the last page has been read.
 *
 * Items of a newer page are placed in front of the ones gathered so far.
 *
 * @param {string} [nextPage] page token to request; omitted for the first page
 * @param {Array} [currentItems=[]] playlists gathered from earlier pages
 * @returns {Observable<Array>} emits the full list of playlist resources
 */
function getPlayLists(nextPage, currentItems = []) {
  return from(
    new Promise((resolve, reject) => {
      youtube.playlists.list(
        {
          auth: YOUTUBE_SECRET,
          part: 'snippet',
          channelId: 'UC8butISFwT-Wl7EV0hUK0BQ',
          pageToken: nextPage ? nextPage : ''
        },
        (err, data) => {
          if (err) {
            return reject(err);
          }
          return resolve(data.data);
        }
      );
    })
  ).pipe(
    switchMap(({ nextPageToken, items }) => {
      const allItems = items.concat(currentItems);
      // Plain conditional instead of `iif`: the recursive call passed to
      // `iif` was evaluated eagerly, and its Promise executor issued a
      // request even when no further page existed.
      if (nextPageToken) {
        return getPlayLists(nextPageToken, allItems);
      }
      return of(allItems);
    })
  );
}
|
|
||||||
|
|
||||||
exports.getYoutubeData = function getYoutubeData() {
|
|
||||||
return getPlayLists().pipe(
|
|
||||||
switchMap(playlists => {
|
|
||||||
return from(playlists).pipe(
|
|
||||||
concatMap(({ id, snippet: { title } }) =>
|
|
||||||
getPlaylistItems({ playlistId: id, playlistTitle: title })
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}),
|
|
||||||
switchMap(({ videos, playlistTitle }) => {
|
|
||||||
const formattedVideos = videos
|
|
||||||
.map(video => {
|
|
||||||
const {
|
|
||||||
id,
|
|
||||||
snippet: {
|
|
||||||
title,
|
|
||||||
description,
|
|
||||||
resourceId: { videoId },
|
|
||||||
thumbnails
|
|
||||||
}
|
|
||||||
} = video;
|
|
||||||
return {
|
|
||||||
id,
|
|
||||||
videoId,
|
|
||||||
title: stripHTML(title),
|
|
||||||
description: stripURLs(stripHTML(description)),
|
|
||||||
thumbnails,
|
|
||||||
playlistTitle
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.reduce(
|
|
||||||
(chunked, current) => [
|
|
||||||
...chunked,
|
|
||||||
...chunkDocument(
|
|
||||||
current,
|
|
||||||
['id', 'videoId', 'title', 'thumbnail', 'playlistTitle'],
|
|
||||||
'description'
|
|
||||||
)
|
|
||||||
],
|
|
||||||
[]
|
|
||||||
);
|
|
||||||
return of(formattedVideos);
|
|
||||||
})
|
|
||||||
);
|
|
||||||
};
|
|
5263
tools/search-indexing/package-lock.json
generated
5263
tools/search-indexing/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,50 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "@freecodecamp/search-indexing",
|
|
||||||
"private": true,
|
|
||||||
"version": "0.0.1",
|
|
||||||
"description": "A repository to hold all of our search assets",
|
|
||||||
"main": "N/A",
|
|
||||||
"scripts": {
|
|
||||||
"dev": "nodemon server",
|
|
||||||
"init": "node init",
|
|
||||||
"build:server": "npx babel server --out-dir lib --ignore spec.js,test.js",
|
|
||||||
"format": "prettier --single-quote es5 --write './!(packages|www)/**/*.js'",
|
|
||||||
"test": "jest"
|
|
||||||
},
|
|
||||||
"repository": {
|
|
||||||
"type": "git",
|
|
||||||
"url": "https://github.com/freeCodeCamp/freeCodeCamp.git"
|
|
||||||
},
|
|
||||||
"keywords": [
|
|
||||||
"search",
|
|
||||||
"algolia",
|
|
||||||
"freeCodecamp"
|
|
||||||
],
|
|
||||||
"author": "freeCodeCamp",
|
|
||||||
"license": "BSD-3-Clause",
|
|
||||||
"bugs": {
|
|
||||||
"url": "https://github.com/freeCodeCamp/freeCodeCamp/issues"
|
|
||||||
},
|
|
||||||
"homepage": "https://github.com/freeCodeCamp/freeCodeCamp#readme",
|
|
||||||
"dependencies": {
|
|
||||||
"algoliasearch": "^3.25.1",
|
|
||||||
"axios": "^0.19.0",
|
|
||||||
"chalk": "^2.3.2",
|
|
||||||
"debug": "^4.1.1",
|
|
||||||
"dotenv": "^6.2.0",
|
|
||||||
"file": "^0.2.2",
|
|
||||||
"fs-extra": "^7.0.1",
|
|
||||||
"googleapis": "^37.2.0",
|
|
||||||
"gray-matter": "^4.0.2",
|
|
||||||
"lodash": "^4.17.19",
|
|
||||||
"readdirp-walk": "^1.7.0",
|
|
||||||
"rxjs": "^6.4.0",
|
|
||||||
"secure-compare": "^3.0.1",
|
|
||||||
"validator": "^10.11.0"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"html-entities": "^1.2.1",
|
|
||||||
"jest": "^24.9.0",
|
|
||||||
"striptags": "^3.1.1"
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,105 +0,0 @@
|
|||||||
const { Observable } = require('rxjs');
|
|
||||||
const fse = require('fs-extra');
|
|
||||||
const file = require('file');
|
|
||||||
const _ = require('lodash');
|
|
||||||
const { isURL } = require('validator');
|
|
||||||
const stripTags = require('striptags');
|
|
||||||
const Entities = require('html-entities').AllHtmlEntities;
|
|
||||||
|
|
||||||
const entities = new Entities();
|
|
||||||
|
|
||||||
const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
|
|
||||||
const isJSRE = /\.jsx?$/;
|
|
||||||
const shouldBeIgnoredRE = /^(\_|\.)/;
|
|
||||||
const excludedDirs = ['search'];
|
|
||||||
const guideSvnRE = /guides\/svn$/;
|
|
||||||
|
|
||||||
exports.isAFileRE = isAFileRE;
|
|
||||||
exports.isJSRE = isJSRE;
|
|
||||||
exports.shouldBeIgnoredRE = shouldBeIgnoredRE;
|
|
||||||
exports.excludedDirs = excludedDirs;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* *
|
|
||||||
* Directory Helpers *
|
|
||||||
* *
|
|
||||||
*/
|
|
||||||
|
|
||||||
exports.listDirectory = function listDirectory(start) {
|
|
||||||
let allDirs = [];
|
|
||||||
file.walkSync(start, dirPath => {
|
|
||||||
if (dirPath.includes('.svn')) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
allDirs = [...allDirs, dirPath];
|
|
||||||
});
|
|
||||||
return allDirs.filter(name => !guideSvnRE.test(name));
|
|
||||||
};
|
|
||||||
|
|
||||||
function readDir(dir = __dirname, returnFiles = false) {
|
|
||||||
const dirContent = fse
|
|
||||||
.readdirSync(dir)
|
|
||||||
.filter(dir => !excludedDirs.includes(dir))
|
|
||||||
.filter(file => !(shouldBeIgnoredRE.test(file) || isJSRE.test(file)))
|
|
||||||
.filter(file => file !== 'LICENSE.md');
|
|
||||||
return returnFiles
|
|
||||||
? dirContent
|
|
||||||
: dirContent.filter(item => !isAFileRE.test(item));
|
|
||||||
}
|
|
||||||
|
|
||||||
exports.readDir = readDir;
|
|
||||||
|
|
||||||
exports.parseDirectory = function parseDirectory(dirLevel, cb) {
|
|
||||||
return Observable.from(readDir(dirLevel)).flatMap(dir => {
|
|
||||||
const dirPath = `${dirLevel}/${dir}`;
|
|
||||||
const subDirs = readDir(dirPath);
|
|
||||||
if (!subDirs) {
|
|
||||||
cb(dirPath);
|
|
||||||
return Observable.of(null);
|
|
||||||
}
|
|
||||||
cb(dirPath);
|
|
||||||
return parseDirectory(dirPath, cb);
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* *
|
|
||||||
* Document Helpers *
|
|
||||||
* *
|
|
||||||
*/
|
|
||||||
|
|
||||||
exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) {
|
|
||||||
const baseDoc = _.pick(doc, pickFields);
|
|
||||||
const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g);
|
|
||||||
if (!chunks) {
|
|
||||||
return [doc];
|
|
||||||
}
|
|
||||||
return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk }));
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
 * Removes every whitespace-separated token that `isURL` recognises as a URL
 * and joins the remaining tokens with single spaces.
 *
 * @param {string} str text to clean
 * @returns {string} the text without URL tokens; all whitespace runs,
 *   including new lines, collapse to one space
 */
function stripURLs(str) {
  const kept = [];
  for (const token of str.split(/\s/)) {
    // Empty tokens come from consecutive whitespace characters.
    if (_.isEmpty(token) || isURL(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept.join(' ');
}
|
|
||||||
|
|
||||||
/**
 * Repairs numeric HTML entities that are missing their terminating semicolon,
 * e.g. `&#39s` becomes `&#39;s`, so they can be decoded afterwards.
 *
 * An entity is left alone when it is already terminated, when it sits at the
 * very end of the string, or when it is directly followed by one of
 * `(`, `)`, `!` or `?`.
 *
 * @param {string} str text that may contain unterminated numeric entities
 * @returns {string} the text with a `;` inserted after each such entity
 */
function fixEntities(str) {
  // `\d{2,}` consumes the whole number and the lookahead rejects a following
  // digit, so a valid three-digit entity such as `&#160;` is no longer cut
  // after two digits and turned into `&#16;0;`. Using a lookahead also leaves
  // the next character unconsumed, so back-to-back entities are both fixed.
  return str.replace(/&#\d{2,}(?=[^\d(!?;)])/g, '$&;');
}
|
|
||||||
|
|
||||||
exports.stripURLs = stripURLs;
|
|
||||||
|
|
||||||
exports.stripHTML = function stripHTML(text) {
|
|
||||||
const unescapedStr = entities.decode(fixEntities(text));
|
|
||||||
return stripTags(unescapedStr);
|
|
||||||
};
|
|
@ -1,128 +0,0 @@
|
|||||||
/* global expect */
|
|
||||||
const _ = require('lodash');
|
|
||||||
|
|
||||||
const { stripURLs, stripHTML } = require('../');
|
|
||||||
|
|
||||||
describe('utils', () => {
|
|
||||||
describe('stripURLs', () => {
|
|
||||||
it('should return a string', () => {
|
|
||||||
const value = stripURLs('some string');
|
|
||||||
|
|
||||||
expect(_.isString(value)).toBe(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should remove a url from the input', () => {
|
|
||||||
const value = stripURLs('https://freecodecamp.org/en/challenges');
|
|
||||||
expect(value).toEqual('');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should leave non-urls intact', () => {
|
|
||||||
const value = stripURLs(
|
|
||||||
'Some text before https://freecodecamp.org/en/challenges some text ' +
|
|
||||||
'after'
|
|
||||||
);
|
|
||||||
const expected = 'Some text before some text after';
|
|
||||||
expect(value).toEqual(expected);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should preserve punctuation, but remove new line chars', () => {
|
|
||||||
const value = stripURLs(
|
|
||||||
"http://search.freecodecamp.org\nI'm inclined to say, is just amazing!"
|
|
||||||
);
|
|
||||||
const expected = "I'm inclined to say, is just amazing!";
|
|
||||||
expect(value).toEqual(expected);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should remove urls from a block of text', () => {
|
|
||||||
const value = stripURLs(
|
|
||||||
'Learn how Symbols work in JavaScript ES6!\n\nCode:\n<> ' +
|
|
||||||
'http://codepen.io/beaucarnes/pen/ZLQEWx?editors=0011\n\nMore ' +
|
|
||||||
'info:\n<> http://www.2ality.com/2014/12/es6-symbols.html\n<> ' +
|
|
||||||
'http://exploringjs.com/es6/ch_symbols.html\n\nBeau Carnes on ' +
|
|
||||||
'Twitter: https://twitter.com/carnesbeau\n\n⭐JavaScript Playlists⭐' +
|
|
||||||
'\n▶JavaScript Basics: https://www.youtube.com/playlist?list=' +
|
|
||||||
'PLWKjhJtqVAbk2qRZtWSzCIN38JC_NdhW5\n▶ES6: https://www.youtube.com' +
|
|
||||||
'/playlist?list=PLWKjhJtqVAbljtmmeS0c-CEl2LdE-eR_F\n▶Design ' +
|
|
||||||
'Patterns: https://www.youtube.com/playlist?list=PLWKjhJtqVAbnZtkA' +
|
|
||||||
'I3BqcYxKnfWn_C704\n▶Data Structures and Algorithms: https://www.' +
|
|
||||||
'youtube.com/playlist?list=PLWKjhJtqVAbkso-IbgiiP48n-O-JQA9PJ\n▶' +
|
|
||||||
'Clean Code: https://www.youtube.com/playlist?list=PLWKjhJtqVAbkK2' +
|
|
||||||
"4EaPurzMq0-kw5U9pJh\n\n-\nWe're busy people who learn to code, " +
|
|
||||||
'then practice by building projects for nonprofits. Learn ' +
|
|
||||||
'Full-stack JavaScript, build a portfolio, and get great ' +
|
|
||||||
'references with our open source community.\n\nJoin our community ' +
|
|
||||||
'at https://freecodecamp.com\nFollow us on twitter: ' +
|
|
||||||
'https://twitter.com/freecodecamp\nLike us on Facebook: https://' +
|
|
||||||
'www.facebook.com/freecodecamp\nFollow Quincy on Quora: https://' +
|
|
||||||
'www.quora.com/Quincy-Larson'
|
|
||||||
);
|
|
||||||
const expected =
|
|
||||||
'Learn how Symbols work in JavaScript ES6! Code: <20> ' +
|
|
||||||
'More info: <20> <20> Beau Carnes on Twitter: ⭐JavaScript Playlists⭐ ▶' +
|
|
||||||
'JavaScript Basics: ▶ES6: ▶Design Patterns: ▶Data Structures and ' +
|
|
||||||
"Algorithms: ▶Clean Code: - We're busy people who learn to code, " +
|
|
||||||
'then practice by building projects for nonprofits. Learn Full-stack ' +
|
|
||||||
'JavaScript, build a portfolio, and get great references with our ' +
|
|
||||||
'open source community. Join our community at Follow us on twitter: ' +
|
|
||||||
'Like us on Facebook: Follow Quincy on Quora:';
|
|
||||||
expect(value).toEqual(expected);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('stripHTML', () => {
|
|
||||||
it('should remove simple html tags from a block of text', () => {
|
|
||||||
const value = stripHTML(
|
|
||||||
"Now we've proven that every HTML page has a <code>body</code> " +
|
|
||||||
'element, and that its <code>body</code> element can also be ' +
|
|
||||||
'styled with CSS.Remember, you can style your <code>body</code> ' +
|
|
||||||
'element just like any other HTML element, and all your other ' +
|
|
||||||
"elements will inherit your <code>body</code> element's styles." +
|
|
||||||
'First, create a <code>h1</code> element with the text <code>' +
|
|
||||||
"Hello World</code>Then, let's give all elements on your page the " +
|
|
||||||
'color of <code>green</code> by adding <code>color: green;</code> ' +
|
|
||||||
'to...'
|
|
||||||
);
|
|
||||||
const expected =
|
|
||||||
"Now we've proven that every HTML page has a body " +
|
|
||||||
'element, and that its body element can also be styled with CSS.' +
|
|
||||||
'Remember, you can style your body element just like any other HTML ' +
|
|
||||||
'element, and all your other elements will inherit your body ' +
|
|
||||||
"element's styles.First, create a h1 element with the text Hello " +
|
|
||||||
"WorldThen, let's give all elements on your page the color of green " +
|
|
||||||
'by adding color: green; to...';
|
|
||||||
expect(value).toEqual(expected);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should remove escaped tags from a block of text', () => {
|
|
||||||
const value = stripHTML(
|
|
||||||
'You can add images to your website by using the <code>img</code> ' +
|
|
||||||
"element, and point to a specific image's URL using the " +
|
|
||||||
'<code>src</code> attribute.An example of this would be:<code>' +
|
|
||||||
'<img src="https://www.your-image-source.com/your-image.jpg"' +
|
|
||||||
'></code>All <code>img</code> elements <strong>must</strong> ' +
|
|
||||||
'have an <code>alt</code> attribute. The text inside an ' +
|
|
||||||
'<code>alt</code> attribute is used for screen readers to improve ' +
|
|
||||||
"accessibility and is displayed if the image fails to load.Let's " +
|
|
||||||
'add an <code>alt</code> attribute to our <code>img</code> example ' +
|
|
||||||
'above:<code><img src="https://www.your-image-source.com/your-' +
|
|
||||||
'image.jpg" alt="Author standing on a beach with two thumbs up. ' +
|
|
||||||
'"></code>Note that in most cases, <code>img</code> elements ' +
|
|
||||||
'are self-closing.Try it with this image:<code>https://bit.ly/fcc-' +
|
|
||||||
'relaxing-cat</code>'
|
|
||||||
);
|
|
||||||
// the best I could do here is allow things like
|
|
||||||
// image:https://bit.ly/fcc-relaxing-cat
|
|
||||||
// trying to strip this further could invalidate other entries
|
|
||||||
const expected =
|
|
||||||
'You can add images to your website by using the img ' +
|
|
||||||
"element, and point to a specific image's URL using the src " +
|
|
||||||
'attribute.An example of this would be:All img elements must have ' +
|
|
||||||
'an alt attribute. The text inside an alt attribute is used for ' +
|
|
||||||
'screen readers to improve accessibility and is displayed if the ' +
|
|
||||||
"image fails to load.Let's add an alt attribute to our img example " +
|
|
||||||
'above:Note that in most cases, img elements are self-closing.Try ' +
|
|
||||||
'it with this image:https://bit.ly/fcc-relaxing-cat';
|
|
||||||
expect(value).toEqual(expected);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
Reference in New Issue
Block a user