chore(cleanup): remove search indexing tools (#40070)

This commit is contained in:
Mrugesh Mohapatra
2020-11-02 11:35:59 +05:30
committed by GitHub
parent 6b45967997
commit 53ad373e47
16 changed files with 0 additions and 6087 deletions

View File

@ -4,7 +4,6 @@
"client",
"client/plugins/*",
"curriculum",
"tools/search-indexing",
"tools/challenge-md-parser",
"tools/scripts/seed",
"tools/scripts/build",

View File

@ -21,9 +21,6 @@ JWT_SECRET=a_jwt_secret
# ---------------------
# Search
# ---------------------
# Indexing
ALGOLIA_ADMIN_KEY=admin_key_from_algolia_dashboard
GHOST_CLIENT_KEY=client_key_from_ghost_dashboard
# Client Search Bar
ALGOLIA_APP_ID=app_id_from_algolia_dashboard
ALGOLIA_API_KEY=api_key_from_algolia_dashboard

View File

@ -1,9 +0,0 @@
// Creates and exports an Algolia client configured from environment variables.
const path = require('path');
const algoliasearch = require('algoliasearch');

// Load ../.env first so the credentials are present on process.env below.
require('dotenv').config({ path: path.resolve(__dirname, '../.env') });

const appId = process.env.ALGOLIA_APP_ID;
const adminKey = process.env.ALGOLIA_ADMIN_KEY;

exports.client = algoliasearch(appId, adminKey);

View File

@ -1,40 +0,0 @@
const { client } = require('../../algolia');
const debug = require('debug');
const getChallengeData = require('../../data-sources/challenges');
const log = debug('fcc:search:init:challenge');
const index = client.initIndex('challenge');
// Settings applied to the `challenge` index when this module is loaded.
const challengeIndexSettings = {
  searchableAttributes: ['title', 'description', 'blockName'],
  distinct: true,
  attributeForDistinct: 'id',
  attributesForFaceting: ['blockName']
};

// Logs the Algolia response, or logs the failure details and throws.
function onChallengeSettingsApplied(error, result) {
  if (!error) {
    log('setSettings\n\n' + JSON.stringify(result, null, 2));
    return;
  }
  log(error.message);
  log(error.debugData);
  throw new Error(error);
}

index.setSettings(challengeIndexSettings, onChallengeSettingsApplied);
exports.insertChallenges = function insertChallenges() {
return getChallengeData().subscribe(
challenges => {
index.addObjects(challenges, err => {
if (err) {
throw new Error(err);
}
});
},
err => {
throw new Error(err);
},
() => log('complete')
);
};

View File

@ -1,51 +0,0 @@
const { from } = require('rxjs');
const { toArray, switchMap } = require('rxjs/operators');
const _ = require('lodash');
const { client } = require('../../algolia');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');
const log = debug('fcc:search:init:guides');
const index = client.initIndex('guide');
// Settings applied to the `guide` index when this module is loaded.
const guideIndexSettings = {
  searchableAttributes: ['title', 'content', 'category'],
  distinct: true,
  attributeForDistinct: 'id',
  attributesForFaceting: ['category']
};

// Logs the Algolia response, or logs the failure details and throws.
function onGuideSettingsApplied(error, result) {
  if (!error) {
    log('setSettings\n' + JSON.stringify(result, null, 2));
    return;
  }
  log(error.message);
  log(error.debugData);
  throw new Error(error);
}

index.setSettings(guideIndexSettings, onGuideSettingsApplied);
exports.insertGuides = function insertGuides() {
return getGuideArticleData()
.pipe(
toArray(),
switchMap(articles => {
const chunks = _.chunk(articles, 200).map(_.flatten);
return from(chunks);
})
)
.subscribe(
articles => {
index.addObjects(articles, err => {
if (err) {
throw new Error(err);
}
});
},
err => {
throw new Error(err);
},
() => log('complete')
);
};

View File

@ -1,35 +0,0 @@
const envPath = require('path').resolve(__dirname, '../../../.env');
require('dotenv').config({ path: envPath });
const { zip, timer, from } = require('rxjs');
/*
* The below has been commented out to avoid inadvertent
* ops usage with algolia
*/
// const { getStoryData } = require('./news');
// const { insertYoutube } = require('./youtube');
// const { insertChallenges } = require('./challenges');
// const { insertGuides } = require('./guides');
const dataSources = [
// insertGuides,
// insertChallenges
// insertYoutube,
// disable this until the roll out of news
// getStoryData
];
/**
 * Invokes every function listed in `dataSources`, one per timer tick.
 * The timer fires immediately and then every 5000 ms; zip pairs the n-th
 * tick with the n-th data source.
 */
function init() {
  const ticks = timer(0, 5000);
  const sources = from(dataSources);
  const staggered = zip(ticks, sources, (tick, source) => source);

  const runSource = source => {
    source();
  };
  const fail = err => {
    throw new Error(err);
  };

  return staggered.subscribe(runSource, fail);
}

init();

View File

@ -1,37 +0,0 @@
const { client } = require('../../algolia');
const debug = require('debug');
const { getYoutubeData } = require('../../data-sources/youtube');
const log = debug('fcc:search:init:youtube');
const index = client.initIndex('youtube');
// Settings applied to the `youtube` index when this module is loaded.
const youtubeIndexSettings = {
  searchableAttributes: ['title', 'description', 'playlistTitle'],
  distinct: true,
  attributeForDistinct: 'id',
  attributesForFaceting: ['playlistTitle']
};

// Logs the Algolia response, or logs the failure details and throws.
function onYoutubeSettingsApplied(error, result) {
  if (!error) {
    log('setSettings\n\n' + JSON.stringify(result, null, 2));
    return;
  }
  log(error.message);
  log(error.debugData);
  throw new Error(error);
}

index.setSettings(youtubeIndexSettings, onYoutubeSettingsApplied);
exports.insertYoutube = function insertYoutube() {
getYoutubeData().subscribe(
videos =>
index.addObjects(videos, err => {
if (err) {
throw new Error(err);
}
}),
err => log(err, 'red'),
() => log('Complete', 'blue')
);
};

View File

@ -1,55 +0,0 @@
const { client } = require('../../algolia');
const _ = require('lodash');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');
const log = debug('fcc:search:update:guides');
const index = client.initIndex('guides');
// Reducer: `doc` is the accumulator. Appends `current.content` as one entry
// to the accumulated content list and carries over the other fields of
// `current`.
const concatContents = (doc, current) => {
  const content = doc.content.concat([current.content]);
  return { ...current, content };
};
exports.updateGuides = async function updateGuides() {
const newDocs = await getGuideArticleData().toPromise();
let hits = [];
const browseAll = index.browseAll();
browseAll.on('result', function onResult(content) {
hits = hits.concat(content.hits);
});
browseAll.on('end', function onEnd() {
log('Finished browsing this index');
log(`We got ${hits.length} records`);
const docIds = _.uniq(hits.map(doc => doc.id));
docIds.map(id => {
const allCurrentForId = hits.filter(doc => doc.id === id);
const allNewForId = newDocs.filter(doc => doc.id === id);
const newForId = allNewForId.reduce(concatContents, { content: [] });
const currentForId = allCurrentForId.reduce(concatContents, {
content: []
});
const isDiff = newForId.content.some(
snippet => !currentForId.content.includes(snippet)
);
if (isDiff) {
log(id, 'cyan');
const objectIDs = allCurrentForId.map(doc => doc.objectID);
index.addObjects(allNewForId, err => {
if (err) {
throw new Error(err);
}
index.deleteObjects(objectIDs, err => {
if (err) {
throw new Error(err);
}
log(`purge of stale data for ${id} complete`, 'magenta');
});
});
}
});
});
browseAll.on('error', function onError(err) {
throw err;
});
};

View File

@ -1,67 +0,0 @@
const { from, of } = require('rxjs');
const { switchMap, tap } = require('rxjs/operators');
const debug = require('debug');
const { getChallengesForLang } = require('../../../curriculum/getChallenges');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
const log = debug('fcc:search:data-source:challenges');
const { LOCALE: lang } = process.env;
module.exports = function getChallenges() {
log('sourcing challenges');
return from(getChallengesForLang(lang)).pipe(
tap(() => log('parsing curriculum')),
switchMap(curriculum => {
const superBlocks = Object.keys(curriculum).filter(
x => x !== 'certificates'
);
return from(superBlocks.map(superBlock => curriculum[superBlock]));
}),
switchMap(superBlock => {
const { blocks } = superBlock;
return from(Object.keys(blocks).map(block => blocks[block]));
}),
switchMap(block => {
const { meta, challenges } = block;
const { dashedName: blockDashedName } = meta;
return of(
challenges.map(challenge => ({ ...challenge, blockDashedName }))
);
}),
switchMap(challenges => {
const formattedChallenges = challenges
.filter(({ isPrivate }) => !isPrivate)
.reduce((acc, current) => {
const {
id,
title,
description,
instructions,
dashedName,
superBlock,
blockDashedName,
block
} = current;
const formattedChallenge = {
blockName: block,
id,
title,
description: stripURLs(stripHTML(description.concat(instructions))),
url: `/${superBlock}/${blockDashedName}/${dashedName}`
};
return [
...acc,
...chunkDocument(
formattedChallenge,
['title', 'id', 'blockName', 'url'],
'description'
)
];
}, []);
return of(formattedChallenges);
})
);
};

View File

@ -1,57 +0,0 @@
const path = require('path');
const fs = require('fs-extra');
const { Observable } = require('rxjs');
const { map, filter } = require('rxjs/operators');
const readdirp = require('readdirp-walk');
const matter = require('gray-matter');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
const { LOCALE: lang = 'english' } = process.env;
const selectedGuideDir = `../../../guide/${lang}`;
const guideRoot = path.resolve(__dirname, selectedGuideDir);
/**
 * Wraps a readdirp walk over the `*.md` files under `root` in an Observable.
 * Entries whose stat reports a regular file are emitted; walker errors and
 * completion are forwarded to the observer.
 */
function fileStream(root) {
  return Observable.create(observer => {
    const emitIfFile = file =>
      file.stat.isFile() ? observer.next(file) : null;
    const walker = readdirp({ root, fileFilter: '*.md' })
      .on('data', emitIfFile)
      .on('error', e => observer.error(e))
      .on('end', () => observer.complete());
    return walker;
  });
}
// Reads the file at `file.fullPath` and merges the gray-matter parse result
// over the original file entry.
function parseFile(file) {
  const raw = fs.readFileSync(file.fullPath);
  const parsed = matter(raw);
  return { ...file, ...parsed };
}
/**
 * Converts a parsed guide file into chunked search records.
 *
 * @param {object} file - parsed file entry; reads `path`, `content` and
 *   `data.title`.
 * @returns {object[]} the article split into chunks on its `content` field.
 */
function buildArticle(file) {
  // Aliased so the local does not shadow the module-level `path` import.
  const {
    path: relativePath,
    content,
    data: { title }
  } = file;
  // The dot is escaped so only a literal "/index.md" suffix is removed.
  const url = relativePath.replace(/\/index\.md$/, '');
  const article = {
    content: stripURLs(stripHTML(content)),
    category: url.split('/').filter(Boolean)[0],
    title,
    url: `/${url}`,
    // NOTE(review): a string pattern replaces only the FIRST '/', so nested
    // paths keep their later slashes. Left unchanged because existing index
    // records are keyed on this id - confirm whether all slashes were meant.
    id: url.replace('/', '-')
  };
  return chunkDocument(article, ['title', 'url', 'id', 'category'], 'content');
}
// Predicate: true when no chunk of the article contains the stub marker text.
function filterStubs(articleChunks) {
  const stubMarker = 'This is a stub. Help our community expand it';
  return articleChunks.every(chunk => !chunk.content.includes(stubMarker));
}
exports.getGuideArticleData = () =>
fileStream(guideRoot).pipe(
map(file => parseFile(file)),
map(file => buildArticle(file)),
filter(article => filterStubs(article))
);

View File

@ -1,62 +0,0 @@
const path = require('path');
const envPath = path.resolve(__dirname, '../../../.env');
require('dotenv').config({ path: envPath });
const { GHOST_CLIENT_KEY } = process.env;
const axios = require('axios');
const fs = require('fs');
/**
 * Fetches `url` and returns the response body.
 *
 * @param {string} url - address to request.
 * @returns {Promise<*>} the `data` field of the axios response.
 * @throws re-throws the request error after logging it. Previously the error
 *   was swallowed and `undefined` returned, which made the caller fail later
 *   with an unrelated TypeError.
 */
const getJson = async url => {
  try {
    const res = await axios.get(url);
    return res.data;
  } catch (err) {
    console.log(err);
    throw err;
  }
};
/**
 * Pages through the Ghost posts API, reduces every post to the fields kept
 * below, and rewrites `posts.json` with the accumulated list after each page.
 *
 * @returns {Promise<void>} resolves after the last page has been written.
 * @throws {Error} when a page cannot be fetched or lacks `posts` / `meta`.
 */
const constructIndex = async () => {
  let currPage = 1;
  // Provisional value; replaced by the real page count after the first fetch.
  let lastPage = 5;
  const delay = m => new Promise(resolve => setTimeout(resolve, m));
  const posts = [];
  while (currPage && currPage <= lastPage) {
    const data = await getJson(
      `https://www.freecodecamp.org/news/ghost/api/v2/content/posts/?key=${GHOST_CLIENT_KEY}&include=tags,authors&page=${currPage}`
    );
    // Fail with a clear message instead of a TypeError on `data.posts`.
    if (
      !data ||
      !Array.isArray(data.posts) ||
      !data.meta ||
      !data.meta.pagination
    ) {
      throw new Error(
        `Unexpected response from the Ghost posts API for page ${currPage}`
      );
    }
    data.posts.forEach(post => {
      const thisPost = {
        title: post.title,
        author: {
          name: post.primary_author.name,
          url: post.primary_author.url,
          profileImage: post.primary_author.profile_image
        },
        tags: post.tags.map(obj => {
          return {
            name: obj.name,
            url: obj.url
          };
        }),
        url: post.url,
        featureImage: post.feature_image,
        ghostId: post.id,
        publishedAt: post.published_at
      };
      posts.push(thisPost);
    });
    currPage = data.meta.pagination.next;
    lastPage = data.meta.pagination.pages;
    console.log(posts);
    fs.writeFileSync('posts.json', JSON.stringify(posts, null, 2));
    // Only pause when another page will actually be requested.
    if (currPage) {
      await delay(1000);
    }
  }
};

// Surface failures instead of leaving an unhandled promise rejection.
constructIndex().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

View File

@ -1,124 +0,0 @@
const { timer, from, zip, iif, of } = require('rxjs');
const { switchMap, concatMap } = require('rxjs/operators');
const { google } = require('googleapis');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');
const { YOUTUBE_SECRET } = process.env;
const youtube = google.youtube({ version: 'v3', auth: YOUTUBE_SECRET });
/**
 * Fetches every item of one playlist, following `nextPageToken` until the
 * API stops returning one.
 *
 * @param {{playlistId: string, playlistTitle: string}} playlist
 * @param {string} [nextPage] - page token; an empty token is sent when absent.
 * @param {object[]} [currentItems] - items gathered from earlier pages.
 * @returns {Observable<{videos: object[], playlistTitle: string}>}
 */
function getPlaylistItems(
  { playlistId, playlistTitle },
  nextPage,
  currentItems = []
) {
  return zip(
    // Zipping with a 2000 ms timer holds each page back for at least 2 s.
    timer(2000),
    from(
      new Promise((resolve, reject) => {
        youtube.playlistItems.list(
          {
            part: 'snippet',
            playlistId,
            pageToken: nextPage ? nextPage : ''
          },
          (err, data) => {
            if (err) {
              return reject(err);
            }
            return resolve({ ...data.data, playlistTitle });
          }
        );
      })
    ),
    (a, b) => b
  ).pipe(
    switchMap(({ nextPageToken, items, playlistTitle }) => {
      const allItems = currentItems.concat(items);
      // Recurse only when there IS a next page. Passing the recursive call as
      // an `iif` argument evaluated it eagerly, and because the request is
      // issued inside a Promise executor, that fired a stray API call even
      // on the final page.
      return nextPageToken
        ? getPlaylistItems(
            { playlistId, playlistTitle },
            nextPageToken,
            allItems
          )
        : of({ videos: allItems, playlistTitle });
    })
  );
}
/**
 * Fetches every playlist of the hard-coded channel, following
 * `nextPageToken` until the API stops returning one.
 *
 * @param {string} [nextPage] - page token; an empty token is sent when absent.
 * @param {object[]} [currentItems] - playlists gathered from earlier pages.
 * @returns {Observable<object[]>} emits the full list of playlists once.
 */
function getPlayLists(nextPage, currentItems = []) {
  return from(
    new Promise((resolve, reject) => {
      youtube.playlists.list(
        {
          auth: YOUTUBE_SECRET,
          part: 'snippet',
          channelId: 'UC8butISFwT-Wl7EV0hUK0BQ',
          pageToken: nextPage ? nextPage : ''
        },
        (err, data) => {
          if (err) {
            return reject(err);
          }
          return resolve(data.data);
        }
      );
    })
  ).pipe(
    switchMap(({ nextPageToken, items }) => {
      const allItems = items.concat(currentItems);
      // Recurse only when there IS a next page. As an `iif` argument the
      // recursive call ran eagerly and issued a stray request on the last
      // page, because the Promise executor starts the API call immediately.
      return nextPageToken
        ? getPlayLists(nextPageToken, allItems)
        : of(allItems);
    })
  );
}
exports.getYoutubeData = function getYoutubeData() {
return getPlayLists().pipe(
switchMap(playlists => {
return from(playlists).pipe(
concatMap(({ id, snippet: { title } }) =>
getPlaylistItems({ playlistId: id, playlistTitle: title })
)
);
}),
switchMap(({ videos, playlistTitle }) => {
const formattedVideos = videos
.map(video => {
const {
id,
snippet: {
title,
description,
resourceId: { videoId },
thumbnails
}
} = video;
return {
id,
videoId,
title: stripHTML(title),
description: stripURLs(stripHTML(description)),
thumbnails,
playlistTitle
};
})
.reduce(
(chunked, current) => [
...chunked,
...chunkDocument(
current,
['id', 'videoId', 'title', 'thumbnail', 'playlistTitle'],
'description'
)
],
[]
);
return of(formattedVideos);
})
);
};

File diff suppressed because it is too large Load Diff

View File

@ -1,50 +0,0 @@
{
"name": "@freecodecamp/search-indexing",
"private": true,
"version": "0.0.1",
"description": "A repository to hold all of our search assets",
"main": "N/A",
"scripts": {
"dev": "nodemon server",
"init": "node init",
"build:server": "npx babel server --out-dir lib --ignore spec.js,test.js",
"format": "prettier --single-quote es5 --write './!(packages|www)/**/*.js'",
"test": "jest"
},
"repository": {
"type": "git",
"url": "https://github.com/freeCodeCamp/freeCodeCamp.git"
},
"keywords": [
"search",
"algolia",
"freeCodecamp"
],
"author": "freeCodeCamp",
"license": "BSD-3-Clause",
"bugs": {
"url": "https://github.com/freeCodeCamp/freeCodeCamp/issues"
},
"homepage": "https://github.com/freeCodeCamp/freeCodeCamp#readme",
"dependencies": {
"algoliasearch": "^3.25.1",
"axios": "^0.19.0",
"chalk": "^2.3.2",
"debug": "^4.1.1",
"dotenv": "^6.2.0",
"file": "^0.2.2",
"fs-extra": "^7.0.1",
"googleapis": "^37.2.0",
"gray-matter": "^4.0.2",
"lodash": "^4.17.19",
"readdirp-walk": "^1.7.0",
"rxjs": "^6.4.0",
"secure-compare": "^3.0.1",
"validator": "^10.11.0"
},
"devDependencies": {
"html-entities": "^1.2.1",
"jest": "^24.9.0",
"striptags": "^3.1.1"
}
}

View File

@ -1,105 +0,0 @@
const { Observable } = require('rxjs');
const fse = require('fs-extra');
const file = require('file');
const _ = require('lodash');
const { isURL } = require('validator');
const stripTags = require('striptags');
const Entities = require('html-entities').AllHtmlEntities;
const entities = new Entities();
// Matches names ending in .md, .js, .jsx, .htm or .html.
const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
// Matches names ending in .js or .jsx.
const isJSRE = /\.jsx?$/;
// Matches names that start with an underscore or a dot.
const shouldBeIgnoredRE = /^(\_|\.)/;
// Directory names that readDir filters out of its listing.
const excludedDirs = ['search'];
// Matches paths ending in "guides/svn"; listDirectory drops these.
const guideSvnRE = /guides\/svn$/;
exports.isAFileRE = isAFileRE;
exports.isJSRE = isJSRE;
exports.shouldBeIgnoredRE = shouldBeIgnoredRE;
exports.excludedDirs = excludedDirs;
/*
* *
* Directory Helpers *
* *
*/
exports.listDirectory = function listDirectory(start) {
let allDirs = [];
file.walkSync(start, dirPath => {
if (dirPath.includes('.svn')) {
return;
}
allDirs = [...allDirs, dirPath];
});
return allDirs.filter(name => !guideSvnRE.test(name));
};
/**
 * Lists the entries of `dir`, leaving out excluded directories, names that
 * start with "_" or ".", JavaScript files and LICENSE.md.
 *
 * @param {string} [dir] - directory to read; defaults to this module's own.
 * @param {boolean} [returnFiles] - when false, entries whose name matches
 *   isAFileRE are removed as well.
 * @returns {string[]} the remaining entry names.
 */
function readDir(dir = __dirname, returnFiles = false) {
  const isWanted = name =>
    !excludedDirs.includes(name) &&
    !shouldBeIgnoredRE.test(name) &&
    !isJSRE.test(name) &&
    name !== 'LICENSE.md';

  const entries = fse.readdirSync(dir).filter(isWanted);
  if (returnFiles) {
    return entries;
  }
  return entries.filter(entry => !isAFileRE.test(entry));
}
exports.readDir = readDir;
exports.parseDirectory = function parseDirectory(dirLevel, cb) {
return Observable.from(readDir(dirLevel)).flatMap(dir => {
const dirPath = `${dirLevel}/${dir}`;
const subDirs = readDir(dirPath);
if (!subDirs) {
cb(dirPath);
return Observable.of(null);
}
cb(dirPath);
return parseDirectory(dirPath, cb);
});
};
/*
* *
* Document Helpers *
* *
*/
exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) {
const baseDoc = _.pick(doc, pickFields);
const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g);
if (!chunks) {
return [doc];
}
return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk }));
};
// Splits `str` on whitespace, drops empty tokens and tokens that validate as
// URLs, and joins what is left with single spaces.
function stripURLs(str) {
  const tokens = str.split(/\s/);
  const kept = tokens.filter(token => !_.isEmpty(token) && !isURL(token));
  return kept.join(' ');
}
/**
 * Adds the missing ';' to numeric HTML entities such as `&#60` so they can be
 * decoded later.
 *
 * Fixes over the previous implementation:
 * - entities with three or more digits that were already terminated
 *   (e.g. `&#160;`) are no longer corrupted into `&#16;0;`;
 * - an unterminated entity at the very end of the string is now repaired;
 * - an unterminated entity followed by `(`, `)`, `!` or `?` is now repaired
 *   too (the old character class looked like a mistyped `(?!;)` lookahead).
 *
 * @param {string} str - text that may contain unterminated entities.
 * @returns {string} the text with every `&#NN...` entity terminated.
 */
function fixEntities(str) {
  // The lookahead rejects both ';' (already terminated) and a further digit
  // (which would mean the digit run was cut short by backtracking).
  return str.replace(/&#(\d{2,})(?![\d;])/g, '&#$1;');
}
exports.stripURLs = stripURLs;
exports.stripHTML = function stripHTML(text) {
const unescapedStr = entities.decode(fixEntities(text));
return stripTags(unescapedStr);
};

View File

@ -1,128 +0,0 @@
/* global expect */
const _ = require('lodash');
const { stripURLs, stripHTML } = require('../');
describe('utils', () => {
describe('stripURLs', () => {
it('should return a string', () => {
  // Plain text in, string out.
  const result = stripURLs('some string');
  const isString = _.isString(result);
  expect(isString).toBe(true);
});
it('should remove a url from the input', () => {
  // An input made of nothing but a URL collapses to the empty string.
  const onlyUrl = 'https://freecodecamp.org/en/challenges';
  expect(stripURLs(onlyUrl)).toEqual('');
});
it('should leave non-urls intact', () => {
  // Only the URL token disappears; the surrounding words are kept in order.
  const input =
    'Some text before https://freecodecamp.org/en/challenges some text ' +
    'after';
  const result = stripURLs(input);
  expect(result).toEqual('Some text before some text after');
});
it('should preserve punctuation, but remove new line chars', () => {
  // The newline only acts as a token separator; punctuation stays attached.
  const input =
    "http://search.freecodecamp.org\nI'm inclined to say, is just amazing!";
  const result = stripURLs(input);
  expect(result).toEqual("I'm inclined to say, is just amazing!");
});
// Runs stripURLs over a long multi-line description and checks that every
// URL is removed while the remaining words are joined by single spaces.
// NOTE(review): the input below contains `<>` where the expected string
// contains `<20>`; this looks like a character that was garbled when the
// file was rendered - confirm against the original source.
it('should remove urls from a block of text', () => {
const value = stripURLs(
'Learn how Symbols work in JavaScript ES6!\n\nCode:\n<> ' +
'http://codepen.io/beaucarnes/pen/ZLQEWx?editors=0011\n\nMore ' +
'info:\n<> http://www.2ality.com/2014/12/es6-symbols.html\n<> ' +
'http://exploringjs.com/es6/ch_symbols.html\n\nBeau Carnes on ' +
'Twitter: https://twitter.com/carnesbeau\n\n⭐JavaScript Playlists⭐' +
'\n▶JavaScript Basics: https://www.youtube.com/playlist?list=' +
'PLWKjhJtqVAbk2qRZtWSzCIN38JC_NdhW5\n▶ES6: https://www.youtube.com' +
'/playlist?list=PLWKjhJtqVAbljtmmeS0c-CEl2LdE-eR_F\n▶Design ' +
'Patterns: https://www.youtube.com/playlist?list=PLWKjhJtqVAbnZtkA' +
'I3BqcYxKnfWn_C704\n▶Data Structures and Algorithms: https://www.' +
'youtube.com/playlist?list=PLWKjhJtqVAbkso-IbgiiP48n-O-JQA9PJ\n▶' +
'Clean Code: https://www.youtube.com/playlist?list=PLWKjhJtqVAbkK2' +
"4EaPurzMq0-kw5U9pJh\n\n-\nWe're busy people who learn to code, " +
'then practice by building projects for nonprofits. Learn ' +
'Full-stack JavaScript, build a portfolio, and get great ' +
'references with our open source community.\n\nJoin our community ' +
'at https://freecodecamp.com\nFollow us on twitter: ' +
'https://twitter.com/freecodecamp\nLike us on Facebook: https://' +
'www.facebook.com/freecodecamp\nFollow Quincy on Quora: https://' +
'www.quora.com/Quincy-Larson'
);
// Expected: the same text with URLs gone and all whitespace runs collapsed.
const expected =
'Learn how Symbols work in JavaScript ES6! Code: <20> ' +
'More info: <20> <20> Beau Carnes on Twitter: ⭐JavaScript Playlists⭐ ▶' +
'JavaScript Basics: ▶ES6: ▶Design Patterns: ▶Data Structures and ' +
"Algorithms: ▶Clean Code: - We're busy people who learn to code, " +
'then practice by building projects for nonprofits. Learn Full-stack ' +
'JavaScript, build a portfolio, and get great references with our ' +
'open source community. Join our community at Follow us on twitter: ' +
'Like us on Facebook: Follow Quincy on Quora:';
expect(value).toEqual(expected);
});
});
describe('stripHTML', () => {
// Checks that stripHTML removes <code> tags and keeps the text they wrap.
// Nothing is inserted where a tag is removed, so words that were separated
// only by tags end up adjacent (e.g. "WorldThen").
it('should remove simple html tags from a block of text', () => {
const value = stripHTML(
"Now we've proven that every HTML page has a <code>body</code> " +
'element, and that its <code>body</code> element can also be ' +
'styled with CSS.Remember, you can style your <code>body</code> ' +
'element just like any other HTML element, and all your other ' +
"elements will inherit your <code>body</code> element's styles." +
'First, create a <code>h1</code> element with the text <code>' +
"Hello World</code>Then, let's give all elements on your page the " +
'color of <code>green</code> by adding <code>color: green;</code> ' +
'to...'
);
const expected =
"Now we've proven that every HTML page has a body " +
'element, and that its body element can also be styled with CSS.' +
'Remember, you can style your body element just like any other HTML ' +
'element, and all your other elements will inherit your body ' +
"element's styles.First, create a h1 element with the text Hello " +
"WorldThen, let's give all elements on your page the color of green " +
'by adding color: green; to...';
expect(value).toEqual(expected);
});
// Checks tags written as numeric entities without a trailing ';'
// (`&#60img ...&#62`): after stripHTML the whole escaped <img> tag is gone
// from the expected text, along with the ordinary tags.
it('should remove escaped tags from a block of text', () => {
const value = stripHTML(
'You can add images to your website by using the <code>img</code> ' +
"element, and point to a specific image's URL using the " +
'<code>src</code> attribute.An example of this would be:<code>' +
'&#60img src="https://www.your-image-source.com/your-image.jpg"' +
'&#62</code>All <code>img</code> elements <strong>must</strong> ' +
'have an <code>alt</code> attribute. The text inside an ' +
'<code>alt</code> attribute is used for screen readers to improve ' +
"accessibility and is displayed if the image fails to load.Let's " +
'add an <code>alt</code> attribute to our <code>img</code> example ' +
'above:<code>&#60img src="https://www.your-image-source.com/your-' +
'image.jpg" alt="Author standing on a beach with two thumbs up. ' +
'"&#62</code>Note that in most cases, <code>img</code> elements ' +
'are self-closing.Try it with this image:<code>https://bit.ly/fcc-' +
'relaxing-cat</code>'
);
// the best I could do here is allow things like
// image:https://bit.ly/fcc-relaxing-cat
// trying to strip this further could invalidate other entries
const expected =
'You can add images to your website by using the img ' +
"element, and point to a specific image's URL using the src " +
'attribute.An example of this would be:All img elements must have ' +
'an alt attribute. The text inside an alt attribute is used for ' +
'screen readers to improve accessibility and is displayed if the ' +
"image fails to load.Let's add an alt attribute to our img example " +
'above:Note that in most cases, img elements are self-closing.Try ' +
'it with this image:https://bit.ly/fcc-relaxing-cat';
expect(value).toEqual(expected);
});
});
});