106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
const { Observable } = require('rxjs');
 | 
						|
const fse = require('fs-extra');
 | 
						|
const file = require('file');
 | 
						|
const _ = require('lodash');
 | 
						|
const { isURL } = require('validator');
 | 
						|
const stripTags = require('striptags');
 | 
						|
const Entities = require('html-entities').AllHtmlEntities;
 | 
						|
 | 
						|
const entities = new Entities();
 | 
						|
 | 
						|
// Names ending in .md, .js, .jsx, .htm or .html.
const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
// Names ending in .js or .jsx.
const isJSRE = /\.jsx?$/;
// Names that start with an underscore or a dot.
const shouldBeIgnoredRE = /^(\_|\.)/;
// Entry names that readDir removes from its results.
const excludedDirs = ['search'];
// Paths ending in "guides/svn" (kept private to this module).
const guideSvnRE = /guides\/svn$/;

// Publish the shared patterns and the exclusion list in one place.
Object.assign(exports, {
  isAFileRE,
  isJSRE,
  shouldBeIgnoredRE,
  excludedDirs
});
 | 
						|
 | 
						|
/*
 | 
						|
 *                   *
 | 
						|
 * Directory Helpers *
 | 
						|
 *                   *
 | 
						|
 */
 | 
						|
 | 
						|
exports.listDirectory = function listDirectory(start) {
 | 
						|
  let allDirs = [];
 | 
						|
  file.walkSync(start, dirPath => {
 | 
						|
    if (dirPath.includes('.svn')) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    allDirs = [...allDirs, dirPath];
 | 
						|
  });
 | 
						|
  return allDirs.filter(name => !guideSvnRE.test(name));
 | 
						|
};
 | 
						|
 | 
						|
/**
 * Synchronously lists the entry names in `dir`, minus the ones this module
 * never wants to see.
 *
 * An entry is dropped when it is listed in `excludedDirs`, matches
 * `shouldBeIgnoredRE` or `isJSRE`, or is exactly 'LICENSE.md'. Unless
 * `returnFiles` is truthy, entries matching `isAFileRE` are dropped as well.
 *
 * @param {string} [dir=__dirname] - Directory to read.
 * @param {boolean} [returnFiles=false] - Keep entries matching `isAFileRE`.
 * @returns {string[]} The surviving entry names.
 */
function readDir(dir = __dirname, returnFiles = false) {
  const isWanted = entry =>
    !excludedDirs.includes(entry) &&
    !shouldBeIgnoredRE.test(entry) &&
    !isJSRE.test(entry) &&
    entry !== 'LICENSE.md';

  const entries = fse.readdirSync(dir).filter(isWanted);
  if (returnFiles) {
    return entries;
  }
  return entries.filter(entry => !isAFileRE.test(entry));
}

exports.readDir = readDir;
 | 
						|
 | 
						|
exports.parseDirectory = function parseDirectory(dirLevel, cb) {
 | 
						|
  return Observable.from(readDir(dirLevel)).flatMap(dir => {
 | 
						|
    const dirPath = `${dirLevel}/${dir}`;
 | 
						|
    const subDirs = readDir(dirPath);
 | 
						|
    if (!subDirs) {
 | 
						|
      cb(dirPath);
 | 
						|
      return Observable.of(null);
 | 
						|
    }
 | 
						|
    cb(dirPath);
 | 
						|
    return parseDirectory(dirPath, cb);
 | 
						|
  });
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 *                  *
 | 
						|
 * Document Helpers *
 | 
						|
 *                  *
 | 
						|
 */
 | 
						|
 | 
						|
exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) {
 | 
						|
  const baseDoc = _.pick(doc, pickFields);
 | 
						|
  const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g);
 | 
						|
  if (!chunks) {
 | 
						|
    return [doc];
 | 
						|
  }
 | 
						|
  return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk }));
 | 
						|
};
 | 
						|
 | 
						|
/**
 * Removes every whitespace-delimited token that `isURL` accepts and rejoins
 * the remaining tokens with single spaces.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text without URL tokens and with whitespace collapsed.
 */
function stripURLs(str) {
  const kept = [];
  for (const token of str.split(/\s/)) {
    // Empty tokens come from consecutive whitespace; they are never passed to isURL.
    if (_.isEmpty(token) || isURL(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept.join(' ');
}
 | 
						|
 | 
						|
/**
 * Repairs two-digit decimal character references that are missing their
 * terminating semicolon, e.g. "it&#39s" becomes "it&#39;s".
 *
 * A reference is repaired only when it is followed by a character other than
 * a digit or one of `( ! ? ; )`. Excluding digits keeps longer, well-formed
 * references such as "&#8217;" or "&#123;" intact instead of splitting them
 * after their second digit. A reference at the very end of the string is left
 * untouched.
 *
 * @param {string} str - Text that may contain unterminated references.
 * @returns {string} The text with the missing semicolons inserted.
 */
function fixEntities(str) {
  // The lookahead inspects the following character without consuming it, so
  // back-to-back references ("&#34&#34x") are both repaired.
  return str.replace(/&#\d\d(?=[^\d(!?;)])/g, '$&;');
}
 | 
						|
 | 
						|
exports.stripURLs = stripURLs;
 | 
						|
 | 
						|
exports.stripHTML = function stripHTML(text) {
 | 
						|
  const unescapedStr = entities.decode(fixEntities(text));
 | 
						|
  return stripTags(unescapedStr);
 | 
						|
};
 |