106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| 
								 | 
							
								const { Observable } = require('rxjs');
							 | 
						||
| 
								 | 
							
								const fse = require('fs-extra');
							 | 
						||
| 
								 | 
							
								const file = require('file');
							 | 
						||
| 
								 | 
							
								const _ = require('lodash');
							 | 
						||
| 
								 | 
							
								const { isURL } = require('validator');
							 | 
						||
| 
								 | 
							
								const stripTags = require('striptags');
							 | 
						||
| 
								 | 
							
								const Entities = require('html-entities').AllHtmlEntities;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Single shared html-entities instance; stripHTML calls its decode() method.
								const entities = new Entities();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Matches names ending in a markdown, JS/JSX or HTML extension.
								const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
								exports.isAFileRE = isAFileRE;

								// Matches names ending in .js or .jsx.
								const isJSRE = /\.jsx?$/;
								exports.isJSRE = isJSRE;

								// Matches names that begin with an underscore or a dot.
								const shouldBeIgnoredRE = /^(\_|\.)/;
								exports.shouldBeIgnoredRE = shouldBeIgnoredRE;

								// Entry names that readDir always removes from its listing.
								const excludedDirs = ['search'];
								exports.excludedDirs = excludedDirs;

								// Matches paths ending in "guides/svn". Module-private: not exported.
								const guideSvnRE = /guides\/svn$/;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 *                   *
							 | 
						||
| 
								 | 
							
								 * Directory Helpers *
							 | 
						||
| 
								 | 
							
								 *                   *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								exports.listDirectory = function listDirectory(start) {
							 | 
						||
| 
								 | 
							
								  let allDirs = [];
							 | 
						||
| 
								 | 
							
								  file.walkSync(start, dirPath => {
							 | 
						||
| 
								 | 
							
								    if (dirPath.includes('.svn')) {
							 | 
						||
| 
								 | 
							
								      return;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    allDirs = [...allDirs, dirPath];
							 | 
						||
| 
								 | 
							
								  });
							 | 
						||
| 
								 | 
							
								  return allDirs.filter(name => !guideSvnRE.test(name));
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Synchronously lists the entries of `dir`, dropping everything this module
								 * ignores: names in `excludedDirs`, names matching `shouldBeIgnoredRE`
								 * (leading "_" or "."), names matching `isJSRE`, and "LICENSE.md".
								 *
								 * @param {string} [dir=__dirname] - Directory to list.
								 * @param {boolean} [returnFiles=false] - When truthy, names matching
								 *   `isAFileRE` are kept; otherwise they are filtered out as well.
								 * @returns {string[]} The remaining entry names.
								 */
								function readDir(dir = __dirname, returnFiles = false) {
								  const isWanted = entry => {
								    if (excludedDirs.includes(entry)) {
								      return false;
								    }
								    if (shouldBeIgnoredRE.test(entry)) {
								      return false;
								    }
								    if (isJSRE.test(entry)) {
								      return false;
								    }
								    return entry !== 'LICENSE.md';
								  };
								  const entries = fse.readdirSync(dir).filter(isWanted);
								  if (returnFiles) {
								    return entries;
								  }
								  // Names that look like files (by extension) are not wanted here.
								  return entries.filter(entry => !isAFileRE.test(entry));
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Expose readDir to other modules; parseDirectory in this file also calls it directly.
								exports.readDir = readDir;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								exports.parseDirectory = function parseDirectory(dirLevel, cb) {
							 | 
						||
| 
								 | 
							
								  return Observable.from(readDir(dirLevel)).flatMap(dir => {
							 | 
						||
| 
								 | 
							
								    const dirPath = `${dirLevel}/${dir}`;
							 | 
						||
| 
								 | 
							
								    const subDirs = readDir(dirPath);
							 | 
						||
| 
								 | 
							
								    if (!subDirs) {
							 | 
						||
| 
								 | 
							
								      cb(dirPath);
							 | 
						||
| 
								 | 
							
								      return Observable.of(null);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    cb(dirPath);
							 | 
						||
| 
								 | 
							
								    return parseDirectory(dirPath, cb);
							 | 
						||
| 
								 | 
							
								  });
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 *                  *
							 | 
						||
| 
								 | 
							
								 * Document Helpers *
							 | 
						||
| 
								 | 
							
								 *                  *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) {
							 | 
						||
| 
								 | 
							
								  const baseDoc = _.pick(doc, pickFields);
							 | 
						||
| 
								 | 
							
								  const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g);
							 | 
						||
| 
								 | 
							
								  if (!chunks) {
							 | 
						||
| 
								 | 
							
								    return [doc];
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk }));
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Removes every whitespace-separated token that `isURL` accepts and joins
								 * the remaining non-empty tokens with single spaces.
								 *
								 * @param {string} str - Text to clean.
								 * @returns {string} The text without URL tokens; runs of whitespace are
								 *   collapsed to one space as a side effect of the split/join.
								 */
								function stripURLs(str) {
								  const keptTokens = [];
								  for (const token of str.split(/\s/)) {
								    // Empty tokens come from consecutive whitespace characters.
								    if (_.isEmpty(token) || isURL(token)) {
								      continue;
								    }
								    keptTokens.push(token);
								  }
								  return keptTokens.join(' ');
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Adds the missing ";" to unterminated two-digit numeric character
								 * references, e.g. "it&#39s" becomes "it&#39;s".
								 *
								 * A reference is only touched when it is followed by a character that is
								 * none of: a digit, ";", "(", ")", "!" or "?".
								 *  - Digits are excluded so that longer references such as "&#160;" or
								 *    "&#8217;" are left intact instead of being split after two digits.
								 *  - The other exclusions are carried over unchanged from the pattern
								 *    this function has always used.
								 * A reference at the very end of the string is left untouched.
								 *
								 * @param {string} str - Text possibly containing unterminated references.
								 * @returns {string} The text with the missing semicolons inserted.
								 */
								function fixEntities(str) {
								  // The lookahead inspects the next character without consuming it, so two
								  // adjacent unterminated references ("&#39&#34 ") are both repaired.
								  return str.replace(/&#\d\d(?=[^\d(!?;)])/g, '$&;');
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Expose stripURLs to other modules.
								exports.stripURLs = stripURLs;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								exports.stripHTML = function stripHTML(text) {
							 | 
						||
| 
								 | 
							
								  const unescapedStr = entities.decode(fixEntities(text));
							 | 
						||
| 
								 | 
							
								  return stripTags(unescapedStr);
							 | 
						||
| 
								 | 
							
								};
							 |