106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			106 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
|   | const { Observable } = require('rxjs'); | ||
|  | const fse = require('fs-extra'); | ||
|  | const file = require('file'); | ||
|  | const _ = require('lodash'); | ||
|  | const { isURL } = require('validator'); | ||
|  | const stripTags = require('striptags'); | ||
|  | const Entities = require('html-entities').AllHtmlEntities; | ||
|  | 
 | ||
|  | const entities = new Entities(); | ||
|  | 
 | ||
// Matches names ending in .md, .js/.jsx or .htm/.html.
const isAFileRE = /(\.md|\.jsx?|\.html?)$/;
// Matches names ending in .js or .jsx.
const isJSRE = /\.jsx?$/;
// Matches names that begin with an underscore or a dot.
const shouldBeIgnoredRE = /^(\_|\.)/;
// Entry names that readDir() always leaves out of its listing.
const excludedDirs = ['search'];
// Matches paths ending in "guides/svn"; only used inside this module.
const guideSvnRE = /guides\/svn$/;

// Publish the shared patterns; guideSvnRE is intentionally not exported.
Object.assign(exports, {
  isAFileRE,
  isJSRE,
  shouldBeIgnoredRE,
  excludedDirs,
});
|  | 
 | ||
|  | /* | ||
|  |  *                   * | ||
|  |  * Directory Helpers * | ||
|  |  *                   * | ||
|  |  */ | ||
|  | 
 | ||
|  | exports.listDirectory = function listDirectory(start) { | ||
|  |   let allDirs = []; | ||
|  |   file.walkSync(start, dirPath => { | ||
|  |     if (dirPath.includes('.svn')) { | ||
|  |       return; | ||
|  |     } | ||
|  |     allDirs = [...allDirs, dirPath]; | ||
|  |   }); | ||
|  |   return allDirs.filter(name => !guideSvnRE.test(name)); | ||
|  | }; | ||
|  | 
 | ||
/**
 * Synchronously lists the entries of `dir`, minus everything this module
 * ignores: names in `excludedDirs`, names matching `shouldBeIgnoredRE`
 * (leading "_" or "."), names matching `isJSRE` (.js/.jsx) and "LICENSE.md".
 *
 * @param {string} [dir=__dirname] - Directory to read.
 * @param {boolean} [returnFiles=false] - When true, names matching
 *   `isAFileRE` are kept; otherwise they are filtered out as well.
 * @returns {string[]} Entry names (not full paths) that survive the filters.
 */
function readDir(dir = __dirname, returnFiles = false) {
  const isWanted = name =>
    !excludedDirs.includes(name) &&
    !shouldBeIgnoredRE.test(name) &&
    !isJSRE.test(name) &&
    name !== 'LICENSE.md';

  const entries = fse.readdirSync(dir).filter(isWanted);
  if (returnFiles) {
    return entries;
  }
  return entries.filter(name => !isAFileRE.test(name));
}

exports.readDir = readDir;
|  | 
 | ||
|  | exports.parseDirectory = function parseDirectory(dirLevel, cb) { | ||
|  |   return Observable.from(readDir(dirLevel)).flatMap(dir => { | ||
|  |     const dirPath = `${dirLevel}/${dir}`; | ||
|  |     const subDirs = readDir(dirPath); | ||
|  |     if (!subDirs) { | ||
|  |       cb(dirPath); | ||
|  |       return Observable.of(null); | ||
|  |     } | ||
|  |     cb(dirPath); | ||
|  |     return parseDirectory(dirPath, cb); | ||
|  |   }); | ||
|  | }; | ||
|  | 
 | ||
|  | /* | ||
|  |  *                  * | ||
|  |  * Document Helpers * | ||
|  |  *                  * | ||
|  |  */ | ||
|  | 
 | ||
|  | exports.chunkDocument = function chunkDocument(doc, pickFields, chunkField) { | ||
|  |   const baseDoc = _.pick(doc, pickFields); | ||
|  |   const chunks = doc[chunkField].match(/(?:[\n\s]+[\S]+){1,200}/g); | ||
|  |   if (!chunks) { | ||
|  |     return [doc]; | ||
|  |   } | ||
|  |   return chunks.map(chunk => ({ ...baseDoc, [chunkField]: chunk })); | ||
|  | }; | ||
|  | 
 | ||
/**
 * Removes every whitespace-separated token that `isURL` recognises as a URL.
 *
 * The text is split on single whitespace characters, empty tokens are
 * discarded, and the surviving tokens are re-joined with single spaces, so
 * the original spacing and line breaks are not preserved.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text without URL tokens.
 */
function stripURLs(str) {
  const kept = [];
  for (const token of str.split(/\s/)) {
    if (_.isEmpty(token) || isURL(token)) {
      continue;
    }
    kept.push(token);
  }
  return kept.join(' ');
}
|  | 
 | ||
/**
 * Adds the missing ";" to two-digit numeric character references such as
 * "&#39" so that they can be decoded afterwards.
 *
 * A reference is left alone when it is followed by:
 *   - ";"  (already terminated),
 *   - another digit (it is a longer reference such as "&#169;", which the
 *     previous implementation corrupted into "&#16;9;"),
 *   - "(", ")", "!" or "?" (kept from the original pattern).
 *
 * The check is a lookahead, so the following character is not consumed.
 * Back-to-back references ("&#12&#34x") and a reference at the very end of
 * the string are therefore repaired too.
 *
 * @param {string} str - Text that may contain unterminated references.
 * @returns {string} The text with the missing semicolons inserted.
 */
function fixEntities(str) {
  // "$&" re-inserts the matched "&#NN"; the literal ";" terminates it.
  return str.replace(/&#\d\d(?![\d;(!?)])/g, '$&;');
}
|  | 
 | ||
|  | exports.stripURLs = stripURLs; | ||
|  | 
 | ||
|  | exports.stripHTML = function stripHTML(text) { | ||
|  |   const unescapedStr = entities.decode(fixEntities(text)); | ||
|  |   return stripTags(unescapedStr); | ||
|  | }; |