| 
									
										
										
										
											2018-09-30 23:01:58 +01:00
										 |  |  | --- | 
					
						
							|  |  |  | title: Tokenize a string with escaping | 
					
						
							|  |  |  | id: 594faaab4e2a8626833e9c3d | 
					
						
							|  |  |  | challengeType: 5 | 
					
						
							| 
									
										
										
										
											2019-08-05 09:17:33 -07:00
										 |  |  | forumTopicId: 302338 | 
					
						
							| 
									
										
										
										
											2018-09-30 23:01:58 +01:00
										 |  |  | --- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## Description
 | 
					
						
							|  |  |  | <section id='description'> | 
					
						
							|  |  |  | Write a function or program that can split a string at each non-escaped occurrence of a separator character. | 
					
						
							|  |  |  | It should accept three input parameters: | 
					
						
							| 
									
										
										
										
											2019-03-10 22:12:52 +09:00
										 |  |  | <ul> | 
					
						
							| 
									
										
										
										
											2019-06-14 20:04:16 +09:00
										 |  |  |   <li>The <strong>string</strong></li> | 
					
						
							|  |  |  |   <li>The <strong>separator character</strong></li> | 
					
						
							|  |  |  |   <li>The <strong>escape character</strong></li> | 
					
						
							| 
									
										
										
										
											2019-03-10 22:12:52 +09:00
										 |  |  | </ul> | 
					
						
							|  |  |  | It should output a list of strings. | 
					
						
							|  |  |  | Rules for splitting: | 
					
						
							|  |  |  | <ul> | 
					
						
							|  |  |  |   <li>The fields that were separated by the separators become the elements of the output list.</li> | 
					
						
							|  |  |  |   <li>Empty fields should be preserved, even at the start and end.</li> | 
					
						
							|  |  |  | </ul> | 
					
						
							|  |  |  | Rules for escaping: | 
					
						
							|  |  |  | <ul> | 
					
						
							|  |  |  |   <li>"Escaped" means preceded by an occurrence of the escape character that is not already escaped itself.</li> | 
					
						
							|  |  |  |   <li>When the escape character precedes a character that has no special meaning, it still counts as an escape (but does not do anything special).</li> | 
					
						
							|  |  |  |   <li>Each occurrence of the escape character that was used to escape something should not become part of the output.</li> | 
					
						
							|  |  |  | </ul> | 
					
						
							|  |  |  | Demonstrate that your function satisfies the following test case: | 
					
						
							|  |  |  | Given the string | 
					
						
							|  |  |  | <pre>one^|uno||three^^^^|four^^^|^cuatro|</pre> | 
					
						
							|  |  |  | and using <code>|</code> as a separator and <code>^</code> as escape character, your function should output the following array: | 
					
						
							|  |  |  | <pre> | 
					
						
							|  |  |  |   ['one|uno', '', 'three^^', 'four^|cuatro', ''] | 
					
						
							|  |  |  | </pre> | 
					
						
							| 
									
										
										
										
											2018-09-30 23:01:58 +01:00
										 |  |  | </section> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## Instructions
 | 
					
						
							|  |  |  | <section id='instructions'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </section> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## Tests
 | 
					
						
							|  |  |  | <section id='tests'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ```yml | 
					
						
							| 
									
										
										
										
											2018-10-04 14:37:37 +01:00
										 |  |  | tests: | 
					
						
							|  |  |  |   - text: <code>tokenize</code> is a function. | 
					
						
							| 
									
										
										
										
											2019-07-26 05:24:52 -07:00
										 |  |  |     testString: assert(typeof tokenize === 'function'); | 
					
						
							| 
									
										
										
										
											2018-10-04 14:37:37 +01:00
										 |  |  |   - text: <code>tokenize</code> should return an array. | 
					
						
							| 
									
										
										
										
											2019-07-26 05:24:52 -07:00
										 |  |  |     testString: assert(typeof tokenize('a', 'b', 'c') === 'object'); | 
					
						
							| 
									
										
										
										
											2019-03-19 15:04:03 +05:30
										 |  |  |   - text: <code>tokenize('one^|uno||three^^^^|four^^^|^cuatro|', '|', '^') </code> should return <code>['one|uno', '', 'three^^', 'four^|cuatro', '']</code> | 
					
						
							| 
									
										
										
										
											2019-07-26 05:24:52 -07:00
										 |  |  |     testString: assert.deepEqual(tokenize(testStr1, '|', '^'), res1); | 
					
						
							| 
									
										
										
										
											2018-10-20 21:02:47 +03:00
										 |  |  |   - text: <code>tokenize('a@&bcd&ef&&@@hi', '&', '@')</code> should return <code>['a&bcd', 'ef', '', '@hi']</code> | 
					
						
							| 
									
										
										
										
											2019-07-26 05:24:52 -07:00
										 |  |  |     testString: assert.deepEqual(tokenize(testStr2, '&', '@'), res2); | 
					
						
							| 
									
										
										
										
											2018-09-30 23:01:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | ``` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </section> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## Challenge Seed
 | 
					
						
							|  |  |  | <section id='challengeSeed'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div id='js-seed'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ```js | 
					
						
							/**
							 * Challenge seed: split `str` at each non-escaped occurrence of `sep`.
							 *
							 * The parameter order matches the description and the tests:
							 * the string, then the separator character, then the escape character.
							 * The body is a placeholder to be replaced with an implementation
							 * that returns the list of fields.
							 *
							 * @param {string} str - The string to split.
							 * @param {string} sep - The separator character.
							 * @param {string} esc - The escape character.
							 */
							function tokenize(str, sep, esc) {
							  return true;
							}
					
						
							|  |  |  | ``` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ### After Test
 | 
					
						
							|  |  |  | <div id='js-teardown'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ```js | 
					
						
							| 
									
										
										
										
											2018-10-20 21:02:47 +03:00
										 // Fixture pair used by the test that calls tokenize(testStr1, '|', '^'): input string and expected result.
										 |  |  | const testStr1 = 'one^|uno||three^^^^|four^^^|^cuatro|'; | 
					
						
							|  |  |  | const res1 = ['one|uno', '', 'three^^', 'four^|cuatro', '']; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TODO add more tests | 
					
						
							// Fixture pair used by the test that calls tokenize(testStr2, '&', '@'): input string and expected result.
							|  |  |  | const testStr2 = 'a@&bcd&ef&&@@hi'; | 
					
						
							|  |  |  | const res2 = ['a&bcd', 'ef', '', '@hi']; | 
					
						
							| 
									
										
										
										
											2018-09-30 23:01:58 +01:00
										 |  |  | ``` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </section> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ## Solution
 | 
					
						
							|  |  |  | <section id='solution'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ```js | 
					
						
							/**
							 * Split `str` at every non-escaped occurrence of `charDelim`.
							 *
							 * tokenize :: String -> Character -> Character -> [String]
							 *
							 * A non-escaped `charEsc` is dropped from the output and makes the
							 * character that follows it literal (including a separator or another
							 * escape character). Empty fields are preserved, including leading and
							 * trailing ones, so the result always has at least one element.
							 *
							 * @param {string} str - The string to split.
							 * @param {string} charDelim - The separator character.
							 * @param {string} charEsc - The escape character.
							 * @returns {string[]} The fields in order of appearance.
							 */
							function tokenize(str, charDelim, charEsc) {
							  const fields = [];
							  let field = '';
							  let isEscaped = false;

							  // Iterate by code point (not UTF-16 code unit) so that a separator or
							  // escape character outside the BMP, such as an emoji, can still match.
							  for (const ch of str) {
							    const isBreak = !isEscaped && ch === charDelim;
							    const isEscape = !isEscaped && ch === charEsc;

							    if (isBreak) {
							      // Close the current field; it may be empty and is still kept.
							      fields.push(field);
							      field = '';
							    } else if (!isEscape) {
							      // Ordinary or escaped character: copy it through verbatim.
							      field += ch;
							    }

							    // An active escape character is swallowed and affects only the next character.
							    isEscaped = isEscape;
							  }

							  // The text after the last separator is the final field (possibly empty).
							  fields.push(field);
							  return fields;
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ``` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </section> |