2018-09-30 23:01:58 +01:00
|
|
|
---
|
|
|
|
id: 594faaab4e2a8626833e9c3d
|
2020-11-27 19:02:05 +01:00
|
|
|
title: Tokenize a string with escaping
|
2018-09-30 23:01:58 +01:00
|
|
|
challengeType: 5
|
2019-08-05 09:17:33 -07:00
|
|
|
forumTopicId: 302338
|
2021-01-13 03:31:00 +01:00
|
|
|
dashedName: tokenize-a-string-with-escaping
|
2018-09-30 23:01:58 +01:00
|
|
|
---
|
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
# --description--
|
|
|
|
|
2018-09-30 23:01:58 +01:00
|
|
|
Write a function or program that can split a string at each non-escaped occurrence of a separator character.
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2018-09-30 23:01:58 +01:00
|
|
|
It should accept three input parameters:
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
<ul>
|
2019-06-14 20:04:16 +09:00
|
|
|
<li>The <strong>string</strong></li>
|
|
|
|
<li>The <strong>separator character</strong></li>
|
|
|
|
<li>The <strong>escape character</strong></li>
|
2019-03-10 22:12:52 +09:00
|
|
|
</ul>
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
It should output a list of strings.
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
Rules for splitting:
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
<ul>
|
|
|
|
<li>The fields that were separated by the separators become the elements of the output list.</li>
|
|
|
|
<li>Empty fields should be preserved, even at the start and end.</li>
|
|
|
|
</ul>
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
Rules for escaping:
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
<ul>
|
|
|
|
<li>"Escaped" means preceded by an occurrence of the escape character that is not already escaped itself.</li>
|
|
|
|
<li>When the escape character precedes a character that has no special meaning, it still counts as an escape (but does not do anything special).</li>
|
|
|
|
<li>Each occurrence of the escape character that was used to escape something should not become part of the output.</li>
|
|
|
|
</ul>
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
Demonstrate that your function satisfies the following test-case:
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
Given the string
|
2020-11-27 19:02:05 +01:00
|
|
|
|
2019-03-10 22:12:52 +09:00
|
|
|
<pre>one^|uno||three^^^^|four^^^|^cuatro|</pre>
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
and using `|` as a separator and `^` as escape character, your function should output the following array:
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
<pre> ['one|uno', '', 'three^^', 'four^|cuatro', '']
|
|
|
|
</pre>
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
# --hints--
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
`tokenize` should be a function.
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
```js
|
|
|
|
assert(typeof tokenize === 'function');
|
2018-09-30 23:01:58 +01:00
|
|
|
```
|
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
`tokenize` should return an array.
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
```js
|
|
|
|
// `typeof` reports 'object' for null and plain objects too, so test for an array explicitly.
assert(Array.isArray(tokenize('a', 'b', 'c')));
|
|
|
|
```
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
`tokenize('one^|uno||three^^^^|four^^^|^cuatro|', '|', '^')` should return `['one|uno', '', 'three^^', 'four^|cuatro', '']`
|
2018-09-30 23:01:58 +01:00
|
|
|
|
|
|
|
```js
|
2020-11-27 19:02:05 +01:00
|
|
|
assert.deepEqual(tokenize(testStr1, '|', '^'), res1);
|
2018-09-30 23:01:58 +01:00
|
|
|
```
|
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
`tokenize('a@&bcd&ef&&@@hi', '&', '@')` should return `['a&bcd', 'ef', '', '@hi']`
|
|
|
|
|
|
|
|
```js
|
|
|
|
assert.deepEqual(tokenize(testStr2, '&', '@'), res2);
|
|
|
|
```
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
# --seed--
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
## --after-user-code--
|
2018-09-30 23:01:58 +01:00
|
|
|
|
|
|
|
```js
|
2018-10-20 21:02:47 +03:00
|
|
|
// Fixture 1: input and expected tokens for the case split on '|' with '^' as the escape character.
const testStr1 = 'one^|uno||three^^^^|four^^^|^cuatro|';
|
|
|
|
const res1 = ['one|uno', '', 'three^^', 'four^|cuatro', ''];
|
|
|
|
|
|
|
|
// TODO add more tests
|
|
|
|
// Fixture 2: input and expected tokens for the case split on '&' with '@' as the escape character.
const testStr2 = 'a@&bcd&ef&&@@hi';
|
|
|
|
const res2 = ['a&bcd', 'ef', '', '@hi'];
|
2018-09-30 23:01:58 +01:00
|
|
|
```
|
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
## --seed-contents--
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
```js
|
|
|
|
// Starter stub: ignores its arguments and always returns `true`; replace the body with a real implementation.
function tokenize(str, sep, esc) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
```
|
2018-09-30 23:01:58 +01:00
|
|
|
|
2020-11-27 19:02:05 +01:00
|
|
|
# --solutions--
|
2018-09-30 23:01:58 +01:00
|
|
|
|
|
|
|
```js
|
|
|
|
// tokenize :: String -> Character -> Character -> [String]
/**
 * Splits a string at every non-escaped occurrence of a separator character.
 *
 * A character is "escaped" when it directly follows an escape character that
 * is not itself escaped. Escape characters that escape something are dropped
 * from the output; the escaped character is kept literally. Empty fields are
 * preserved, including at the start and the end of the input.
 *
 * The input is walked by Unicode code point, so a separator or escape
 * character outside the Basic Multilingual Plane (e.g. an emoji) is matched
 * as a single character.
 *
 * @param {string} str - The string to tokenize.
 * @param {string} charDelim - The separator character.
 * @param {string} charEsc - The escape character.
 * @returns {string[]} The list of fields, in order of appearance.
 */
function tokenize(str, charDelim, charEsc) {
  const tokens = [];
  let current = '';
  let escaped = false;

  // for...of iterates code points, not UTF-16 code units.
  for (const ch of str) {
    const isBreak = !escaped && ch === charDelim;
    const isEscape = !escaped && ch === charEsc;

    if (isBreak) {
      // Unescaped separator: close the current field (possibly empty).
      tokens.push(current);
      current = '';
    } else if (!isEscape) {
      // Ordinary or escaped character: keep it literally.
      current += ch;
    }

    // Only an unescaped escape character escapes the next character.
    escaped = isEscape;
  }

  // The final field is always emitted, even when it is empty.
  tokens.push(current);
  return tokens;
}
|
|
|
|
```
|