diff --git a/curriculum/challenges/_meta/rosetta-code/meta.json b/curriculum/challenges/_meta/rosetta-code/meta.json index c7c610e68a..d33b8097f6 100644 --- a/curriculum/challenges/_meta/rosetta-code/meta.json +++ b/curriculum/challenges/_meta/rosetta-code/meta.json @@ -624,6 +624,10 @@ "594810f028c0303b75339ad3", "Vector dot product" ], + [ + "5e94a54cc7b022105bf0fd2c", + "Word frequency" + ], [ "594810f028c0303b75339ad4", "Word wrap" diff --git a/curriculum/challenges/english/10-coding-interview-prep/rosetta-code/word-frequency.english.md b/curriculum/challenges/english/10-coding-interview-prep/rosetta-code/word-frequency.english.md new file mode 100644 index 0000000000..50c6059e7c --- /dev/null +++ b/curriculum/challenges/english/10-coding-interview-prep/rosetta-code/word-frequency.english.md @@ -0,0 +1,122 @@ +--- +id: 5e94a54cc7b022105bf0fd2c +title: Word frequency +challengeType: 5 +isHidden: false +forumTopicId: 393913 +--- + +## Description + +
+Given a text string and an integer n, return the n most common words in the file (and the number of their occurrences) in decreasing frequency. +
+ +## Instructions + +
+Write a function to count the occurrences of each word and return the n most commons words along with the number of their occurences in decreasing frequency. +The function should return a 2D array with each of the elements in the following form: [word, freq]. word should be the lowercase version of the word and freq the number denoting the count. +The function should return an empty array, if no string is provided. +The function should be case insensitive, for example, the strings "Hello" and "hello" should be treated the same. +You can treat words that have special characters such as underscores, dashes, apostrophes, commas, etc., as distinct words. +For example, given the string "Hello hello goodbye", your function should return [['hello', 2], ['goodbye', 1]]. +
+ +## Tests + +
+ +```yml +tests: + - text: wordFrequency should be a function. + testString: assert(typeof wordFrequency == 'function'); + - text: wordFrequency should return an array. + testString: assert(Array.isArray(wordFrequency("test"))); + - text: wordFrequency("Hello hello world", 2) should return [['hello', 2], ['world', 1]] + testString: assert.deepEqual(wordFrequency(example_1, 2), example_1_solution) + - text: wordFrequency("The quick brown fox jumped over the lazy dog", 1) should return [['the', 2]] + testString: assert.deepEqual(wordFrequency(example_2, 1), example_2_solution) + - text: wordFrequency("Opensource opensource open-source open source", 1) should return [['opensource', 2]] + testString: assert.deepEqual(wordFrequency(example_3, 1), example_3_solution) + - text: wordFrequency("Apple App apply aPP aPPlE", 3) should return [['app', 2], ['apple', 2], ['apply', 1]] or [['apple', 2], ['app', 2], ['apply', 1]] + testString: const arr = JSON.stringify(wordFrequency(example_4, 3)); assert(arr === example_4_solution_a || arr === example_4_solution_b) + - text: wordFrequency("c d a d c a b d d c", 4) should return [['d', 4], ['c', 3], ['a', 2], ['b', 1]] + testString: assert.deepEqual(wordFrequency(example_5, 4), example_5_solution) + - text: wordFrequency("", 5) should return [] + testString: assert.deepEqual(wordFrequency(example_6, 5), example_6_solution) +``` + +
+ +## Challenge Seed + +
+
+ +```js +function wordFrequency(txt, n) { + // Good luck! +} +``` + +
+ +### Before Test + +
+ +```js +var example_1 = 'Hello hello world'; +var example_1_solution = [['hello', 2], ['world', 1]]; +var example_2 = 'The quick brown fox jumped over the lazy dog'; +var example_2_solution = [['the', 2]]; +var example_3 = 'Opensource opensource open-source open source'; +var example_3_solution = [['opensource', 2]]; +var example_4 = 'Apple App apply aPP aPPlE'; +var example_4_solution_a = "[[\"app\",2],[\"apple\",2],[\"apply\",1]]"; +var example_4_solution_b = "[[\"apple\",2],[\"app\",2],[\"apply\",1]]"; +var example_5 = 'c d a d c a b d d c'; +var example_5_solution = [['d', 4], ['c', 3], ['a', 2], ['b', 1]]; +var example_6 = ''; +var example_6_solution = []; +``` + +
+
+ +## Solution + +
+ +```js +function wordFrequency(txt, n) { + var words = txt.split(/\s+/); + var wordCount = {}; + words.forEach(word => { + if (word == '') { + return; + } + const lowerWord = word.toLowerCase(); + wordCount[lowerWord] = + lowerWord in wordCount ? wordCount[lowerWord] + 1 : 1; + }); + + var wordsArray = []; + for (let [word, count] of Object.entries(wordCount)) { + wordsArray.push([word, count]); + } + + wordsArray.sort((a, b) => { + if (a[1] !== b[1]) { + return b[1] - a[1]; + } else if (a[0] !== b[0]) { + return a[0] < b[0] ? -1 : 1; + } + return 0; + }); + return wordsArray.slice(0, n); +} +``` + +