From b9265e5796f5d80c18d1f9231ab234225676780b Mon Sep 17 00:00:00 2001 From: altryne Date: Wed, 7 Dec 2022 11:45:31 -0700 Subject: [PATCH] Update Hebrew language code to he per IANA registry (#401) * Update Hebrew language code to he per IANA registry Per the [IANA registry](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry), `iw` was deprecated as the code for Hebrew in 1989, and the preferred code is `he`. The correct subtag: ``` %% Type: language Subtag: he Description: Hebrew Added: 2005-10-16 Suppress-Script: Hebr %% ``` And the deprecation record: ``` %% Type: language Subtag: iw Description: Hebrew Added: 2005-10-16 Deprecated: 1989-01-01 Preferred-Value: he Suppress-Script: Hebr %% ``` * Update Hebrew ISO code to `he` Per discussion, it's OK to make this change without backwards compatibility. --- whisper/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/tokenizer.py b/whisper/tokenizer.py index 88142c4..a27cb35 100644 --- a/whisper/tokenizer.py +++ b/whisper/tokenizer.py @@ -28,7 +28,7 @@ LANGUAGES = { "hi": "hindi", "fi": "finnish", "vi": "vietnamese", - "iw": "hebrew", + "he": "hebrew", "uk": "ukrainian", "el": "greek", "ms": "malay",