Update encoder.py to work on Windows
This fixes https://github.com/openai/gpt-2/issues/26. On Windows, `vocab.bpe` was opened without an explicit encoding, so Python decoded it with the cp1252 code page and failed; opening the file with `encoding="utf-8"` avoids this error:
```
  File "C:\Users\James Pollack\Desktop\gpt-2\src\encoder.py", line 112, in get_encoder
    bpe_data = f.read()
  File "C:\Anaconda\envs\gpt-2\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 66951: character maps to <undefined>
```
This commit is contained in:
committed by
Jeff Wu
parent
ebd5894d22
commit
2d0b62225c
@@ -108,7 +108,7 @@ class Encoder:
|
|||||||
def get_encoder(model_name):
|
def get_encoder(model_name):
|
||||||
with open(os.path.join('models', model_name, 'encoder.json'), 'r') as f:
|
with open(os.path.join('models', model_name, 'encoder.json'), 'r') as f:
|
||||||
encoder = json.load(f)
|
encoder = json.load(f)
|
||||||
with open(os.path.join('models', model_name, 'vocab.bpe'), 'r') as f:
|
with open(os.path.join('models', model_name, 'vocab.bpe'), 'r', encoding="utf-8") as f:
|
||||||
bpe_data = f.read()
|
bpe_data = f.read()
|
||||||
bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
|
bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]]
|
||||||
return Encoder(
|
return Encoder(
|
||||||
|
Reference in New Issue
Block a user