While running `translate_a_single_sentence(translation_config)`,
I encountered an error saying that the downloaded file en-de.tgz is not recognized as a gzip file. How can I fix this?
The error traceback is reported below:
` downloading en-de.tgz
C:\Users..\pytorch-original-transformer\data\iwslt\en-de.tgz: 97.4kB [00:00, 1.60MB/s]
BadGzipFile Traceback (most recent call last)
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1669 try:
-> 1670 t = cls.taropen(name, mode, fileobj, **kwargs)
1671 except OSError:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in taropen(cls, name, mode, fileobj, **kwargs)
1646 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
-> 1647 return cls(name, mode, fileobj, **kwargs)
1648
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in __init__(self, name, mode, fileobj, format, tarinfo, dereference, ignore_zeros, encoding, errors, pax_headers, debug, errorlevel, copybufsize)
1509 self.firstmember = None
-> 1510 self.firstmember = self.next()
1511
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in next(self)
2310 try:
-> 2311 tarinfo = self.tarinfo.fromtarfile(self)
2312 except EOFHeaderError as e:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in fromtarfile(cls, tarfile)
1101 """
-> 1102 buf = tarfile.fileobj.read(BLOCKSIZE)
1103 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
291 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 292 return self._buffer.read(size)
293
~\anaconda3\envs\pytorch-transformer\lib\_compression.py in readinto(self, b)
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
478 self._init_read()
--> 479 if not self._read_gzip_header():
480 self._size = self._pos
~\anaconda3\envs\pytorch-transformer\lib\gzip.py in _read_gzip_header(self)
426 if magic != b'\037\213':
--> 427 raise BadGzipFile('Not a gzipped file (%r)' % magic)
428
BadGzipFile: Not a gzipped file (b'<!')
During handling of the above exception, another exception occurred:
ReadError Traceback (most recent call last)
in
85
86 # Translate the given source sentence
---> 87 translate_a_single_sentence(translation_config)
in translate_a_single_sentence(translation_config)
5 print(2)
6 # Step 1: Prepare the field processor (tokenizer, numericalizer)
----> 7 _, _, src_field_processor, trg_field_processor = get_datasets_and_vocabs(
8 translation_config['dataset_path'],
9 translation_config['language_direction'],
in get_datasets_and_vocabs(dataset_path, language_direction, use_iwslt, use_caching_mechanism)
41 dataset_split_fn = datasets.IWSLT.splits if use_iwslt else datasets.WMT14.splits
42
---> 43 train_dataset, val_dataset, test_dataset = dataset_split_fn(
44 exts=(src_ext, trg_ext),
45 fields=fields,
~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\datasets\translation.py in splits(cls, exts, fields, root, train, validation, test, **kwargs)
142 cls.urls = [cls.base_url.format(exts[0][1:], exts[1][1:], cls.dirname)]
143 check = os.path.join(root, cls.name, cls.dirname)
--> 144 path = cls.download(root, check=check)
145
146 train = '.'.join([train, cls.dirname])
~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\data\dataset.py in download(cls, root, check)
189 # tarfile cannot handle bare .gz files
190 elif ext == '.tgz' or ext == '.gz' and ext_inner == '.tar':
--> 191 with tarfile.open(zpath, 'r:gz') as tar:
192 dirs = [member for member in tar.getmembers()]
193 tar.extractall(path=path, members=dirs)
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1615 else:
1616 raise CompressionError("unknown compression type %r" % comptype)
-> 1617 return func(name, filemode, fileobj, **kwargs)
1618
1619 elif "|" in mode:
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1672 fileobj.close()
1673 if mode == 'r':
-> 1674 raise ReadError("not a gzip file")
1675 raise
1676 except:
ReadError: not a gzip file`
Thank you very much!