
Problem with the tensorflow_datasets load function in TensorFlow 2?

Asked on 2023-05-25 17:25:19
Answers 0 · Following 0 · Views 118
Calling tfds.load("imdb_reviews/subwords8k", ...) in an Anaconda TensorFlow 2 GPU environment on Windows raises the following traceback:
TypeError                                 Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 (train_data,test_data,info)=tfds.load("imdb_reviews/subwords8k",
      2                                      split=(tfds.Split.TRAIN,tfds.Split.TEST),
      3                                      with_info=True,
      4                                      as_supervised=True)

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\logging\__init__.py:169, in _FunctionDecorator.__call__(self, function, instance, args, kwargs)
    167 metadata = self._start_call()
    168 try:
--> 169   return function(*args, **kwargs)
    170 except Exception:
    171   metadata.mark_error()

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\load.py:640, in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
    521 """Loads the named dataset into a `tf.data.Dataset`.
    522 
    523 `tfds.load` is a convenience method that:
   (...)
    632     Split-specific information is available in `ds_info.splits`.
    633 """
    634 dbuilder = _fetch_builder(
    635     name,
    636     data_dir,
    637     builder_kwargs,
    638     try_gcs,
    639 )
--> 640 _download_and_prepare_builder(dbuilder, download, download_and_prepare_kwargs)
    642 if as_dataset_kwargs is None:
    643   as_dataset_kwargs = {}

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\load.py:499, in _download_and_prepare_builder(dbuilder, download, download_and_prepare_kwargs)
    497 if download:
    498   download_and_prepare_kwargs = download_and_prepare_kwargs or {}
--> 499   dbuilder.download_and_prepare(**download_and_prepare_kwargs)

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\logging\__init__.py:169, in _FunctionDecorator.__call__(self, function, instance, args, kwargs)
    167 metadata = self._start_call()
    168 try:
--> 169   return function(*args, **kwargs)
    170 except Exception:
    171   metadata.mark_error()

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:646, in DatasetBuilder.download_and_prepare(self, download_dir, download_config, file_format)
    644   self.info.read_from_directory(self._data_dir)
    645 else:
--> 646   self._download_and_prepare(
    647       dl_manager=dl_manager,
    648       download_config=download_config,
    649   )
    651   # NOTE: If modifying the lines below to put additional information in
    652   # DatasetInfo, you'll likely also want to update
    653   # DatasetInfo.read_from_directory to possibly restore these attributes
    654   # when reading from package data.
    655   self.info.download_size = dl_manager.downloaded_size

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\dataset_builder.py:1498, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
   1496 else:
   1497   optional_pipeline_kwargs = {}
-> 1498 split_generators = self._split_generators(  # pylint: disable=unexpected-keyword-arg
   1499     dl_manager, **optional_pipeline_kwargs
   1500 )
   1501 # TODO(tfds): Could be removed once all datasets are migrated.
   1502 # https://github.com/tensorflow/datasets/issues/2537
   1503 # Legacy mode (eventually convert list[SplitGeneratorLegacy] -> dict)
   1504 split_generators = split_builder.normalize_legacy_split_generators(
   1505     split_generators=split_generators,
   1506     generator_fn=self._generate_examples,
   1507     is_beam=isinstance(self, BeamBasedBuilder),
   1508 )

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:115, in Builder._split_generators(self, dl_manager)
    112 archive = lambda: dl_manager.iter_archive(arch_path)
    114 # Generate vocabulary from training data if SubwordTextEncoder configured
--> 115 self.info.features["text"].maybe_build_from_corpus(
    116     self._vocab_text_gen(archive())
    117 )
    119 return [
    120     tfds.core.SplitGenerator(
    121         name=tfds.Split.TRAIN,
   (...)
    141     ),
    142 ]

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\features\text_feature.py:169, in Text.maybe_build_from_corpus(self, corpus_generator, **kwargs)
    166   return
    168 vocab_size = self._encoder_config.vocab_size
--> 169 self.encoder = text_lib.SubwordTextEncoder.build_from_corpus(
    170     corpus_generator=corpus_generator,
    171     target_vocab_size=vocab_size,
    172     **kwargs,
    173 )

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\deprecated\text\subword_text_encoder.py:294, in SubwordTextEncoder.build_from_corpus(cls, corpus_generator, target_vocab_size, max_subword_length, max_corpus_chars, reserved_tokens)
    288 reserved_tokens = reserved_tokens or []
    289 _validate_build_arguments(
    290     max_subword_length=max_subword_length,
    291     reserved_tokens=reserved_tokens,
    292     target_vocab_size=target_vocab_size,
    293 )
--> 294 token_counts = _token_counts_from_generator(
    295     generator=corpus_generator,
    296     max_chars=max_corpus_chars,
    297     reserved_tokens=reserved_tokens,
    298 )
    300 # Binary search on the minimum token count to build a vocabulary with
    301 # approximately the right size
    302 def _binary_search(min_token_count, max_token_count):

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\deprecated\text\subword_text_encoder.py:413, in _token_counts_from_generator(generator, max_chars, reserved_tokens)
    411 num_chars = 0
    412 token_counts = collections.defaultdict(int)
--> 413 for s in generator:
    414   s = tf.compat.as_text(s)
    415   if max_chars and (num_chars + len(s)) >= max_chars:

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:105, in Builder._vocab_text_gen(self, archive)
    104 def _vocab_text_gen(self, archive):
--> 105   for _, ex in self._generate_examples(
    106       archive, os.path.join("aclImdb", "train")
    107   ):
    108     yield ex["text"]

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\datasets\imdb_reviews\imdb_reviews_dataset_builder.py:151, in Builder._generate_examples(self, archive, directory, labeled)
    147 reg_path = "(?P<label>neg|pos)" if labeled else "unsup"
    148 reg = re.compile(
    149     os.path.join("^%s" % directory, reg_path, "").replace("\\", "\\\\")
    150 )
--> 151 for path, imdb_f in archive:
    152   res = reg.match(path)
    153   if not res:

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow_datasets\core\download\extractor.py:179, in iter_tar(arch_f, stream)
    176 read_type = 'r' + ('|' if stream else ':') + '*'
    178 with _open_or_pass(arch_f) as fobj:
--> 179   tar = tarfile.open(mode=read_type, fileobj=fobj)
    180   for member in tar:
    181     if stream and (member.islnk() or member.issym()):
    182       # Links cannot be dereferenced in stream mode.

File D:\anaconda\envs\tensorflow_gpu\lib\tarfile.py:1599, in TarFile.open(cls, name, mode, fileobj, bufsize, **kwargs)
   1597 func = getattr(cls, cls.OPEN_METH[comptype])
   1598 if fileobj is not None:
-> 1599     saved_pos = fileobj.tell()
   1600 try:
   1601     return func(name, "r", fileobj, **kwargs)

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\lib\io\file_io.py:186, in FileIO.tell(self)
    184 """Returns the current position in the file."""
    185 if self._read_check_passed:
--> 186   self._preread_check()
    187   return self._read_buf.tell()
    188 else:

File D:\anaconda\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\lib\io\file_io.py:78, in FileIO._preread_check(self)
     75 if not self._read_check_passed:
     76   raise errors.PermissionDeniedError(None, None,
     77                                      "File isn't open for reading")
---> 78 self._read_buf = _pywrap_file_io.BufferedInputStream(
     79     self.__name, 1024 * 512)

TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
    1. tensorflow.python._pywrap_file_io.BufferedInputStream(arg0: str, arg1: int)

Invoked with: WindowsGPath('C:\\Users\\123456\\tensorflow_datasets\\downloads\\ai.stanfor.edu_amaas_sentime_aclImdb_v1xA90oY07YfkP66HhdzDg046Ll8Bf3nAIlC6Rkj0WWP4.tar.gz'), 524288
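
For anyone trying to reproduce this, the call at the top of the traceback is, written out cleanly, as below. This is a sketch assuming the environment shown in the paths (Anaconda on Windows, a conda env named tensorflow_gpu):

import tensorflow_datasets as tfds

# The call from the traceback. Note: with with_info=True, tfds.load returns
# (datasets, info) as a 2-tuple, so the original three-name unpacking
# (train_data, test_data, info) = ... would itself fail with a ValueError
# even after a successful download; the conventional form is:
(train_data, test_data), info = tfds.load(
    "imdb_reviews/subwords8k",
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True,
    as_supervised=True,
)

On an affected setup, the failure occurs while tensorflow_datasets builds the subword vocabulary from the downloaded aclImdb archive, before anything is returned.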
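Reading the last few frames: tarfile.open asks the file object for its position via fileobj.tell(), which routes into TensorFlow's FileIO._preread_check, and the C++ binding _pywrap_file_io.BufferedInputStream accepts only (str, int). It is invoked with a WindowsGPath object instead of a plain string, which is exactly what the final TypeError reports. A minimal sketch of that mismatch and the standard conversion, using pathlib.PureWindowsPath as a hypothetical stand-in for WindowsGPath:

import os
from pathlib import PureWindowsPath

# Hypothetical stand-in for the WindowsGPath in the traceback; the real
# archive filename is the long munged one under tensorflow_datasets\downloads.
p = PureWindowsPath(r"C:\Users\123456\tensorflow_datasets\downloads\aclImdb_v1.tar.gz")

print(type(p))             # a path-like object, not a str
print(type(os.fspath(p)))  # <class 'str'> -- the type the binding accepts

A path object reaching a string-only binding usually points to a version mismatch between tensorflow_datasets (which passes such path objects around internally) and the installed tensorflow build, rather than to a bug in the user code; aligning the two package versions would be the usual direction to investigate, though that is an assumption, not something confirmed in this thread.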

Answers
