Python generator_utils.maybe_download Function Code Examples


This article collects typical usage examples of the Python function tensor2tensor.data_generators.generator_utils.maybe_download. If you have been wondering what maybe_download does, how to call it, or what real uses of it look like, the curated code examples below should help.

The following presents 20 code examples of the maybe_download function, ordered by popularity.
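
Before diving in, here is a minimal usage sketch distilled from the call pattern the examples share: maybe_download(directory, filename, uri) fetches uri into directory under filename unless the file is already present, and returns the local filepath either way. The URL is taken from Example 5; the tmp_dir value is illustrative.

import os
import tarfile

from tensor2tensor.data_generators import generator_utils

tmp_dir = "/tmp/t2t_datagen"  # illustrative scratch directory
corpus_url = ("http://www.statmt.org/lm-benchmark/"
              "1-billion-word-language-modeling-benchmark-r13output.tar.gz")
corpus_filename = os.path.basename(corpus_url)

# Downloads the file into tmp_dir unless it is already there; the local
# filepath is returned either way.
corpus_filepath = generator_utils.maybe_download(tmp_dir, corpus_filename,
                                                 corpus_url)

# A follow-up step common to many of the examples below: unpack the archive.
with tarfile.open(corpus_filepath, "r:gz") as corpus_tar:
  corpus_tar.extractall(tmp_dir)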

Example 1: _get_mnist

def _get_mnist(directory):
  """Download all MNIST files to directory unless they are there."""
  for filename in [
      _MNIST_TRAIN_DATA_FILENAME, _MNIST_TRAIN_LABELS_FILENAME,
      _MNIST_TEST_DATA_FILENAME, _MNIST_TEST_LABELS_FILENAME
  ]:
    generator_utils.maybe_download(directory, filename, _MNIST_URL + filename)
Author: chqiwang | Project: tensor2tensor | Lines: 7 | Source: mnist.py


Example 2: maybe_prepare_text

  def maybe_prepare_text(self, tmp_dir):
    """Download corpus if necessary, decompress, split into multiple text files.

    Args:
      tmp_dir: directory containing dataset.

    Returns:
      list of filepaths for local text files.
    """
    compressed_filename = os.path.basename(self.corpus_url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)
    decompressed_filepath = compressed_filepath[:-4]
    split_file_prefix = decompressed_filepath + "-part-"
    split_filepattern = split_file_prefix + "?????"
    split_files = sorted(tf.gfile.Glob(split_filepattern))
    if not split_files:
      if not tf.gfile.Exists(decompressed_filepath):
        if not tf.gfile.Exists(compressed_filepath):
          # maybe_download expects a bare filename, not a full filepath.
          generator_utils.maybe_download(
              tmp_dir, compressed_filename, self.corpus_url)
        assert not subprocess.call(["bunzip2", compressed_filepath])
      assert tf.gfile.Exists(decompressed_filepath)
      assert not subprocess.call([
          "split", "--line-bytes=4M", "--suffix-length=5",
          "--numeric-suffixes", decompressed_filepath, split_file_prefix])
      split_files = sorted(tf.gfile.Glob(split_filepattern))
    assert split_files
    return split_files
Author: kltony | Project: tensor2tensor | Lines: 28 | Source: wiki.py


Example 3: _get_fashion_mnist

def _get_fashion_mnist(directory):
  """Download all FashionMNIST files to directory unless they are there."""
  # Fashion mnist files have the same names as MNIST.
  # We must choose a separate name (by adding 'fashion-' prefix) in the tmp_dir.
  for filename in [
      _MNIST_TRAIN_DATA_FILENAME, _MNIST_TRAIN_LABELS_FILENAME,
      _MNIST_TEST_DATA_FILENAME, _MNIST_TEST_LABELS_FILENAME
  ]:
    generator_utils.maybe_download(directory,
                                   _FASHION_MNIST_LOCAL_FILE_PREFIX + filename,
                                   _FASHION_MNIST_URL + filename)
Author: chqiwang | Project: tensor2tensor | Lines: 11 | Source: mnist.py


Example 4: _compile_data

def _compile_data(tmp_dir, datasets, filename):
  """Concatenate all `datasets` and save to `filename`."""
  filename = os.path.join(tmp_dir, filename)
  lang1_lines, lang2_lines = [], []
  for dataset in datasets:
    url = dataset[0]
    compressed_filename = os.path.basename(url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)

    lang1_filename, lang2_filename = dataset[1]
    lang1_filepath = os.path.join(tmp_dir, lang1_filename)
    lang2_filepath = os.path.join(tmp_dir, lang2_filename)

    if not os.path.exists(compressed_filepath):
      generator_utils.maybe_download(tmp_dir, compressed_filename, url)
    if not os.path.exists(lang1_filepath) or not os.path.exists(lang2_filepath):
      mode = "r:gz" if compressed_filepath.endswith(".gz") else "r"
      with tarfile.open(compressed_filepath, mode) as corpus_tar:
        corpus_tar.extractall(tmp_dir)
    if lang1_filepath.endswith(".gz"):
      # str.strip removes a character set, not a suffix, so slice instead.
      new_filepath = lang1_filepath[:-len(".gz")]
      generator_utils.gunzip_file(lang1_filepath, new_filepath)
      lang1_filepath = new_filepath
    if lang2_filepath.endswith(".gz"):
      new_filepath = lang2_filepath[:-len(".gz")]
      generator_utils.gunzip_file(lang2_filepath, new_filepath)
      lang2_filepath = new_filepath
    with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file:
      with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file:
        lang1_file_lines = lang1_file.readlines()
        lang2_file_lines = lang2_file.readlines()
        assert len(lang1_file_lines) == len(lang2_file_lines), lang1_filepath
        lang1_lines.extend(lang1_file_lines)
        lang2_lines.extend(lang2_file_lines)

  write_chunk_size = 10000
  assert len(lang1_lines) == len(lang2_lines)
  with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_file:
    # Write write_chunk_size lines at a time to batch the file writes.
    for i in range(0, len(lang1_lines), write_chunk_size):
      lang1_file.write("".join(lang1_lines[i:i + write_chunk_size]))
  with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_file:
    for i in range(0, len(lang2_lines), write_chunk_size):
      lang2_file.write("".join(lang2_lines[i:i + write_chunk_size]))
  return filename
Author: TrunksLegendary | Project: tensor2tensor | Lines: 54 | Source: wmt.py


Example 5: _maybe_download_corpus

def _maybe_download_corpus(tmp_dir):
  """Download and unpack the corpus.

  Args:
    tmp_dir: directory containing dataset.
  """
  corpus_url = ("http://www.statmt.org/lm-benchmark/"
                "1-billion-word-language-modeling-benchmark-r13output.tar.gz")
  corpus_filename = os.path.basename(corpus_url)
  corpus_filepath = os.path.join(tmp_dir, corpus_filename)
  if not os.path.exists(corpus_filepath):
    generator_utils.maybe_download(tmp_dir, corpus_filename, corpus_url)
    with tarfile.open(corpus_filepath, "r:gz") as corpus_tar:
      corpus_tar.extractall(tmp_dir)
Author: kltony | Project: tensor2tensor | Lines: 14 | Source: lm1b.py


Example 6: generate_samples

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """A generator that yields data samples.

    Args:
      data_dir: A string representing the data directory.
      tmp_dir: A string representing the temporary directory, used to
              download files if they are not already available.
      dataset_split: Train, Test or Eval.

    Yields:
      Each element yielded is of a Python dict of the form
        {"inputs": "STRING", "targets": "STRING"}
    """

    # TODO(sanyamkapoor): Manually separate train/eval data set.
    csv_file_names = self.pair_files_list
    csv_files = [
        generator_utils.maybe_download(tmp_dir, file_list[0], uri)
        for uri, file_list in csv_file_names
    ]

    for pairs_file in csv_files:
      tf.logging.debug("Reading {}".format(pairs_file))
      with open(pairs_file, "r") as csv_file:
        for line in csv_file:
          reader = csv.reader(StringIO(line))
          for docstring_tokens, function_tokens in reader:
            yield {"inputs": docstring_tokens, "targets": function_tokens}
Author: qixiuai | Project: tensor2tensor | Lines: 29 | Source: function_docstring.py


Example 7: _maybe_download_corpus

def _maybe_download_corpus(tmp_dir, vocab_type):
  """Download and unpack the corpus.

  Args:
    tmp_dir: directory containing dataset.
    vocab_type: which vocabulary we are using.

  Returns:
    The list of names of files.
  """
  filename = os.path.basename(PTB_URL)
  compressed_filepath = generator_utils.maybe_download(
      tmp_dir, filename, PTB_URL)
  ptb_files = []
  ptb_char_files = []

  with tarfile.open(compressed_filepath, "r:gz") as tgz:
    files = []
    # Selecting only relevant files.
    for m in tgz.getmembers():
      if "ptb" in m.name and ".txt" in m.name:
        if "char" in m.name:
          ptb_char_files += [m.name]
        else:
          ptb_files += [m.name]
        files += [m]

    tgz.extractall(tmp_dir, members=files)

  if vocab_type == text_problems.VocabType.CHARACTER:
    return ptb_char_files
  else:
    return ptb_files
Author: kltony | Project: tensor2tensor | Lines: 33 | Source: ptb.py


Example 8: _maybe_download_corpus

def _maybe_download_corpus(tmp_dir):
  """Download corpus if necessary.

  Args:
    tmp_dir: directory containing dataset.

  Returns:
    filepath of the downloaded corpus file.
  """
  corpus_url = ("https://dumps.wikimedia.org/enwiki/20170620/"
                "enwiki-20170620-pages-articles-multistream.xml.bz2")
  corpus_filename = os.path.basename(corpus_url)
  corpus_filepath = os.path.join(tmp_dir, corpus_filename)
  if not tf.gfile.Exists(corpus_filepath):
    generator_utils.maybe_download(tmp_dir, corpus_filename, corpus_url)
  return corpus_filepath
Author: AranKomat | Project: tensor2tensor | Lines: 16 | Source: wiki.py


Example 9: generator

  def generator(self, data_dir, tmp_dir, datasets,
                eos_list=None, start_from=0, how_many=0):
    i = 0
    for url, subdir in datasets:
      filename = os.path.basename(url)
      compressed_file = generator_utils.maybe_download(tmp_dir, filename, url)

      read_type = "r:gz" if filename.endswith("tgz") else "r"
      with tarfile.open(compressed_file, read_type) as corpus_tar:
        # Create a subset of files that don't already exist.
        #   tarfile.extractall errors when encountering an existing file
        #   and tarfile.extract is extremely slow
        members = []
        for f in corpus_tar:
          if not os.path.isfile(os.path.join(tmp_dir, f.name)):
            members.append(f)
        corpus_tar.extractall(tmp_dir, members=members)

      data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir)
      data_files = _collect_data(data_dir, "flac", "txt")
      data_pairs = data_files.values()

      encoders = self.feature_encoders(None)
      audio_encoder = encoders["waveforms"]
      text_encoder = encoders["targets"]

      for media_file, text_data in sorted(data_pairs)[start_from:]:
        if how_many > 0 and i == how_many:
          return
        i += 1
        yield {
            "waveforms": audio_encoder.encode(media_file),
            "targets": text_encoder.encode(text_data)
        }
Author: chqiwang | Project: tensor2tensor | Lines: 34 | Source: librispeech.py


Example 10: load_examples

def load_examples(tmp_dir, prop_train=0.09, prop_val=0.01):
  """Loads examples from the TSV file.

  Args:
    tmp_dir: temp directory.
    prop_train: proportion of the train data
    prop_val: proportion of the validation data

  Returns:
    All examples in the dataset, plus the train, validation, and test splits.

  """

  infile = generator_utils.maybe_download(tmp_dir, _TAR, _URL)
  tf.logging.info('Loading examples')

  all_examples = []
  # Open in text mode so csv receives decoded strings rather than bytes.
  for i, d in enumerate(
      csv.DictReader(gzip.open(infile, mode='rt'), delimiter='\t')):
    if i % 100000 == 0:
      tf.logging.info('%d examples have been loaded....' % i)
    ex = {x: int(y) if y.isdigit() else y for x, y in d.items()}
    all_examples.append(ex)

  random.seed(1)
  random.shuffle(all_examples)
  n_train = int(len(all_examples) * prop_train)
  n_val = n_train + int(len(all_examples) * prop_val)
  train = all_examples[:n_train]
  val = all_examples[n_train:n_val]
  test = []
  for e in all_examples[n_val:]:
    if e['n_intervening'] == e['n_diff_intervening']:
      test.append(e)

  return all_examples, train, val, test
Author: kltony | Project: tensor2tensor | Lines: 35 | Source: subject_verb_agreement.py


Example 11: _prepare_lambada_data

def _prepare_lambada_data(tmp_dir, data_dir, vocab_size, vocab_filename):
  """Downloads and prepares the dataset.

  Args:
    tmp_dir: temp directory.
    data_dir: data directory.
    vocab_size: size of the vocabulary.
    vocab_filename: name of the vocab file.
  """

  if not tf.gfile.Exists(data_dir):
    tf.gfile.MakeDirs(data_dir)

  file_path = generator_utils.maybe_download(tmp_dir, _TAR, _URL)
  tar_all = tarfile.open(file_path)
  tar_all.extractall(tmp_dir)
  tar_all.close()
  tar_train = tarfile.open(os.path.join(tmp_dir, "train-novels.tar"))
  tar_train.extractall(tmp_dir)
  tar_train.close()

  vocab_path = os.path.join(data_dir, vocab_filename)
  if not tf.gfile.Exists(vocab_path):
    with tf.gfile.GFile(os.path.join(tmp_dir, _VOCAB), "r") as infile:
      reader = csv.reader(infile, delimiter="\t")
      words = [row[0] for row in reader]
      words = [_UNK] + words[:vocab_size]
    with tf.gfile.GFile(vocab_path, "w") as outfile:
      outfile.write("\n".join(words))
Author: kltony | Project: tensor2tensor | Lines: 30 | Source: lambada.py


Example 12: _get_mscoco

def _get_mscoco(directory):
  """Download and extract MSCOCO datasets to directory unless it is there."""
  for url in _MSCOCO_URLS:
    filename = os.path.basename(url)
    download_url = os.path.join(_MSCOCO_ROOT_URL, url)
    path = generator_utils.maybe_download(directory, filename, download_url)
    # str.strip removes a character set, not a suffix, so slice instead.
    unzip_dir = os.path.join(directory, filename[:-len(".zip")])
    if not tf.gfile.Exists(unzip_dir):
      zipfile.ZipFile(path, "r").extractall(directory)
Author: qixiuai | Project: tensor2tensor | Lines: 9 | Source: mscoco.py


Example 13: _get_vqa_v2_image_raw_dataset

def _get_vqa_v2_image_raw_dataset(directory, image_root_url, image_urls):
  """Extract the VQA V2 image data set to directory unless it's there."""
  for url in image_urls:
    filename = os.path.basename(url)
    download_url = os.path.join(image_root_url, url)
    path = generator_utils.maybe_download(directory, filename, download_url)
    unzip_dir = os.path.join(directory, filename[:-len(".zip")])
    if not tf.gfile.Exists(unzip_dir):
      zipfile.ZipFile(path, "r").extractall(directory)
Author: qixiuai | Project: tensor2tensor | Lines: 9 | Source: vqa.py


Example 14: _download_and_parse_dataset

def _download_and_parse_dataset(tmp_dir, train):
  """Downloads and prepares the dataset to be parsed by the data generator."""
  file_path = generator_utils.maybe_download(tmp_dir, _SNLI_ZIP, _SNLI_URL)
  zip_ref = zipfile.ZipFile(file_path, 'r')
  zip_ref.extractall(tmp_dir)
  zip_ref.close()

  file_name = 'train' if train else 'dev'
  dataset_file_path = os.path.join(tmp_dir, _SNLI_DATA_PATH % file_name)
  _parse_dataset(dataset_file_path, tmp_dir, train)
Author: qixiuai | Project: tensor2tensor | Lines: 10 | Source: snli.py


Example 15: _maybe_download_corpora

  def _maybe_download_corpora(self, tmp_dir):
    sst_binary_filename = "SST-2.zip"
    sst_binary_finalpath = os.path.join(tmp_dir, "SST-2")
    if not tf.gfile.Exists(sst_binary_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, sst_binary_filename, self._SST2_URL)
      zip_ref = zipfile.ZipFile(zip_filepath, "r")
      zip_ref.extractall(tmp_dir)
      zip_ref.close()

    return sst_binary_finalpath
Author: kltony | Project: tensor2tensor | Lines: 11 | Source: sst_binary.py


Example 16: _maybe_download_corpora

  def _maybe_download_corpora(self, tmp_dir):
    qnli_filename = "QNLI.zip"
    qnli_finalpath = os.path.join(tmp_dir, "QNLI")
    if not tf.gfile.Exists(qnli_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, qnli_filename, self._QNLI_URL)
      zip_ref = zipfile.ZipFile(zip_filepath, "r")
      zip_ref.extractall(tmp_dir)
      zip_ref.close()

    return qnli_finalpath
Author: qixiuai | Project: tensor2tensor | Lines: 11 | Source: qnli.py


Example 17: _maybe_download_corpora

  def _maybe_download_corpora(self, tmp_dir):
    cola_filename = "CoLA.zip"
    cola_finalpath = os.path.join(tmp_dir, "CoLA")
    if not tf.gfile.Exists(cola_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, cola_filename, self._COLA_URL)
      zip_ref = zipfile.ZipFile(zip_filepath, "r")
      zip_ref.extractall(tmp_dir)
      zip_ref.close()

    return cola_finalpath
Author: qixiuai | Project: tensor2tensor | Lines: 11 | Source: cola.py


Example 18: _maybe_download_corpora

  def _maybe_download_corpora(self, tmp_dir):
    scitail_filename = "SciTailV1.1.zip"
    scitail_finalpath = os.path.join(tmp_dir, "SciTailV1.1")
    if not tf.gfile.Exists(scitail_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, scitail_filename, self._SCITAIL_URL)
      zip_ref = zipfile.ZipFile(zip_filepath, "r")
      zip_ref.extractall(tmp_dir)
      zip_ref.close()

    return scitail_finalpath
Author: qixiuai | Project: tensor2tensor | Lines: 11 | Source: scitail.py


Example 19: generate_samples

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    dataset = self.dataset_url(dataset_split)

    url = dataset[0][0]
    compressed_filename = os.path.basename(url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)
    generator_utils.maybe_download(tmp_dir, compressed_filename, url)

    mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
    with tarfile.open(compressed_filepath, mode) as corpus_tar:
      corpus_tar.extractall(tmp_dir)

    if self.vocab_type == text_problems.VocabType.SUBWORD:
      generator_utils.get_or_generate_vocab(
          data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
          self.vocab_data_files())

    source_file, target_file = self.source_target_paths(dataset_split, tmp_dir)
    return text_problems.text2text_txt_iterator(source_file,
                                                target_file)
Author: qixiuai | Project: tensor2tensor | Lines: 20 | Source: style_transfer.py


Example 20: _maybe_download_corpora

def _maybe_download_corpora(tmp_dir, dataset_split):
  """Download corpora if necessary and unzip them.

  Args:
    tmp_dir: directory containing dataset.
    dataset_split: whether we're in train/dev/test mode.

  Returns:
    List of all files generated and path to file containing
      train/dev/test split info.
  """
  cnn_filename = "cnn_stories.tgz"
  cnn_finalpath = os.path.join(tmp_dir, "cnn/stories/")
  dailymail_filename = "dailymail_stories.tgz"
  dailymail_finalpath = os.path.join(tmp_dir, "dailymail/stories/")
  if not tf.gfile.Exists(cnn_finalpath):
    cnn_file = generator_utils.maybe_download_from_drive(
        tmp_dir, cnn_filename, _CNN_STORIES_DRIVE_URL)
    with tarfile.open(cnn_file, "r:gz") as cnn_tar:
      cnn_tar.extractall(tmp_dir)
  if not tf.gfile.Exists(dailymail_finalpath):
    dailymail_file = generator_utils.maybe_download_from_drive(
        tmp_dir, dailymail_filename, _DAILYMAIL_STORIES_DRIVE_URL)
    with tarfile.open(dailymail_file, "r:gz") as dailymail_tar:
      dailymail_tar.extractall(tmp_dir)

  cnn_files = tf.gfile.Glob(cnn_finalpath + "*")
  dailymail_files = tf.gfile.Glob(dailymail_finalpath + "*")
  all_files = cnn_files + dailymail_files

  if dataset_split == problem.DatasetSplit.TRAIN:
    urls_path = generator_utils.maybe_download(tmp_dir, "all_train.txt",
                                               _TRAIN_URLS)
  elif dataset_split == problem.DatasetSplit.EVAL:
    urls_path = generator_utils.maybe_download(tmp_dir, "all_val.txt",
                                               _DEV_URLS)
  else:
    urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt",
                                               _TEST_URLS)

  return all_files, urls_path
Author: qixiuai | Project: tensor2tensor | Lines: 41 | Source: cnn_dailymail.py


Note: The tensor2tensor.data_generators.generator_utils.maybe_download examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets are drawn from open-source projects contributed by their respective developers; copyright remains with the original authors, and redistribution and use are subject to each project's license. Please do not reproduce without permission.

