
Python util.create_datastore_write_config Function Code Examples


This article collects typical usage examples of the Python function mapreduce.util.create_datastore_write_config. If you are unsure what create_datastore_write_config does, how to call it, or what real-world usage looks like, the curated examples below should help.



The following shows 15 code examples of create_datastore_write_config, ordered by popularity by default.
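
Before diving into the examples, a quick orientation: every example below passes the function's return value as the config= argument of a datastore put or delete. Judging from the two unit tests included here (Examples 9 and 13), the function turns the spec-level "force_writes" parameter into a datastore_rpc.Configuration. The following is a minimal sketch of that inferred behavior, not the library's verbatim implementation; the boolean parsing is an assumption.

  from google.appengine.datastore import datastore_rpc

  def create_datastore_write_config(mapreduce_spec):
    """Sketch: build a datastore write config from a MapreduceSpec (inferred)."""
    # Assumption: "force_writes" is stored as a string in the spec params,
    # as Example 9 below suggests (spec.params["force_writes"] = "True").
    force_writes = str(
        mapreduce_spec.params.get("force_writes", "false")).lower() == "true"
    if force_writes:
      # Force writes even when the datastore is in read-only maintenance.
      return datastore_rpc.Configuration(force_writes=True)
    # Default configuration: force_writes stays False (see Example 13).
    return datastore_rpc.Configuration()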

Example 1: handle

  def handle(self):
    """Handles kick off request."""
    spec = model.MapreduceSpec.from_json_str(
        self._get_required_param("mapreduce_spec"))

    app_id = self.request.get("app", None)
    queue_name = os.environ.get("HTTP_X_APPENGINE_QUEUENAME", "default")
    mapper_input_reader_class = spec.mapper.input_reader_class()

    # StartJobHandler might have already saved the state, but it's OK
    # to override it because we're using the same mapreduce id.
    state = model.MapreduceState.create_new(spec.mapreduce_id)
    state.mapreduce_spec = spec
    state.active = True
    # TODO(user): Initialize UI fields correctly.
    state.char_url = ""
    state.sparkline_url = ""
    if app_id:
      state.app_id = app_id

    input_readers = mapper_input_reader_class.split_input(spec.mapper)
    if not input_readers:
      # We don't have any data. Finish map.
      logging.warning("Found no mapper input data to process.")
      state.active = False
      state.active_shards = 0
      state.put(config=util.create_datastore_write_config(spec))
      return

    # Update state and spec with actual shard count.
    spec.mapper.shard_count = len(input_readers)
    state.active_shards = len(input_readers)
    state.mapreduce_spec = spec

    output_writer_class = spec.mapper.output_writer_class()
    if output_writer_class:
      output_writer_class.init_job(state)

    output_writers = []
    if output_writer_class:
      for shard_number in range(len(input_readers)):
        writer = output_writer_class.create(state, shard_number)
        assert isinstance(writer, output_writer_class)
        output_writers.append(writer)
    else:
      output_writers = [None for ir in input_readers]

    state.put(config=util.create_datastore_write_config(spec))

    KickOffJobHandler._schedule_shards(
        spec, input_readers, output_writers, queue_name, self.base_path())

    ControllerCallbackHandler.reschedule(
        state, self.base_path(), spec, queue_name=queue_name, serial_id=0)
Developer: GeomancerProject, Project: Software, Lines: 54, Source: handlers.py


Example 2: flush

 def flush(self):
   """Flush all information recorded in context."""
   for pool in self._pools.values():
     pool.flush()
   if self.shard_state:
     self.shard_state.put(
         config=util.create_datastore_write_config(self.mapreduce_spec))
Developer: KhanWorld, Project: KhanAcademy, Lines: 7, Source: context.py


Example 3: _schedule_shards

    def _schedule_shards(cls, spec, input_readers, queue_name, base_path):
        """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
    """
        # Note: it's safe to re-attempt this handler because:
        # - shard state has deterministic and unique key.
        # - schedule_slice will fall back gracefully if a task already exists.
        shard_states = []
        for shard_number, input_reader in enumerate(input_readers):
            shard = model.ShardState.create_new(spec.mapreduce_id, shard_number)
            shard.shard_description = str(input_reader)
            shard_states.append(shard)

        # Retrieves already existing shards.
        existing_shard_states = db.get(shard.key() for shard in shard_states)
        existing_shard_keys = set(shard.key() for shard in existing_shard_states if shard is not None)

        # Puts only non-existing shards.
        db.put(
            (shard for shard in shard_states if shard.key() not in existing_shard_keys),
            config=util.create_datastore_write_config(spec),
        )

        for shard_number, input_reader in enumerate(input_readers):
            shard_id = model.ShardState.shard_id_from_number(spec.mapreduce_id, shard_number)
            MapperWorkerCallbackHandler.schedule_slice(
                base_path, spec, shard_id, 0, input_reader, queue_name=queue_name
            )
Developer: swsnider, Project: sse-pos, Lines: 33, Source: handlers.py


Example 4: _schedule_shards

  def _schedule_shards(cls,
                       spec,
                       input_readers,
                       queue_name,
                       base_path,
                       mr_state):
    """Prepares shard states and schedules their execution.

    Args:
      spec: mapreduce specification as MapreduceSpec.
      input_readers: list of InputReaders describing shard splits.
      queue_name: The queue to run this job on.
      base_path: The base url path of mapreduce callbacks.
      mr_state: The MapReduceState of current job.
    """
    # Note: it's safe to re-attempt this handler because:
    # - shard state has deterministic and unique key.
    # - _schedule_slice will fall back gracefully if a task already exists.
    shard_states = []
    writer_class = spec.mapper.output_writer_class()
    output_writers = [None] * len(input_readers)
    for shard_number, input_reader in enumerate(input_readers):
      shard_state = model.ShardState.create_new(spec.mapreduce_id, shard_number)
      shard_state.shard_description = str(input_reader)
      if writer_class:
        output_writers[shard_number] = writer_class.create(
            mr_state, shard_state)
      shard_states.append(shard_state)

    # Retrieves already existing shards.
    existing_shard_states = db.get(shard.key() for shard in shard_states)
    existing_shard_keys = set(shard.key() for shard in existing_shard_states
                              if shard is not None)

    # Puts only non-existing shards.
    db.put((shard for shard in shard_states
            if shard.key() not in existing_shard_keys),
           config=util.create_datastore_write_config(spec))

    # Give each shard some quota to start with.
    processing_rate = int(spec.mapper.params.get(
        "processing_rate") or model._DEFAULT_PROCESSING_RATE_PER_SEC)
    quota_refill = processing_rate / len(shard_states)
    quota_manager = quota.QuotaManager(memcache.Client())
    for shard_state in shard_states:
      quota_manager.put(shard_state.shard_id, quota_refill)

    # Schedule shard tasks.
    for shard_number, (input_reader, output_writer) in enumerate(
        zip(input_readers, output_writers)):
      shard_id = model.ShardState.shard_id_from_number(
          spec.mapreduce_id, shard_number)
      MapperWorkerCallbackHandler._schedule_slice(
          shard_states[shard_number],
          model.TransientShardState(
              base_path, spec, shard_id, 0, input_reader, input_reader,
              output_writer=output_writer),
          queue_name=queue_name)
Developer: anmoln94, Project: twitter-sentiment-analysis-gae, Lines: 58, Source: handlers.py


Example 5: handle

 def handle(self):
   mapreduce_id = self.request.get("mapreduce_id")
   mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
   if mapreduce_state:
     config = util.create_datastore_write_config(mapreduce_state.mapreduce_spec)
     db.delete(model.MapreduceControl.get_key_by_job_id(mapreduce_id),
               config=config)
     shard_states = model.ShardState.find_by_mapreduce_state(mapreduce_state)
     for shard_state in shard_states:
       db.delete(util._HugeTaskPayload.all().ancestor(shard_state),
                 config=config)
     db.delete(shard_states, config=config)
     db.delete(util._HugeTaskPayload.all().ancestor(mapreduce_state),
               config=config)
Developer: Aneurysm9, Project: EVE-Prosper, Lines: 14, Source: handlers.py


Example 6: _finalize_job

  def _finalize_job(mapreduce_spec, mapreduce_state, base_path):
    """Finalize job execution.

    Finalizes the output writer, invokes the done callback, and schedules
    finalize job execution.

    Args:
      mapreduce_spec: an instance of MapreduceSpec
      mapreduce_state: an instance of MapreduceState
      base_path: handler base path.
    """
    config = util.create_datastore_write_config(mapreduce_spec)

    # Only finalize the output writers if the job is successful.
    if (mapreduce_spec.mapper.output_writer_class() and
        mapreduce_state.result_status == model.MapreduceState.RESULT_SUCCESS):
      mapreduce_spec.mapper.output_writer_class().finalize_job(mapreduce_state)

    # Enqueue done_callback if needed.
    def put_state(state):
      state.put(config=config)
      done_callback = mapreduce_spec.params.get(
          model.MapreduceSpec.PARAM_DONE_CALLBACK)
      if done_callback:
        done_task = taskqueue.Task(
            url=done_callback,
            headers={"Mapreduce-Id": mapreduce_spec.mapreduce_id},
            method=mapreduce_spec.params.get("done_callback_method", "POST"))
        queue_name = mapreduce_spec.params.get(
            model.MapreduceSpec.PARAM_DONE_CALLBACK_QUEUE,
            "default")

        if not _run_task_hook(mapreduce_spec.get_hooks(),
                              "enqueue_done_task",
                              done_task,
                              queue_name):
          done_task.add(queue_name, transactional=True)
      FinalizeJobHandler.schedule(base_path, mapreduce_spec)

    db.run_in_transaction(put_state, mapreduce_state)
Developer: MattFaus, Project: appengine-mapreduce, Lines: 40, Source: handlers.py


Example 7: handle

  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to the controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      if tstate.output_writer:
        tstate.output_writer.finalize(ctx, shard_state.shard_number)
      # We received a command to abort. We don't care if we override
      # some data.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    context.Context._set(ctx)
    try:
      # consume quota ahead, because we do not want to run a datastore
      # query if there's not enough quota for the shard.
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        # We shouldn't fetch an entity from the reader if there's not enough
        # quota to process it. Perform all quota checks proactively.
        if not quota_consumer or quota_consumer.consume():
          for entity in input_reader:
            if isinstance(entity, db.Model):
              shard_state.last_work_item = repr(entity.key())
            else:
              shard_state.last_work_item = repr(entity)[:100]

            scan_aborted = not self.process_data(
                entity, input_reader, ctx, tstate)

            # Check if we've got enough quota for the next entity.
            if (quota_consumer and not scan_aborted and
                not quota_consumer.consume()):
              scan_aborted = True
            if scan_aborted:
              break
        else:
          scan_aborted = True


        if not scan_aborted:
          logging.info("Processing done for shard %d of job '%s'",
                       shard_state.shard_number, shard_state.mapreduce_id)
          # We consumed an extra quota item at the end of the for loop.
          # Just be nice here and give it back :)
          if quota_consumer:
            quota_consumer.put(1)
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_SUCCESS

      operation.counters.Increment(
          context.COUNTER_MAPPER_WALLTIME_MS,
          int((time.time() - self._start_time)*1000))(ctx)

      # TODO(user): Mike said we don't want this to happen in case of an
      # exception while scanning. Figure out when it's appropriate to skip.
      ctx.flush()

      if not shard_state.active:
        # shard is going to stop. Finalize output writer if any.
        if tstate.output_writer:
          tstate.output_writer.finalize(ctx, shard_state.shard_number)
#......... (remaining code omitted) .........
Developer: koush, Project: appengine-mapreduce, Lines: 101, Source: handlers.py


Example 8: handle

    def handle(self):
        """Handle request."""
        spec = model.MapreduceSpec.from_json_str(self.request.get("mapreduce_spec"))
        self._start_time = self._time()
        shard_id = self.shard_id()

        # TODO(user): Make this prettier
        logging.debug("post: shard=%s slice=%s headers=%s", shard_id, self.slice_id(), self.request.headers)

        shard_state, control = db.get(
            [
                model.ShardState.get_key_by_shard_id(shard_id),
                model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
            ]
        )
        if not shard_state:
            # We're letting this task die. It's up to the controller code to
            # reinitialize and restart the task.
            logging.error("State not found for shard ID %r; shutting down", shard_id)
            return

        if control and control.command == model.MapreduceControl.ABORT:
            logging.info(
                "Abort command received by shard %d of job '%s'", shard_state.shard_number, shard_state.mapreduce_id
            )
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_ABORTED
            shard_state.put(config=util.create_datastore_write_config(spec))
            model.MapreduceControl.abort(spec.mapreduce_id)
            return

        input_reader = self.input_reader(spec.mapper)

        if spec.mapper.params.get("enable_quota", True):
            quota_consumer = quota.QuotaConsumer(quota.QuotaManager(memcache.Client()), shard_id, _QUOTA_BATCH_SIZE)
        else:
            quota_consumer = None

        ctx = context.Context(spec, shard_state, task_retry_count=self.task_retry_count())
        context.Context._set(ctx)

        try:
            # consume quota ahead, because we do not want to run a datastore
            # query if there's not enough quota for the shard.
            if not quota_consumer or quota_consumer.check():
                scan_aborted = False
                entity = None

                # We shouldn't fetch an entity from the reader if there's not enough
                # quota to process it. Perform all quota checks proactively.
                if not quota_consumer or quota_consumer.consume():
                    for entity in input_reader:
                        if isinstance(entity, db.Model):
                            shard_state.last_work_item = repr(entity.key())
                        else:
                            shard_state.last_work_item = repr(entity)[:100]

                        scan_aborted = not self.process_entity(entity, ctx)

                        # Check if we've got enough quota for the next entity.
                        if quota_consumer and not scan_aborted and not quota_consumer.consume():
                            scan_aborted = True
                        if scan_aborted:
                            break
                else:
                    scan_aborted = True

                if not scan_aborted:
                    logging.info(
                        "Processing done for shard %d of job '%s'", shard_state.shard_number, shard_state.mapreduce_id
                    )
                    # We consumed an extra quota item at the end of the for loop.
                    # Just be nice here and give it back :)
                    if quota_consumer:
                        quota_consumer.put(1)
                    shard_state.active = False
                    shard_state.result_status = model.ShardState.RESULT_SUCCESS

            # TODO(user): Mike said we don't want this to happen in case of an
            # exception while scanning. Figure out when it's appropriate to skip.
            ctx.flush()
        finally:
            context.Context._set(None)
            if quota_consumer:
                quota_consumer.dispose()

        # Rescheduling work should always be the last statement. It shouldn't happen
        # if there were any exceptions in code before it.
        if shard_state.active:
            self.reschedule(spec, input_reader)
Developer: swsnider, Project: sse-pos, Lines: 90, Source: handlers.py


Example 9: testForceWrites

 def testForceWrites(self):
   self.spec.params["force_writes"] = "True"
   config = util.create_datastore_write_config(self.spec)
   self.assertTrue(config)
   self.assertTrue(config.force_writes)
Developer: Hao-Hsuan, Project: KhanLatest, Lines: 5, Source: util_test.py
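
Example 9 sets "force_writes" directly on a spec inside a test. In a real job, the flag would normally be supplied with the job's parameters at kickoff and would then reach create_datastore_write_config through MapreduceSpec.params. A hypothetical sketch follows; the job name, handler, and entity kind are illustrative, while control.start_map and its mapreduce_parameters argument are the library's conventional entry point.

  from mapreduce import control

  # Hypothetical kickoff: "force_writes" lands in MapreduceSpec.params, which
  # create_datastore_write_config consults for every subsequent datastore write.
  job_id = control.start_map(
      name="touch_all_accounts",                            # illustrative name
      handler_spec="main.touch_entity",                     # hypothetical mapper
      reader_spec="mapreduce.input_readers.DatastoreInputReader",
      mapper_parameters={"entity_kind": "models.Account"},  # hypothetical kind
      mapreduce_parameters={"force_writes": "True"},
  )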


Example 10: handle

  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to the controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    try:
      # consume quota ahead, because we do not want to run a datastore
      # query if there's not enough quota for the shard.
      if not quota_consumer or quota_consumer.check():
        scan_aborted = False
        entity = None

        try:
          # We shouldn't fetch an entity from the reader if there's not enough
          # quota to process it. Perform all quota checks proactively.
          if not quota_consumer or quota_consumer.consume(verbose=True):
            for entity in input_reader:
              if isinstance(entity, db.Model):
                shard_state.last_work_item = repr(entity.key())
              else:
                shard_state.last_work_item = repr(entity)[:100]

              scan_aborted = not self.process_data(
                  entity, input_reader, ctx, tstate)

              # Check if we've got enough quota for the next entity.
              if (quota_consumer and not scan_aborted and
                  not quota_consumer.consume(verbose=True)):
                scan_aborted = True
              if scan_aborted:
                break
          else:
            scan_aborted = True

          if not scan_aborted:
            logging.info("Processing done for shard %d of job '%s'",
                         shard_state.shard_number, shard_state.mapreduce_id)
            # We consumed an extra quota item at the end of the for loop.
            # Just be nice here and give it back :)
            if quota_consumer:
              quota_consumer.put(1)
            shard_state.active = False
            shard_state.result_status = model.ShardState.RESULT_SUCCESS

          operation.counters.Increment(
              context.COUNTER_MAPPER_WALLTIME_MS,
              int((time.time() - self._start_time)*1000))(ctx)

#......... (remaining code omitted) .........
Developer: MattFaus, Project: appengine-mapreduce, Lines: 101, Source: handlers.py


Example 11: tx

          shard_state.result_status = model.ShardState.RESULT_FAILED
        except errors.FailJobError, e:
          logging.error("Job failed: %s", e)
          scan_aborted = True
          shard_state.active = False
          shard_state.result_status = model.ShardState.RESULT_FAILED

      if not shard_state.active:
        # shard is going to stop. Don't finalize output writer unless the job is
        # going to be successful, because writer might be stuck in some bad state
        # otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)
      # We don't want shard state to override active state, since that
      # may stall job execution (see issue 116). Do a transactional
      # verification for status.
      # TODO(user): this might still result in some data inconsistency
      # which can be avoided. It doesn't seem to be worth it now, because
      # various crashes might result in all sorts of data inconsistencies
      # anyway.
      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
Developer: MattFaus, Project: appengine-mapreduce, Lines: 31, Source: handlers.py


Example 12: handle

  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to the controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:
        # shard is going to stop. Finalize output writer only when shard is
        # successful because writer might be stuck in some bad state otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state.shard_number)

      config = util.create_datastore_write_config(spec)
      # We don't want shard state to override active state, since that
      # may stall job execution (see issue 116). Do a transactional
      # verification for status.
      # TODO(user): this might still result in some data inconsistency
      # which can be avoided. It doesn't seem to be worth it now, because
      # various crashes might result in all sorts of data inconsistencies
      # anyway.
      @db.transactional(retries=5)
      def tx():
        fresh_shard_state = db.get(
            model.ShardState.get_key_by_shard_id(shard_id))
        if not fresh_shard_state:
          raise db.Rollback()
        if (not fresh_shard_state.active or
            "worker_active_state_collision" in _TEST_INJECTED_FAULTS):
          shard_state.active = False
          logging.error("Spurious task execution. Aborting the shard.")
          return
        fresh_shard_state.copy_from(shard_state)
        fresh_shard_state.put(config=config)
      tx()
    finally:
      context.Context._set(None)
      if quota_consumer:
        quota_consumer.dispose()

    # Rescheduling work should always be the last statement. It shouldn't happen
    # if there were any exceptions in code before it.
    if shard_state.active:
      self.reschedule(shard_state, tstate)
#......... (remaining code omitted) .........
Developer: Paulius-Maruska, Project: marex, Lines: 101, Source: handlers.py


Example 13: testDefaultConfig

 def testDefaultConfig(self):
   config = util.create_datastore_write_config(self.spec)
   self.assertTrue(config)
   self.assertFalse(config.force_writes)
Developer: Hao-Hsuan, Project: KhanLatest, Lines: 4, Source: util_test.py


Example 14: _start_map

  def _start_map(cls, name, mapper_spec,
                 mapreduce_params,
                 base_path=None,
                 queue_name=None,
                 eta=None,
                 countdown=None,
                 hooks_class_name=None,
                 _app=None,
                 transactional=False):
    queue_name = queue_name or os.environ.get("HTTP_X_APPENGINE_QUEUENAME",
                                              "default")
    if queue_name[0] == "_":
      # We are currently in some special queue. E.g. __cron.
      queue_name = "default"

    # Check that handler can be instantiated.
    mapper_spec.get_handler()

    # Check that reader can be instantiated and is configured correctly
    mapper_input_reader_class = mapper_spec.input_reader_class()
    mapper_input_reader_class.validate(mapper_spec)

    mapper_output_writer_class = mapper_spec.output_writer_class()
    if mapper_output_writer_class:
      mapper_output_writer_class.validate(mapper_spec)

    mapreduce_id = model.MapreduceState.new_mapreduce_id()
    mapreduce_spec = model.MapreduceSpec(
        name,
        mapreduce_id,
        mapper_spec.to_json(),
        mapreduce_params,
        hooks_class_name)

    kickoff_params = {"mapreduce_spec": mapreduce_spec.to_json_str()}
    if _app:
      kickoff_params["app"] = _app
    kickoff_worker_task = util.HugeTask(
        url=base_path + "/kickoffjob_callback",
        params=kickoff_params,
        eta=eta,
        countdown=countdown)

    hooks = mapreduce_spec.get_hooks()
    config = util.create_datastore_write_config(mapreduce_spec)

    def start_mapreduce():
      parent = None
      if not transactional:
        # Save state in datastore so that UI can see it.
        # We can't save state in a foreign transaction, but conventional UI
        # doesn't ask for transactional starts anyway.
        state = model.MapreduceState.create_new(mapreduce_spec.mapreduce_id)
        state.mapreduce_spec = mapreduce_spec
        state.active = True
        state.active_shards = mapper_spec.shard_count
        if _app:
          state.app_id = _app
        state.put(config=config)
        parent = state

      if hooks is not None:
        try:
          hooks.enqueue_kickoff_task(kickoff_worker_task, queue_name)
        except NotImplementedError:
          # Use the default task addition implementation.
          pass
        else:
          return
      kickoff_worker_task.add(queue_name, transactional=True, parent=parent)

    if transactional:
      start_mapreduce()
    else:
      db.run_in_transaction(start_mapreduce)

    return mapreduce_id
Developer: koush, Project: appengine-mapreduce, Lines: 77, Source: handlers.py


Example 15: handle

  def handle(self):
    """Handle request."""
    tstate = model.TransientShardState.from_request(self.request)
    spec = tstate.mapreduce_spec
    self._start_time = self._time()
    shard_id = tstate.shard_id

    shard_state, control = db.get([
        model.ShardState.get_key_by_shard_id(shard_id),
        model.MapreduceControl.get_key_by_job_id(spec.mapreduce_id),
    ])
    if not shard_state:
      # We're letting this task die. It's up to the controller code to
      # reinitialize and restart the task.
      logging.error("State not found for shard ID %r; shutting down",
                    shard_id)
      return

    if not shard_state.active:
      logging.error("Shard is not active. Looks like spurious task execution.")
      return
    if shard_state.retries > tstate.retries:
      logging.error(
          "Got shard %s from previous shard retry %s. Drop",
          shard_state.shard_id,
          tstate.retries)
      return
    elif shard_state.retries < tstate.retries:
      # This happens when the transaction that updates shard state and enqueues
      # the task fails after the task has been added. That transaction will
      # be retried. Adding the same task will result in
      # TaskAlreadyExistsError but the error is ignored.
      raise ValueError(
          "ShardState for %s is behind slice. Waiting for it to catch up."
          % shard_state.shard_id)

    ctx = context.Context(spec, shard_state,
                          task_retry_count=self.task_retry_count())

    if control and control.command == model.MapreduceControl.ABORT:
      logging.info("Abort command received by shard %d of job '%s'",
                   shard_state.shard_number, shard_state.mapreduce_id)
      # NOTE: When aborting, specifically do not finalize the output writer
      # because it might be in a bad state.
      shard_state.active = False
      shard_state.result_status = model.ShardState.RESULT_ABORTED
      shard_state.put(config=util.create_datastore_write_config(spec))
      model.MapreduceControl.abort(spec.mapreduce_id)
      return

    input_reader = tstate.input_reader

    if spec.mapper.params.get("enable_quota", True):
      quota_consumer = quota.QuotaConsumer(
          quota.QuotaManager(memcache.Client()),
          shard_id,
          _QUOTA_BATCH_SIZE)
    else:
      quota_consumer = None

    # Tell NDB to never cache anything in memcache or in-process. This ensures
    # that entities fetched from Datastore input_readers via NDB will not bloat
    # up the request memory size and Datastore Puts will avoid doing calls
    # to memcache. Without this you get soft memory limit exits, which hurts
    # overall throughput.
    if ndb is not None:
      ndb_ctx = ndb.get_context()
      ndb_ctx.set_cache_policy(lambda key: False)
      ndb_ctx.set_memcache_policy(lambda key: False)

    context.Context._set(ctx)
    retry_shard = False

    try:
      self.process_inputs(
          input_reader, shard_state, tstate, quota_consumer, ctx)

      if not shard_state.active:
        # shard is going to stop. Finalize output writer only when shard is
        # successful because writer might be stuck in some bad state otherwise.
        if (shard_state.result_status == model.ShardState.RESULT_SUCCESS and
            tstate.output_writer):
          tstate.output_writer.finalize(ctx, shard_state)
    # pylint: disable=broad-except
    except Exception, e:
      retry_shard = self._retry_logic(e, shard_state, tstate, spec.mapreduce_id)
Developer: anmoln94, Project: twitter-sentiment-analysis-gae, Lines: 86, Source: handlers.py



Note: The mapreduce.util.create_datastore_write_config examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; consult each project's License before redistributing or reusing the code. Do not repost without permission.

