This article collects typical usage examples of the run_task function from the Python module pydoop.mapreduce.pipes. If you have been wondering what run_task does, how it is called, or what working code that uses it looks like, the curated examples below should help.
20 code examples of run_task are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
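Before diving into the extracted examples, here is a minimal, self-contained sketch of the canonical calling pattern. The WcMapper and WcReducer names are illustrative, not taken from any project below; the structure mirrors Examples 10 and 18, including summing the raw emitted counts as in Example 18:

import re

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class WcMapper(Mapper):
    def map(self, context):
        # Emit one (word, 1) pair per token of the current input value.
        for w in re.split(r'\W+', context.value):
            if w:
                context.emit(w, 1)


class WcReducer(Reducer):
    def reduce(self, context):
        # context.values iterates over all values grouped under context.key.
        context.emit(context.key, str(sum(context.values)))


if __name__ == "__main__":
    # run_task drives the Hadoop pipes protocol: it reads framework
    # commands, builds components through the Factory, and runs the task.
    run_task(Factory(WcMapper, WcReducer))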
Example 1: test_map_combiner_reduce

def test_map_combiner_reduce(self):
    factory = TFactory(combiner=TReducer)
    sas = SortAndShuffle()
    run_task(factory, istream=self.stream, ostream=sas)
    with self._mkf('foo_map_combiner_reduce.out') as o:
        run_task(factory, istream=sas, ostream=o,
                 private_encoding=False)
    self.check_result('foo_map_combiner_reduce.out', STREAM_1)

Developer: wtj, Project: pydoop, Lines: 8, Source: test_framework.py
Example 2: test_timer

def test_timer(self):
    factory = TFactory(mapper=SleepingMapper)
    exp_count = {
        'registerCounter': 1,
        'incrementCounter': Counter(
            [_[0] for _ in STREAM_1]
        )[TextWriter.MAP_ITEM]
    }
    with self._mkf('foo_map_only.out') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    self.check_counts(o.name, exp_count)

Developer: crs4, Project: pydoop, Lines: 11, Source: test_framework.py
Example 3: test_map_only

def test_map_only(self):
    factory = TFactory()
    fname = self._mkfn('foo_map_only.out')
    with open(fname, 'w') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    exp_count = {
        'done': 1,
        'progress': 1,
        'output': sum(len(_[2].split())
                      for _ in STREAM_1 if _[0] is TextWriter.MAP_ITEM)
    }
    self.check_counts(fname, exp_count)

Developer: crs4, Project: pydoop, Lines: 12, Source: test_framework.py
Example 4: test_timer

def test_timer(self):
    factory = TFactory(mapper=SleepingMapper)
    with self._mkf('foo_map_only.out') as o:
        run_task(factory, istream=self.stream1, ostream=o)
    count = Counter()
    with open(o.name) as f:
        for line in f:
            count[line.strip().split('\t', 1)[0]] += 1
    exp_count = {
        'registerCounter': 2,
        'incrementCounter': 2 * Counter([_[0] for _ in STREAM_1])['mapItem']
    }
    for k, v in exp_count.items():
        self.assertIn(k, count)
        self.assertEqual(count[k], v)

Developer: kikkomep, Project: pydoop, Lines: 15, Source: test_framework.py
Example 5: __run_test

def __run_test(self, mode, mapper_class, context_class):
    cmd_file = self.__write_cmd_file(mode)
    pp.run_task(
        pp.Factory(mapper_class=mapper_class), private_encoding=False,
        context_class=context_class, cmd_file=cmd_file)
    out_fn = cmd_file + '.out'
    out_records = []
    with open(out_fn, 'rb') as f:
        bf = BinaryDownStreamAdapter(f)
        for cmd, args in bf:
            if cmd == bf.OUTPUT:
                name, color = args
                out_records.append({'name': name, 'favorite_color': color})
    self.assertEqual(len(out_records), len(self.records))
    for out_r, r in zip(out_records, self.records):
        for k, v in iteritems(out_r):
            self.assertEqual(v.decode('UTF-8'), r[k])

Developer: crs4, Project: pydoop, Lines: 17, Source: test_context.py
Example 6: __run_test

def __run_test(self, mode, mapper_class, context_class):
    cmd_file = self.__write_cmd_file(mode)
    pp.run_task(
        pp.Factory(mapper_class=mapper_class), private_encoding=False,
        context_class=context_class, cmd_file=cmd_file
    )
    out_fn = cmd_file + '.out'
    out_records = []
    with open(out_fn) as ostream:
        for cmd, args in BinaryDownStreamFilter(ostream):
            if cmd == 'output':
                name, color = args
                out_records.append({'name': name, 'favorite_color': color})
    self.assertEqual(len(out_records), len(self.records))
    for out_r, r in zip(out_records, self.records):
        for k, v in out_r.items():
            self.assertEqual(v, r[k])

Developer: kikkomep, Project: pydoop, Lines: 17, Source: test_context.py
Example 7: _test_map_reduce_with_private_encoding_helper

def _test_map_reduce_with_private_encoding_helper(self, factory,
                                                  fast_combiner=False):
    self.stream3.close()
    cmd_file = self.stream3.name
    out_file = cmd_file + '.out'
    reduce_infile = cmd_file + '.reduce'
    reduce_outfile = reduce_infile + '.out'
    run_task(factory, cmd_file=cmd_file, private_encoding=True,
             fast_combiner=fast_combiner)
    data = {}
    bw = BinaryWriter
    with open(out_file, 'rb') as f:
        bf = BinaryDownStreamAdapter(f)
        for cmd, args in bf:
            if cmd == bw.OUTPUT:
                data.setdefault(args[0], []).append(args[1])
    stream = []
    stream.append((bw.START_MESSAGE, 0))
    stream.append((bw.SET_JOB_CONF, 'key1', 'value1', 'key2', 'value2'))
    stream.append((bw.RUN_REDUCE, 0, 0))
    for k in data:
        stream.append((bw.REDUCE_KEY, k))
        for v in data[k]:
            stream.append((bw.REDUCE_VALUE, v))
    stream.append((bw.CLOSE,))
    binary_stream_writer(reduce_infile, stream)
    run_task(factory, cmd_file=reduce_infile, private_encoding=True)
    with open(reduce_outfile, 'rb') as f:
        with self._mkf('foo.out', mode='w') as o:
            bf = BinaryUpStreamDecoder(f)
            for cmd, args in bf:
                if cmd == bw.PROGRESS:
                    o.write('progress\t%s\n' % args[0])
                elif cmd == bw.OUTPUT:
                    o.write('output\t%s\n' %
                            '\t'.join([x.decode('utf-8') for x in args]))
                elif cmd == bw.DONE:
                    o.write('done\n')
    self.check_result('foo.out', STREAM_2)

Developer: crs4, Project: pydoop, Lines: 39, Source: test_framework.py
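Note the two-phase structure of this helper: the first run_task call executes only the map phase, and because private_encoding=True the intermediate pairs land in the .out file in pydoop's private serialization; the test then decodes that file, rebuilds a synthetic reduce command stream (start message, job conf, runReduce, then reduceKey/reduceValue pairs), writes it out with binary_stream_writer, and feeds it to a second run_task call that performs the reduce phase. Example 8 below is the same test written against the older text-command protocol.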
Example 8: _test_map_reduce_with_private_encoding_helper

def _test_map_reduce_with_private_encoding_helper(self, factory,
                                                  fast_combiner=False):
    self.stream3.close()
    cmd_file = self.stream3.name
    out_file = cmd_file + '.out'
    reduce_infile = cmd_file + '.reduce'
    reduce_outfile = reduce_infile + '.out'
    run_task(factory, cmd_file=cmd_file, private_encoding=True,
             fast_combiner=fast_combiner)
    data = {}
    with open(out_file) as f:
        bf = BinaryDownStreamFilter(f)
        for cmd, args in bf:
            if cmd == 'output':
                data.setdefault(args[0], []).append(args[1])
    stream = []
    stream.append(('start', 0))
    stream.append(('setJobConf', ('key1', 'value1', 'key2', 'value2')))
    stream.append(('runReduce', 0, False))
    for k in data:
        stream.append(('reduceKey', k))
        for v in data[k]:
            stream.append(('reduceValue', v))
    stream.append(('close',))
    binary_stream_writer(reduce_infile, stream)
    run_task(factory, cmd_file=reduce_infile, private_encoding=True)
    with open(reduce_outfile) as f, self._mkf('foo.out', mode='w') as o:
        bf = BinaryUpStreamDecoder(f)
        for cmd, args in bf:
            if cmd == 'progress':
                o.write('progress\t%s\n' % args[0])
            elif cmd == 'output':
                o.write('output\t%s\n' % '\t'.join(args))
            elif cmd == 'done':
                o.write('done\n')
    self.check_result('foo.out', STREAM_3)

Developer: kikkomep, Project: pydoop, Lines: 36, Source: test_framework.py
Example 9: __main__

def __main__():
    factory = pp.Factory(mapper_class=Mapper)
    pp.run_task(factory, context_class=AvroContext)

Developer: kikkomep, Project: pydoop, Lines: 3, Source: gen_data.py
Example 10: FilterMapper

import struct

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class FilterMapper(Mapper):
    """
    Process a wordcount output stream, emitting only records relative to
    words whose count is equal to or above the configured threshold.
    """
    def __init__(self, context):
        super(FilterMapper, self).__init__(context)
        jc = context.job_conf
        self.threshold = jc.get_int("filter.occurrence.threshold")

    def map(self, context):
        word, occurrence = context.key, context.value
        occurrence = struct.unpack(">i", occurrence)[0]
        if occurrence >= self.threshold:
            context.emit(word, str(occurrence))


class FilterReducer(Reducer):
    def reduce(self, context):
        pass


if __name__ == "__main__":
    run_task(Factory(FilterMapper, FilterReducer))

Developer: kikkomep, Project: pydoop, Lines: 29, Source: filter.py
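A usage note on the snippet above: the threshold is read from the job configuration key filter.occurrence.threshold, and the get_int lookup will fail at startup if the property is unset, so it has to be supplied at submission time (with standard Hadoop tooling, typically as a -D filter.occurrence.threshold=N property; the exact invocation depends on how the job is launched). The struct.unpack(">i", ...) call assumes each input value is a big-endian 4-byte count, i.e. the format produced by the wordcount reducer shown in Example 18 below.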
Example 11: __main__

def __main__():
    factory = pp.Factory(mapper_class=Mapper, reducer_class=Reducer)
    pp.run_task(factory, private_encoding=True, context_class=AvroContext)

Developer: pymzavro, Project: pymzavro, Lines: 3, Source: TICpydoop.py
Example 12: __main__

def __main__():
    pipes.run_task(pipes.Factory(mapper_class=Mapper))

Developer: crs4, Project: pydoop, Lines: 2, Source: map_only_java_writer.py
Example 13: __main__

def __main__():
    pipes.run_task(pipes.Factory(
        mapper_class=Mapper,
        record_writer_class=Writer,
    ))

Developer: crs4, Project: pydoop, Lines: 5, Source: map_only_python_writer.py
Example 14: main

def main():
    return run_task(Factory(Mapper, Reducer, combiner_class=Reducer))

Developer: CynthiaYiqingHuang, Project: pydoop, Lines: 2, Source: main.py
Example 15: __main__

def __main__():
    """Main function to be executed by the pydoop framework"""
    factory = pp.Factory(mapper_class=Mapper, reducer_class=Reducer,
                         record_reader_class=Reader)
    pp.run_task(factory, private_encoding=True)

Developer: H4ml3t, Project: WMArchive, Lines: 4, Source: Skeleton.py
Example 16: __main__

def __main__():
    pp.run_task(factory)

Developer: kikkomep, Project: pydoop, Lines: 2, Source: wordcount_rr.py
Example 17: test_map_only

def test_map_only(self):
    factory = TFactory()
    with self._mkf('foo_map_only.out') as o:
        run_task(factory, istream=self.stream1, ostream=o)

Developer: kikkomep, Project: pydoop, Lines: 4, Source: test_framework.py
Example 18: WordCountMapper
import struct
import re

from pydoop.mapreduce.pipes import run_task, Factory
from pydoop.mapreduce.api import Mapper, Reducer


class WordCountMapper(Mapper):
    def map(self, context):
        words = re.sub('[^0-9a-zA-Z]+', ' ', context.value).split()
        for w in words:
            context.emit(w, 1)


class WordCountReducer(Reducer):
    def reduce(self, context):
        s = sum(context.values)
        context.emit(context.key, struct.pack(">i", s))


if __name__ == "__main__":
    run_task(Factory(WordCountMapper, WordCountReducer))

Developer: kikkomep, Project: pydoop, Lines: 30, Source: wordcount.py
Example 19: run_task

def run_task(mapper_class, reducer_class=NoAvroColorCount):
    pp.run_task(
        pp.Factory(mapper_class=mapper_class, reducer_class=reducer_class),
        private_encoding=True, context_class=AvroContext
    )

Developer: kikkomep, Project: pydoop, Lines: 5, Source: avro_base.py
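Examples 9, 11, and 19 all pass context_class=AvroContext. As the snippets suggest, this swaps in pydoop's Avro-aware task context, so keys and values are handled as Avro records rather than plain text, and private_encoding=True keeps intermediate data in pydoop's native serialization between map and reduce; this reading is inferred from the snippets above rather than from the projects' full sources.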
Example 20: __main__

def __main__():
    factory = pp.Factory(Mapper, Reducer)
    pp.run_task(factory)

Developer: crs4, Project: seal, Lines: 3, Source: qseq2pair_plain.py
Note: the pydoop.mapreduce.pipes.run_task examples in this article were collected by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use should follow the corresponding project's License. Please do not reproduce without permission.