本文整理汇总了Python中toolz.take函数的典型用法代码示例。如果您正苦于以下问题:Python take函数的具体用法?Python take怎么用?Python take使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了take函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: sample_ssh
def sample_ssh(data, lines=500):
    """Write the leading lines of ``data`` to a scratch file and yield its path.

    At most ``lines`` items from ``data.lines()`` are written, each exactly as
    produced.  The path obtained from ``tmpfile()`` is yielded while that
    context is still open.
    """
    # NOTE(review): presumably wrapped by a context-manager decorator that is
    # outside this snippet; as shown it is a plain generator -- confirm.
    with tmpfile() as fn:
        with open(fn, 'w') as sink:
            sink.writelines(take(lines, data.lines()))
        # Yield only after the handle is closed so the sample is flushed.
        yield fn
开发者ID:MoherX,项目名称:odo,代码行数:7,代码来源:ssh.py
示例2: create_merge_tree
def create_merge_tree(func, keys, token):
    """Create a task tree that merges all the keys with a reduction function.

    Parameters
    ----------
    func: callable
        Reduction function that accepts a single list of values to reduce.
    keys: iterable
        Keys to reduce from the source dask graph.  Any iterable is accepted;
        it is materialized once so that its length can be taken.
    token: object
        Included in each key of the returned dict.

    Returns
    -------
    dict
        Maps ``(token, level, i)`` to ``(func, [keys of the previous level])``.
        Empty when fewer than two keys are supplied.

    Notes
    -----
    This creates a k-ary tree where k depends on the current level and is
    greater the further away a node is from the root node.  This reduces the
    total number of nodes (thereby reducing scheduler overhead), but still
    has beneficial properties of trees.

    For reasonable numbers of keys, N < 1e5, the total number of nodes in the
    tree is roughly ``N**0.78``.  For 1e5 < N < 2e5, is it roughly ``N**0.8``.
    """
    # ``len`` below needs a sized container; generators would otherwise fail.
    keys = list(keys)
    level = 0
    prev_width = len(keys)
    prev_keys = iter(keys)
    rv = {}
    while prev_width > 1:
        width = tree_width(prev_width)
        groups = tree_groups(prev_width, width)
        keys = [(token, level, i) for i in range(width)]
        # Each new node takes the next ``num`` keys of the previous level from
        # the shared iterator, so no key is handed to two nodes.
        for num, key in zip(groups, keys):
            rv[key] = (func, list(take(num, prev_keys)))
        prev_width = width
        prev_keys = iter(keys)
        level += 1
    return rv
开发者ID:ankravch,项目名称:dask,代码行数:34,代码来源:partitionquantiles.py
示例3: test_map
def test_map(client):
    """Check ``executor.map`` when fully consumed, timed out, and abandoned."""
    N = 10

    # Case 1: every mapped result is delivered exactly once.
    with client.get_executor() as e:
        it = e.map(inc, range(N))
        outstanding = set(range(1, N + 1))
        for value in it:
            outstanding.remove(value)
        assert not outstanding

    # Case 2: iteration raises TimeoutError after only part of the results.
    with client.get_executor(pure=False) as e:
        it = e.map(slowinc, range(N), [0.1] * N, timeout=0.4)
        collected = []
        with pytest.raises(TimeoutError):
            for value in it:
                collected.append(value)
        assert 2 <= len(collected) < 7

    with client.get_executor(pure=False) as e:
        # Not consuming the iterator will cancel remaining tasks
        it = e.map(slowinc, range(N), [0.1] * N)
        for _ in take(2, it):
            pass
        # Some tasks still processing
        assert number_of_processing_tasks(client) > 0
        # Garbage collect the iterator => remaining tasks are cancelled
        del it
        assert number_of_processing_tasks(client) == 0
开发者ID:tomMoral,项目名称:distributed,代码行数:29,代码来源:test_client_executor.py
示例4: test_local_client
def test_local_client(loop):
    """Producers feed channel 'x'; a consumer chains sums onto channel 'y'."""

    def produce(n):
        # Submit ``n`` slow increments and publish each future on 'x'.
        with local_client() as c:
            x = c.channel('x')
            for i in range(n):
                fut = c.submit(slowinc, i, delay=0.01, key='f-%d' % i)
                x.append(fut)
            x.flush()

    def consume():
        # Fold every future read from 'x' into a running sum published on 'y'.
        with local_client() as c:
            x = c.channel('x')
            y = c.channel('y')
            last = 0
            for fut in x:
                last = c.submit(add, fut, last, key='add-' + fut.key)
                y.append(last)

    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            x = c.channel('x')
            y = c.channel('y')

            # Keep the submitted futures referenced for the rest of the test.
            producers = (c.submit(produce, 5), c.submit(produce, 10))
            consumer = c.submit(consume)

            results = [fut.result() for fut in take(15, y)]

            assert len(results) == 15
            assert all(0 < r < 100 for r in results)
开发者ID:dask,项目名称:distributed,代码行数:34,代码来源:test_channels.py
示例5: _is_from_ncbi
def _is_from_ncbi(gff3_file):
    """Report which cross-reference spelling appears in a GFF3 file.

    Scans at most the first 10000 lines.  Returns ``"Dbxref"`` or
    ``"db_xref"`` for the first line containing one of them (``"Dbxref"`` is
    checked first on each line), or None when neither is found.
    """
    markers = ("Dbxref", "db_xref")
    with open(gff3_file) as in_handle:
        for line in tz.take(10000, in_handle):
            for marker in markers:
                if marker in line:
                    return marker
    return None
开发者ID:chapmanb,项目名称:bcbio-nextgen,代码行数:8,代码来源:bcbio_setup_genome.py
示例6: forcastall
def forcastall(intid):
    """Forecast 60 values for one artist and write them as one CSV row.

    Reads the ``"action_1"`` series returned by ``read_artist(intid)``, feeds
    it to ``training(data, 4)``, truncates each of the first 60 produced
    values to an integer, and writes them comma-separated to
    ``./past_forcast/<intid>.csv``.
    """
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # whereas the Python 2 original handed ``training`` a list.
    data = [int(v) for v in read_artist(intid)["action_1"]]
    sun = training(data, 4)
    predictdata = [str(int(v)) for v in toolz.take(60, sun)]  # forecast 60 days
    with open("./past_forcast/{aid}.csv".format(aid=intid), "wt") as f:
        f.write(",".join(predictdata))
开发者ID:chengkeai,项目名称:tianchibigdata,代码行数:8,代码来源:data_artist.py
示例7: song_info
def song_info(artist, title):
    """Search for a song and print the first three results.

    When ``title`` is the empty string, ``artist`` is used as a combined
    free-text query; otherwise the search is by artist and title.
    """
    # Compare by value: ``is`` tests object identity, which is not guaranteed
    # to hold for two equal strings.
    if title == u'':
        print("Searching for '%s'" % artist)
        result = song.search(combined=artist)
    else:
        print("Searching for '%s - %s'" % (artist, title))
        result = song.search(artist=artist, title=title)
    print_search_results(take(3, result))
开发者ID:jasalt,项目名称:key-friend,代码行数:8,代码来源:keyfriend.py
示例8: create_categories
def create_categories(width, plus_one):
    """Build a list of fixed-length strings over the 256 characters chr(0)..chr(255).

    Each string has ``int(width / 8) + plus_one`` characters; at most
    ``2 ** width + plus_one`` strings are returned, in ``product`` order.
    """
    length = int(width / 8) + plus_one
    limit = 2 ** width + plus_one
    alphabet = [chr(code) for code in range(256)]
    combos = product(alphabet, repeat=length)
    return list(map(''.join, take(limit, combos)))
开发者ID:FranSal,项目名称:zipline,代码行数:9,代码来源:test_labelarray.py
示例9: plot_lrates
def plot_lrates(f, df, x0, etas, niter):
    """Plot ``f`` along gradient descent for several learning rates.

    For each ``eta`` in ``etas``, runs ``gradient_descent(df, x0, eta=eta)``,
    evaluates ``f`` on the first ``niter`` iterates, and draws the values
    against iteration numbers 1..niter.  The figure is shown and then cleared.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1)
    # ``range``/``str`` replace the Python 2-only ``xrange``/``unicode`` and
    # behave the same on both major versions here.
    iterations = list(range(1, niter + 1))
    for eta in etas:
        values = list(take(niter,
                           (f(e) for e in gradient_descent(df, x0, eta=eta))))
        ax.plot(iterations, values, label=str(eta))
    ax.set_xlabel('Iteration Number')
    ax.set_ylabel('f(x)')
    plt.legend(title='Learning Rate')
    plt.show()
    plt.clf()
开发者ID:philiplessner,项目名称:FunctionalML,代码行数:11,代码来源:out_utils.py
示例10: collect
def collect(grouper, npartitions, group, pbags):
    """Merge one partition from each of several pbags into grouped pairs.

    Takes the first ``npartitions`` items of ``pbags``, reads partition
    ``group`` from each, groups its contents with ``grouper``, and returns a
    list of ``(key, values)`` pairs with values concatenated across pbags.
    """
    from pbag import PBag
    selected = list(take(npartitions, pbags))
    merged = defaultdict(list)
    for pb in selected:
        grouped = groupby(grouper, pb.get_partition(group))
        for key, values in grouped.items():
            merged[key].extend(values)
    return list(merged.items())
开发者ID:esc,项目名称:dask,代码行数:11,代码来源:core.py
示例11: discover
def discover(coll, n=50):
    """Infer a datashape for a collection from its first ``n`` documents.

    The ``'_id'`` field is removed from every sampled document before the
    sample is passed to ``discover`` again.  Returns ``coll.count()`` times
    the discovered record shape; raises ValueError when the leading element
    of the discovered shape is not a dimension.
    """
    sample = list(take(n, coll.find()))
    for record in sample:
        del record['_id']
    # NOTE(review): presumably ``discover`` is a multiple-dispatch function and
    # this call resolves to the list implementation -- confirm.
    shape = discover(sample)
    if not isdimension(shape[0]):
        raise ValueError("Consistent datashape not found")
    return coll.count() * shape.subshape[0]
开发者ID:Casolt,项目名称:blaze,代码行数:11,代码来源:mongo.py
示例12: fit
def fit(cost_f, cost_df, h_theta0, data, eta=0.1, it_max=500, gf='gd'):
    '''
    Compute values of multiple linear regression coefficients

    Parameters
        cost_f: Cost function (J)
        cost_df: gradient of cost function (gradJ for batch and gradJS for
                 stochastic)
        h_theta0: initial guess for fitting parameters (j cols)
        data: list of tuples [(Xi, yi)]
            X: matrix of independent variables (i rows of observations and
               j cols of variables). x0=1 for all i
            y: dependent variable (i rows)
        eta: learning rate
        it_max: maximum number of iterations
        gf: 'gd' for batch gradient descent, 'sgd' for stochastic
    Returns
        (fitting parameters (j cols), list of cost values); None when ``gf``
        is neither 'gd' nor 'sgd'
    '''
    X, y = zip(*data)
    if gf == 'gd':
        f = partial(cost_f, X, y)
        df = partial(cost_df, X, y)
        # Pair every iterate with its cost, stopping after it_max iterations.
        ans = list(take(it_max,
                        ((h_theta, f(h_theta)) for h_theta in
                         fgd.gradient_descent(df, h_theta0, eta=eta))))
        value = list(T(ans)[0])
        cost = list(T(ans)[1])
        return value[-1], cost
    elif gf == 'sgd':
        df = cost_df
        cost = [sum(cost_f(xi, yi, h_theta0) for xi, yi in data)]
        h_theta = h_theta0
        eta_new = eta
        # ``range`` works on Python 2 and 3; ``xrange`` is Python 2 only.
        for _ in range(it_max):
            # One pass: len(y) stochastic updates, keeping the last iterate.
            ans = list(take(len(y), fgd.sgd(df, X, y, h_theta, eta=eta_new)))
            h_theta = ans[-1]
            cost.append(sum(cost_f(xi, yi, h_theta) for xi, yi in data))
            # Decay the learning rate by 1% after every pass.
            eta_new = 0.99 * eta_new
        return h_theta, cost
    else:
        print('Not a valid function')
        return
开发者ID:philiplessner,项目名称:FunctionalML,代码行数:40,代码来源:linear_regression.py
示例13: discover_pymongo_collection
def discover_pymongo_collection(coll, n=50):
    """Infer a datashape for a collection from its first ``n`` documents.

    Fields whose value in the first sampled document is an ``ObjectId`` are
    removed from every sampled document before discovery.  Returns
    ``coll.count()`` times the discovered record shape; raises ValueError
    when the leading element of the discovered shape is not a dimension.
    """
    sample = list(take(n, coll.find()))
    # The first document alone decides which columns count as ObjectId columns.
    oid_cols = []
    for field, value in sample[0].items():
        if isinstance(value, ObjectId):
            oid_cols.append(field)
    for record in sample:
        for field in oid_cols:
            del record[field]
    shape = discover(sample)
    if not isdimension(shape[0]):
        raise ValueError("Consistent datashape not found")
    return coll.count() * shape.subshape[0]
开发者ID:jreback,项目名称:into,代码行数:13,代码来源:mongo.py
示例14: get_dirs_and_files_in_path
def get_dirs_and_files_in_path(path):
    """List directories and files under, or completing, ``path``.

    * Empty ``path`` on Windows: returns the list of logical drive strings.
    * ``path`` exists: its entries are listed.
    * Otherwise ``path`` is treated as a partial name: entries of its parent
      directory whose names start with the final component are listed.

    Returns a ``(dirs, files)`` tuple of sorted lists, each built from at most
    the first 100 matches in listing order, or None (after printing a message)
    when the parent directory cannot be listed.
    """
    if not path and platform.system() == 'Windows':
        import win32api
        drives = win32api.GetLogicalDriveStrings()
        return [d for d in drives.split('\000') if d]

    if os.path.exists(path):
        candidates = [os.path.join(path, name) for name in os.listdir(path)]
    else:
        # Split outside the ``try`` so ``head`` is always bound for the message.
        head, tail = os.path.split(path)
        try:
            names = os.listdir(head)
        except OSError:
            # Only a failed directory listing is expected here; other errors
            # are programming mistakes and should surface.
            print('{0} doesn\'t even exist you stupid'.format(head))
            return None
        candidates = [os.path.join(head, name)
                      for name in names if name.startswith(tail)]

    # ``candidates`` is a list because it is traversed twice below.
    dirs = (c for c in candidates if os.path.isdir(c))
    files = (c for c in candidates if not os.path.isdir(c))
    return (sorted(toolz.take(100, dirs)),
            sorted(toolz.take(100, files)))
开发者ID:wingillis,项目名称:sarnaandra,代码行数:36,代码来源:helpers.py
示例15: _get_callable_regions
def _get_callable_regions(data):
    """Return ``(chrom, start, end)`` regions to parallelize over.

    Uses the single file under ``"callable_regions"`` (or, failing that,
    ``"variant_regions"``) when present; otherwise whole chromosomes read from
    the first ``.bam`` entry of ``data["work_bams"]``.  Raises
    NotImplementedError when neither source is available.
    """
    callable_files = data.get("callable_regions") or data.get("variant_regions")
    if callable_files:
        assert len(callable_files) == 1
        bed = pybedtools.BedTool(callable_files[0])
        return [(r.chrom, int(r.start), int(r.stop)) for r in bed]

    def is_bam(fname):
        return fname.endswith(".bam")

    work_bam = list(tz.take(1, filter(is_bam, data["work_bams"])))
    if not work_bam:
        raise NotImplementedError("No variant regions or BAM files to calculate chromosomes")
    with contextlib.closing(pysam.Samfile(work_bam[0], "rb")) as pysam_bam:
        # One region per reference sequence, spanning its full length.
        contigs = zip(pysam_bam.references, pysam_bam.lengths)
        return [(chrom, 0, length) for (chrom, length) in contigs]
开发者ID:cybersiddhu,项目名称:bcbio-nextgen,代码行数:16,代码来源:joint.py
示例16: __init__
def __init__(self, path, mode='rt', schema=None, columns=None, types=None,
             typehints=None, dialect=None, header=None, open=open,
             nrows_discovery=50, chunksize=1024,
             encoding=DEFAULT_ENCODING, **kwargs):
    """Set up a CSV descriptor, discovering dialect, header and schema.

    Raises ValueError when a readable mode names a missing file, when a
    writable mode is given without ``schema``, or when a sample of rows
    cannot be converted using the discovered schema.

    ``header`` may be None (auto-detect from a sample), a bool, or an int
    (any non-bool int is taken to mean that a header row is present).
    """
    if 'r' in mode and not os.path.isfile(path):
        raise ValueError('CSV file "%s" does not exist' % path)

    if schema is None and 'w' in mode:
        raise ValueError('Please specify schema for writable CSV file')

    self.path = path
    self.mode = mode
    # Choose the opener from the file extension; fall back to ``open``.
    self.open = {'gz': gzip.open, 'bz2': bz2.BZ2File}.get(ext(path), open)

    self._abspath = os.path.abspath(path)
    self.chunksize = chunksize
    self.encoding = encoding

    sample = get_sample(self)
    self.dialect = dialect = discover_dialect(sample, dialect, **kwargs)

    if header is None:
        header = has_header(sample, encoding=encoding)
    elif isinstance(header, int) and not isinstance(header, bool):
        # ``bool`` is a subclass of ``int``: without the extra check an
        # explicit ``header=False`` would be turned into True here.
        header = True
    self.header = header

    if not schema and 'w' not in mode:
        schema = discover_csv(path, encoding=encoding, dialect=dialect,
                              header=self.header, typehints=typehints,
                              types=types, columns=columns,
                              nrows_discovery=nrows_discovery)
    self._schema = schema
    # Re-assigned as in the original; ``header`` has not changed since above.
    self.header = header

    if 'w' not in mode:
        # Validate the schema by converting the first few rows.
        try:
            nd.array(list(take(10, self._iter(chunksize=10))),
                     dtype=str(schema))
        except (ValueError, TypeError) as e:
            raise ValueError("Automatic datashape discovery failed\n"
                             "Discovered the following datashape: %s\n"
                             "But DyND generated the following error: %s\n"
                             "Consider providing type hints using "
                             "typehints={'column-name': 'type'}\n"
                             "like typehints={'start-time': 'string'}"
                             % (schema, e.args[0]))
开发者ID:Back2Basics,项目名称:blaze,代码行数:46,代码来源:csv.py
示例17: get_displacement
def get_displacement(n_input, n_tilings):
    """
    Get the displacement vector to use in offsetting the tilings.

    Candidates are the integers ``i`` with ``1 <= i < n_tilings // 2`` that
    are coprime with ``n_tilings``.  The first ``n_input`` candidates are
    used; when there are fewer than that, the candidates are cycled so that
    repeats occur as seldom as possible.

    ..note::
        It's recommended by the CMAC people to just increase the number of
        tilings when there aren't enough candidate values for the
        displacement vector.
    """
    upper = n_tilings // 2
    coprime = []
    for candidate in range(1, upper):
        if gcd(candidate, n_tilings) == 1:
            coprime.append(candidate)
    offsets = take(n_input, cycle(coprime))
    return np.array(list(offsets))
开发者ID:rldotai,项目名称:flib,代码行数:18,代码来源:tile_coding.py
示例18: _get_callable_regions
def _get_callable_regions(data):
    """Return ``(chrom, start, end)`` regions to parallelize over.

    Sources, in order of preference: the single file under
    ``"callable_regions"`` or ``"variant_regions"``; the references of the
    first ``.bam`` entry in ``data["work_bams"]``; the contigs of the
    reference file.
    """
    import pybedtools
    callable_files = data.get("callable_regions") or data.get("variant_regions")
    if callable_files:
        assert len(callable_files) == 1
        bed = pybedtools.BedTool(callable_files[0])
        return [(r.chrom, int(r.start), int(r.stop)) for r in bed]

    def is_bam(fname):
        return fname.endswith(".bam")

    work_bam = list(tz.take(1, filter(is_bam, data["work_bams"])))
    if work_bam:
        with pysam.Samfile(work_bam[0], "rb") as pysam_bam:
            # One region per reference sequence, spanning its full length.
            contigs = zip(pysam_bam.references, pysam_bam.lengths)
            return [(chrom, 0, length) for (chrom, length) in contigs]

    # No BAM available: fall back to the contigs of the reference file.
    ref_contigs = ref.file_contigs(dd.get_ref_file(data), data["config"])
    return [(r.name, 0, r.size) for r in ref_contigs]
开发者ID:bennyyu686,项目名称:bcbio-nextgen,代码行数:18,代码来源:joint.py
示例19: bag_range
def bag_range(n, npartitions):
    """ Numbers from zero to n

    Examples
    --------

    >>> import dask.bag as db
    >>> b = db.range(5, npartitions=2)
    >>> list(b)
    [0, 1, 2, 3, 4]
    """
    # NOTE: when n < npartitions the step below is 0 and ``range`` raises
    # ValueError.
    size = n // npartitions
    name = 'range-%d-npartitions-%d' % (n, npartitions)
    starts = list(take(npartitions, range(0, n, size)))
    dsk = {}
    for i, start in enumerate(starts):
        dsk[(name, i)] = (reify, (range, start, min(start + size, n)))
    if n % npartitions != 0:
        # n is not evenly divisible: stretch the final partition up to n.
        last = len(starts) - 1
        dsk[(name, last)] = (reify, (range, starts[last], n))
    return Bag(dsk, name, npartitions)
开发者ID:jcorbin,项目名称:dask,代码行数:22,代码来源:core.py
示例20: roll
def roll(request):
    """Show the lunch-roulette page (GET) or create lunch groups (POST).

    GET renders the form together with a formset of participating people.
    POST validates the submitted form; when it is invalid the page is
    re-rendered with an error message, otherwise the participating people are
    shuffled and stored as ``LunchGroup`` rows of up to four members for the
    submitted date, followed by a redirect back to this view.
    """
    template = 'lunch_roulette/base.html'

    def page_context(form):
        # Context shared by the initial page and the invalid-form re-render.
        ParticipantFormset = modelformset_factory(models.Participant)
        active = models.Participant.objects.filter(is_participating=True)
        return {'form': form,
                'formset': ParticipantFormset(queryset=active)}

    if request.method == 'GET':
        return render(request, template, page_context(forms.LunchGroupForm()))

    if request.method == 'POST':
        form = forms.LunchGroupForm(request.POST)
        if not form.is_valid():
            # The original referenced an undefined ``context`` here and then
            # discarded the rendered response; build one and return it.
            context = page_context(form)
            context['messages'] = ['Date is not valid']
            return render(request, template, context)

        date = form.cleaned_data.get('date')
        participants = list(
            models.Participant.objects.filter(is_participating=True))
        random.shuffle(participants)

        # One shared iterator lets ``take`` hand out successive groups of four
        # without re-slicing the list on every pass.
        remaining = iter(participants)
        while True:
            subgroup = list(toolz.take(4, remaining))
            if not subgroup:
                break
            logging.warning(subgroup)
            group = models.LunchGroup(date=date)
            group.save()
            group.participants.add(*subgroup)
            group.save()

    return redirect(roll)
开发者ID:sesas,项目名称:django-lunch-roulette,代码行数:38,代码来源:views.py
注:本文中的toolz.take函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论