This article collects typical usage examples of Python's web.group function (from web.py). If you are unsure what web.group does, how to call it, or what real-world uses look like, the curated examples below should help.
The following sections present 20 code examples of the group function, ordered roughly by popularity.
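Before the examples, here is a minimal sketch of what web.group does: given any iterable and a chunk size, it returns an iterator that yields successive lists of at most that size, with the final list possibly shorter. The sketch is written in Python 2 style to match the examples below.

    import web

    # web.group(iterable, size) yields successive lists of at most `size` items.
    for chunk in web.group([1, 2, 3, 4, 5, 6, 7], 3):
        print chunk
    # [1, 2, 3]
    # [4, 5, 6]
    # [7]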
Example 1: get_all_editions
def get_all_editions(self):
    """Returns all the editions of this list in arbitrary order.

    The return value is an iterator over all the editions. Each entry is a dictionary.
    (Compare the difference with get_editions.)

    This works even for lists with too many seeds as it doesn't try to
    return editions in the order of last-modified.
    """
    rawseeds = self._get_rawseeds()

    def get_edition_keys(seeds):
        d = self._editions_view(seeds, limit=10000, stale="ok")
        return [row['id'] for row in d['rows']]

    keys = set()

    # When there are too many seeds, couchdb-lucene fails because the query URL is too long.
    # Splitting the seeds into groups of 50 to avoid that trouble.
    for seeds in web.group(rawseeds, 50):
        keys.update(get_edition_keys(seeds))

    # Load docs from couchdb now.
    for chunk in web.group(keys, 1000):
        docs = self.get_couchdb_docs(self._get_editions_db(), chunk)
        for doc in docs.values():
            del doc['_id']
            del doc['_rev']
            yield doc
Developer: RaceList, Project: openlibrary, Source: model.py
Example 2: parse
def parse():
    states = fips2state()

    shapeid2district = {}
    for lines in web.group(file(DATA_DIR + '/cd99_110a.dat'), 7):
        num, fipscode, distnum, distname, distid, distdesc, ignore = \
            [x.strip().strip('"') for x in lines]
        if not fipscode.strip(): continue
        shapeid2district[num] = states[fipscode] + '-' + distnum

    out = {}
    for line in file(DATA_DIR + '/cd99_110.dat'):
        nums = line.strip().split()
        if len(nums) == 3:
            shapeid = nums[0]  # other points are the center
            if shapeid in shapeid2district:
                SKIPME = False
                district = shapeid2district[shapeid]
                out.setdefault(district, [])
                out[district].append([])
            else:
                SKIPME = True
        elif len(nums) == 2 and not SKIPME:
            out[district][-1].append((float(nums[0]), float(nums[1])))
    return out
Developer: acgourley, Project: watchdog, Source: shapes.py
Example 3: parse
def parse():
    shapeid2district = {}
    for lines in web.group(file(DATA_DIR + '/cd99_110a.dat'), 7):
        num, fipscode, distnum, distname, distid, distdesc, ignore = \
            [x.strip().strip('"') for x in lines]
        if not fipscode.strip(): continue
        shapeid2district[num] = (fipscode, distnum)

    out = {}
    for line in file(DATA_DIR + '/cd99_110.dat'):
        nums = line.strip().split()
        if len(nums) == 3:
            shapeid = nums[0]  # other points are the center
            if shapeid in shapeid2district:
                SKIPME = False
                district = shapeid2district[shapeid]
                out.setdefault(district, [])
                out[district].append([])
            else:
                SKIPME = True
        elif len(nums) == 2 and not SKIPME:
            out[district][-1].append((float(nums[0]), float(nums[1])))

    for (fipscode, distnum), shapes in out.iteritems():
        yield {
            '_type': 'district',
            'state_fipscode': fipscode,
            'district': distnum,
            'shapes': shapes
        }
Developer: jdthomas, Project: watchdog, Source: shapes.py
Example 4: update_docs
def update_docs(db, all_docs, chunk_size=10000, comment=""):
    now = datetime.datetime.utcnow()
    for chunk in web.group(all_docs, chunk_size):
        print chunk
        d = dict((doc['key'], doc) for doc in chunk)
        rows = get_docs(db, d.keys())
        for row in rows:
            row.doc.update(d[row.key])
            row.doc['revision'] = row.revision + 1
            row.doc['latest_revision'] = row.revision + 1
            row.doc['last_modified']['value'] = now.isoformat()

        data = [web.storage(thing_id=row.id, revision=row.revision+1, data=simplejson.dumps(row.doc)) for row in rows]

        author_id = get_thing_id(db, "/user/anand")

        t = db.transaction()
        try:
            tx_id = db.insert("transaction", author_id=author_id, action="bulk_update", ip="127.0.0.1", bot=True, created=now, comment=comment)
            db.multiple_insert("version", [dict(thing_id=d.thing_id, transaction_id=tx_id, revision=d.revision) for d in data], seqname=False)
            db.multiple_insert("data", data, seqname=False)
            db.query("UPDATE thing set latest_revision=latest_revision+1 WHERE key in $d.keys()", vars=locals())
        except:
            t.rollback()
            raise
        else:
            t.commit()
Developer: RaceList, Project: openlibrary, Source: update_docs.py
Example 5: add_urls
def add_urls(module):
    global urls
    module_urls = []
    for path, classname in web.group(module.urls, 2):
        classname = module.__name__ + "." + classname
        module_urls.extend([path, classname])
    urls = urls + tuple(module_urls)
Developer: AndresGrams, Project: broadgauge, Source: webapp.py
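This works because a web.py URL mapping is a flat sequence of alternating (path, classname) entries, so web.group(module.urls, 2) recovers the pairs. A small illustration with a hypothetical module whose urls tuple follows that convention:

    import web

    # hypothetical flat urls tuple, as web.py applications define them
    urls = (
        "/hello", "hello",
        "/bye", "bye",
    )

    for path, classname in web.group(urls, 2):
        print path, "->", classname
    # /hello -> hello
    # /bye -> bye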
Example 6: request
def request(path, method, data):
    """Fakes the web request.
    Useful when infobase is not run as a separate process.
    """
    web.ctx.infobase_localmode = True
    web.ctx.infobase_input = data or {}
    web.ctx.infobase_method = method

    def get_class(classname):
        if '.' in classname:
            modname, classname = classname.rsplit('.', 1)
            mod = __import__(modname, None, None, ['x'])
            fvars = mod.__dict__
        else:
            fvars = globals()
        return fvars[classname]

    try:
        # hack to make cache work for local infobase connections
        cache.loadhook()

        for pattern, classname in web.group(app.mapping, 2):
            m = web.re_compile('^' + pattern + '$').match(path)
            if m:
                args = m.groups()
                cls = get_class(classname)
                tocall = getattr(cls(), method)
                return tocall(*args)
        raise web.notfound()
    finally:
        # hack to make cache work for local infobase connections
        cache.unloadhook()
Developer: termim, Project: infogami, Source: server.py
Example 7: load_identifiers
def load_identifiers(self, identifiers):
    for chunk in web.group(identifiers, 1000):
        chunk = list(set(chunk))
        result = self.db.query("SELECT identifier FROM bookloader WHERE identifier IN $chunk", vars=locals())
        present = set(row.identifier for row in result)
        data = [dict(identifier=id) for id in chunk if id not in present]
        if data:
            self.db.multiple_insert("bookloader", data)
Developer: anandology, Project: bookloader, Source: loader.py
Example 8: list_ranges
def list_ranges(self):
    r = db.select(self.sql_table,
        vars = dict(start_id=self.start_id, end_id=self.end_id, step=self.step),
        what = 'id, %s as value' % self.sql_field,
        where = 'id >= $start_id and id <= $end_id and\
            ((id-$start_id) % $step = 0 or (id-$start_id+1) % $step = 0 or\
            id = $end_id)',
        limit = self.max_size * 2)
    return web.group(r, 2)
Developer: Roverok, Project: CloudMining, Source: expose.py
Example 9: get_editions
def get_editions(self, limit=50, offset=0, _raw=False):
    """Returns the edition objects belonging to this list, ordered by last_modified.

    When _raw=True, the edition dicts are returned instead of edition objects.
    """
    # show at max 10 pages
    MAX_OFFSET = min(self.edition_count, 50 * 10)

    if not self.seeds or offset > MAX_OFFSET:
        return {
            "count": 0,
            "offset": offset,
            "limit": limit,
            "editions": []
        }

    # We don't want to give more than 500 editions for performance reasons.
    if offset + limit > MAX_OFFSET:
        limit = MAX_OFFSET - offset

    key_data = []
    rawseeds = self._get_rawseeds()
    for seeds in web.group(rawseeds, 50):
        key_data += self._get_edition_keys(seeds, limit=MAX_OFFSET)

    keys = [key for key, last_modified in sorted(key_data, key=lambda x: x[1], reverse=True)]
    keys = keys[offset:limit]

    # Get the documents from couchdb
    docs = self.get_couchdb_docs(self._get_editions_db(), keys)

    def get_doc(key):
        doc = docs[key]
        del doc['_id']
        del doc['_rev']
        if not _raw:
            data = self._site._process_dict(common.parse_query(doc))
            doc = client.create_thing(self._site, doc['key'], data)
        return doc

    d = {
        "count": self.edition_count,
        "offset": offset,
        "limit": limit,
        "editions": [get_doc(key) for key in keys]
    }

    if offset + limit < MAX_OFFSET:
        d['next_params'] = {
            'offset': offset + limit
        }

    if offset > 0:
        d['prev_params'] = {
            'offset': max(0, offset - limit)
        }

    return d
Developer: RaceList, Project: openlibrary, Source: model.py
Example 10: process_changesets
def process_changesets(self, changesets, update_seeds=False):
    """Updates the lists databases for given changesets.

    Seeds are updated in the seeds db if update_seeds is True, otherwise they are marked for later update.
    """
    logger.info("BEGIN process_changesets")
    ctx = UpdaterContext()
    for chunk in web.group(changesets, 50):
        chunk = list(chunk)
        logger.info("processing changesets %s", [c['id'] for c in chunk])

        works = [work for changeset in chunk
                      for work in self._get_works(changeset)]
        editions = [e for changeset in chunk
                      for e in self._get_editions(changeset)]
        logger.info("found %d works and %d editions", len(works), len(editions))

        keys = [w['key'] for w in works] + [e['works'][0]['key'] for e in editions if e.get('works')]
        keys = list(set(keys))
        self.works_db.db.preload(keys)

        for work in works:
            work = self.works_db.update_work(ctx, work)

        # works have been modified. Commit to update the views.
        logger.info("BEGIN commit works_db")
        self.works_db.db.commit()
        logger.info("END commit works_db")

        self.works_db.update_editions(ctx, editions)
        self.editions_db.update_editions(ctx.editions.values())
        ctx.editions.clear()

        t = datetime.datetime.utcnow().isoformat()
        if ctx.seeds:
            logger.info("BEGIN commit works_db")
            self.works_db.db.commit()
            logger.info("END commit works_db")

            logger.info("BEGIN mark %d seeds for update" % len(ctx.seeds))
            if update_seeds:
                self.seeds_db.update_seeds(ctx.seeds.keys())
            else:
                self.seeds_db.mark_seeds_for_update(ctx.seeds.keys())
            logger.info("END mark %d seeds for update" % len(ctx.seeds))
            ctx.seeds.clear()

        # reset to make sure the size of the cache never grows without limit.
        if len(self.works_db.db.docs) > 1000:
            self.works_db.db.reset()

    self.works_db.db.commit()
    self.works_db.db.reset()
    logger.info("END process_changesets")
Developer: iambibhas, Project: openlibrary, Source: updater.py
Example 11: find_not_indexed
def find_not_indexed(keys, chunk_size=1000):
    for chunk in web.group(keys, chunk_size):
        chunk = list(chunk)
        q = " OR ".join("key:" + k for k in chunk)
        params = urllib.urlencode({"q": q, "rows": chunk_size, "wt": "json", "fl": "key"})
        url = solr_base_url + "/select"
        d = jsonget(url, params)
        found = set(doc['key'] for doc in d['response']['docs'])
        for k in chunk:
            if k not in found:
                yield k
Developer: RaceList, Project: openlibrary, Source: find-indexed-works.py
Example 12: get_many
def get_many(self, keys):
    """Get multiple documents in a single request as a dictionary.
    """
    if len(keys) > 500:
        # get in chunks of 500 to avoid crossing the URL length limit.
        d = {}
        for chunk in web.group(keys, 500):
            d.update(self._get_many(chunk))
        return d
    else:
        return self._get_many(keys)
Developer: artmedlar, Project: openlibrary, Source: api.py
Example 13: _process_key
def _process_key(key):
    mapping = (
        "/l/", "/languages/",
        "/a/", "/authors/",
        "/b/", "/books/",
        "/user/", "/people/"
    )
    for old, new in web.group(mapping, 2):
        if key.startswith(old):
            return new + key[len(old):]
    return key
Developer: amoghravish, Project: openlibrary, Source: ol_infobase.py
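The flat mapping tuple, paired up by web.group(mapping, 2), rewrites Open Library's legacy short key prefixes to their longer modern forms. Illustrative calls (the key values here are chosen for demonstration):

    print _process_key("/b/OL1M")      # -> "/books/OL1M"
    print _process_key("/user/anand")  # -> "/people/anand"
    print _process_key("/works/OL1W")  # no prefix matches: returned unchanged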
Example 14: update_seeds
def update_seeds(self, seeds, chunksize=50):
    big_seeds = self.get_big_seeds()
    seeds2 = sorted(seed for seed in seeds if seed not in big_seeds)

    logger.info("update_seeds %s", len(seeds2))
    logger.info("ignored %d big seeds", len(seeds) - len(seeds2))

    for i, chunk in enumerate(web.group(seeds2, chunksize)):
        chunk = list(chunk)
        logger.info("update_seeds %d %d", i, len(chunk))
        self._update_seeds(chunk)
Developer: iambibhas, Project: openlibrary, Source: updater.py
Example 15: write_sitemaps
def write_sitemaps(data, outdir, prefix):
    timestamp = datetime.datetime.utcnow().isoformat() + 'Z'

    # maximum permitted entries in one sitemap is 50K.
    for i, rows in enumerate(web.group(data, 50000)):
        filename = "sitemap_%s_%04d.xml.gz" % (prefix, i)
        print >> sys.stderr, "generating", filename

        sitemap = web.safestr(t_sitemap(rows))

        path = os.path.join(outdir, filename)
        gzwrite(path, sitemap)
        yield filename, timestamp
Developer: lukasklein, Project: openlibrary, Source: sitemap.py
Example 16: _process_key
def _process_key(self, key):
    mapping = (
        "/l/", "/languages/",
        "/a/", "/authors/",
        "/b/", "/books/",
        "/user/", "/people/"
    )
    if "/" in key and key.split("/")[1] in ['a', 'b', 'l', 'user']:
        for old, new in web.group(mapping, 2):
            if key.startswith(old):
                return new + key[len(old):]
    return key
Developer: hornc, Project: openlibrary-1, Source: connection.py
Example 17: _get_docs
def _get_docs(self, keys):
    """Returns docs for the specified keys as a dictionary.
    """
    docs = {}
    for keys2 in web.group(keys, 500):
        json = self.infobase_conn.request(
            sitename="openlibrary.org",
            path="/get_many",
            data={"keys": simplejson.dumps(keys2)})
        docs2 = simplejson.loads(json)
        docs.update(docs2)
    return docs
Developer: mikemaehr, Project: openlibrary, Source: engine.py
Example 18: parse
def parse(filename, chunk_size=10000):
    t0 = time.time()
    i = 0
    for chunk in web.group(open(filename), chunk_size):
        print i, time.time() - t0
        d = {}
        for line in chunk:
            key, type, revision, json = line.strip().split("\t")
            d["%s@@%s" % (key, revision)] = json
        i += len(d)
        yield d
    print i, time.time() - t0
Developer: hornc, Project: openlibrary-1, Source: olload.py
Example 19: update_keys
def update_keys(keys):
    keys = (k for k in keys if k.count("/") == 2 and k.split("/")[1] in ["books", "authors", "works"])

    count = 0
    for chunk in web.group(keys, 100):
        chunk = list(chunk)
        count += len(chunk)
        update_work.update_keys(chunk, commit=False)

    if count:
        logger.info("updated %d documents", count)

    return count
Developer: RaceList, Project: openlibrary, Source: new-solr-updater.py
Example 20: update_seeds
def update_seeds(self, seeds, chunksize=50):
    # XXX-Anand: temporarily disable updates as the node hosting seeds_db is low on disk
    return

    big_seeds = self.get_big_seeds()
    seeds2 = sorted(seed for seed in seeds if seed not in big_seeds)

    logger.info("update_seeds %s", len(seeds2))
    logger.info("ignored %d big seeds", len(seeds) - len(seeds2))

    for i, chunk in enumerate(web.group(seeds2, chunksize)):
        chunk = list(chunk)
        logger.info("update_seeds %d %d", i, len(chunk))
        self._update_seeds(chunk)
Developer: RaceList, Project: openlibrary, Source: updater.py
Note: the web.group examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.