This article collects typical usage examples of the Python utils.merge function. If you have been wondering what exactly utils.merge does and how it is used in practice, the hand-picked code examples below may help.
The article presents 20 code examples of the merge function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
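None of the snippets below include the merge helper itself; each project defines its own version in a local utils module. As a point of reference, here is a minimal sketch of a recursive dict merge, the behavior most of the dict-based call sites on this page appear to rely on. This is an assumption for illustration only; each project's actual implementation may differ.

def merge(base, extra):
    """Recursively merge `extra` into `base` and return `base`.

    Nested dicts are merged key by key; any other value in `extra`
    overwrites the corresponding entry in `base`.
    """
    for key, value in extra.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge(base[key], value)
        else:
            base[key] = value
    return base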
Example 1: getlocals

def getlocals(steamhome, logindata, gamedata, localdata):
    logindata["configdata"] = {}
    for root, subFolders, files in os.walk(os.path.join(steamhome["path"], "userdata", logindata["dir"])):
        for file in files:
            if file.lower().endswith("config.vdf"):
                vdfdata = utils.vdf.load(open(os.path.join(steamhome["path"], "userdata", root, file)))
                logindata["configdata"] = utils.merge(logindata["configdata"], vdfdata)

    def getnewgamedata(appid, name):
        ret = {"appid": appid, "name": name}
        if int(appid) <= 999999:  # not a shortcut
            ret["firstseen"] = int(time.time())
        return ret

    purchaseddata = purchased.getpurchased(logindata)
    localdb = utils.merge(shortcuts.getshortcuts(steamhome, logindata), apppackages.getappinfo(steamhome, logindata))
    localdata.clear()
    for g in localdb:
        if "data-isdlc" not in localdb[g]:
            localdata[g] = localdb[g]
            if not g in gamedata:
                gamedata[g] = getnewgamedata(g, localdb[g]["name"])
            if "data-packageid" in localdb[g] and localdb[g]["data-packageid"] in purchaseddata:
                gamedata[g]["firstseen"] = purchaseddata[localdb[g]["data-packageid"]]

Author: shrewdlogarithm, Project: pyst, Lines: 25, Source: locals.py
Example 2: select_group

def select_group(self, group):
    if self.__group == group:
        return

    if group:
        groups = [group] + [g for g in self.groups if g != group]
    else:
        groups = self.groups

    # clear dict and only keep some values we want unchanged
    if not self.__base_dict:
        self.__base_dict = self.__dict__.copy()
    else:
        self.__dict__ = self.__base_dict.copy()

    # updating
    for group_ in groups:
        group_.select_group(None)
        if group_.handlers:
            merge(self.handlers, group_.handlers)
        self.__inherits(self.__dict__, group_.__dict__)

    # some values that we must reset to their original state
    for key in ('synctrex', 'group', 'groups', 'children'):
        if key in self.__base_dict:
            setattr(self, key, self.__base_dict[key])
        elif hasattr(self, key):
            delattr(self, key)

    self.__group = group

Author: bkfox, Project: synctrex, Lines: 30, Source: objects.py
Example 3: _merge_sort

def _merge_sort(a, left, right):
    if right - left < 2:
        return
    pivot = (right + left) // 2
    _merge_sort(a, left, pivot)
    _merge_sort(a, pivot, right)
    merge(a, left, pivot, right)

Author: sanchopanca, Project: clrs, Lines: 7, Source: sort.py
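The merge called here is not the dict-based utils.merge of the other examples but the array-merging step of merge sort, operating on the half-open runs a[left:pivot] and a[pivot:right]. A minimal in-place sketch matching that call signature (hypothetical; the actual helper in the clrs project may differ):

def merge(a, left, pivot, right):
    # Merge the sorted runs a[left:pivot] and a[pivot:right] back into a.
    merged = []
    i, j = left, pivot
    while i < pivot and j < right:
        if a[i] <= a[j]:
            merged.append(a[i])
            i += 1
        else:
            merged.append(a[j])
            j += 1
    merged.extend(a[i:pivot])
    merged.extend(a[j:right])
    a[left:right] = merged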
Example 4: mkdumpdb

def mkdumpdb():
    dumpdb = {
        "name64": logindata["id64"],
        "name": logindata["dir"],
        "user": logindata["name"]
    }
    if "avatar" in logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]:
        dumpdb["avatar"] = logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]["avatar"]
    dumpdb["gamelist"] = {}
    for db in dbs:
        dbo = copy.deepcopy(db)
        utils.merge(dumpdb["gamelist"], dbo)
    return dumpdb

Author: shrewdlogarithm, Project: pyst, Lines: 13, Source: pyst.py
Example 5: run

def run(options):
    amdt_id = options.get('amendment_id', None)

    search_state = {}

    if amdt_id:
        amdt_type, number, congress = utils.split_bill_id(amdt_id)
        to_fetch = [amdt_id]
    else:
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                logging.warn("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    save_bill_search_state(saved_amendments, search_state)

Author: Web5design, Project: congress, Lines: 30, Source: amendments.py
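A pattern worth noting in this and the other congress-project examples: utils.merge(options, {...}) derives a one-off options dict for a single call without mutating the shared options. A self-contained illustration of that behavior, using a hypothetical stand-in for the project's helper:

def merge(dict1, dict2):
    # Hypothetical stand-in: a new dict with dict2's entries layered
    # over dict1's; the project's actual helper may differ.
    combined = dict(dict1)
    combined.update(dict2)
    return combined

options = {'force': False, 'binary': False}
fetch_options = merge(options, {'binary': True})
assert options['binary'] is False       # shared options untouched
assert fetch_options['binary'] is True  # per-call override applied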
Example 6: mirror_package

def mirror_package(sitemap, package_name, lastmod, content_detail_url, options):
    """Create a local mirror of a FDSys package."""

    # Return a list of files we downloaded.
    results = []

    if not options.get("granules", False):
        # Most packages are just a package. This is the usual case.
        results = mirror_package_or_granule(sitemap, package_name, None, lastmod, options)
    else:
        # In some collections, like STATUTE, each document has subparts which are not
        # described in the sitemap. Load the main HTML page and scrape for the sub-files.
        # In the STATUTE collection, the MODS information in granules is redundant with
        # information in the top-level package MODS file. But the only way to get granule-
        # level PDFs is to go through the granules.
        content_index = utils.download(content_detail_url,
                                       "fdsys/package/%s/%s/%s.html" % (sitemap["year"], sitemap["collection"], package_name),
                                       utils.merge(options, {
                                           'binary': True,
                                       }))
        if not content_index:
            raise Exception("Failed to download %s" % content_detail_url)
        for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"):
            if link.text == "More":
                m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href"))
                if not m or m.group(1) != package_name:
                    raise Exception("Unmatched granule URL %s" % link.get("href"))
                granule_name = m.group(2)
                results = mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options)

    return results

Author: hugovk, Project: congress, Lines: 32, Source: fdsys.py
Example 7: vote_ids_for_senate

def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    page = utils.download(
        "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num),
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )
    if not page:
        logging.error("Couldn't download Senate vote XML index, aborting")
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)

    return vote_ids

Author: Christopher-Churnick, Project: congress, Lines: 33, Source: votes.py
Example 8: narrow_docids

def narrow_docids(self, idx):
    m0 = [decode_array(idx[feat]) for feat in self.feats if idx.has_key(feat)]
    if not m0:
        return []
    refs = merge(m0)
    locs = [(refs[i], refs[i+1]) for i in xrange(0, len(refs), 2)]
    return locs

Author: yasusii, Project: fooling, Lines: 7, Source: selection.py
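Unlike the dict-based examples, merge here takes a list of decoded arrays and appears to combine several sorted sequences into one. A minimal k-way merge in that spirit, assuming plain sorted lists (the fooling project's actual helper and its (docid, position) pair encoding may differ):

import heapq

def merge(arrays):
    # Combine several pre-sorted sequences into one sorted list.
    return list(heapq.merge(*arrays))

print(merge([[1, 4, 9], [2, 3, 10], [5]]))  # [1, 2, 3, 4, 5, 9, 10]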
Example 9: document_info_for

def document_info_for(filename, cache, options):
    mods_url = mods_for(filename)
    mods_cache = ""

    body = utils.download(mods_url,
                          cache,
                          utils.merge(options, {'xml': True})
                          )
    doc = etree.fromstring(body)
    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}

    locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

    urls = {}
    for location in locations:
        label = location.attrib['displayLabel']
        if "HTML" in label:
            format = "html"
        elif "PDF" in label:
            format = "pdf"
        elif "XML" in label:
            format = "xml"
        else:
            format = "unknown"
        urls[format] = location.text

    issued_on = doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns)

    return issued_on, urls

Author: actkatiemacias, Project: congress, Lines: 29, Source: fdsys.py
Example 10: get_sitemap

def get_sitemap(year, collection, lastmod, options):
    """Gets a single sitemap, downloading it if the sitemap has changed.

    Downloads the root sitemap (year==None, collection==None), or
    the sitemap for a year (collection==None), or the sitemap for
    a particular year and collection. Pass lastmod which is the current
    modification time of the file according to its parent sitemap, which
    is how it knows to return a cached copy.

    Returns the sitemap parsed into a DOM.
    """

    # Construct the URL and the path to where to cache the file on disk.
    if year == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
        path = "fdsys/sitemap/sitemap.xml"
    elif collection == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
        path = "fdsys/sitemap/%s/sitemap.xml" % year
    else:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
        path = "fdsys/sitemap/%s/%s.xml" % (year, collection)

    # Should we re-download the file?
    lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
    if options.get("cached", False):
        # If --cached is used, don't hit the network.
        force = False
    elif not lastmod:
        # No *current* lastmod date is known for this file (because it is the master
        # sitemap file, probably), so always download.
        force = True
    else:
        # If the file is out of date or --force is used, download the file.
        cache_lastmod = utils.read(lastmod_cache_file)
        force = (lastmod != cache_lastmod) or options.get("force", False)
    if force:
        logging.warn("Downloading: %s" % url)

    body = utils.download(url, path, utils.merge(options, {
        'force': force,
        'binary': True
    }))
    if not body:
        raise Exception("Failed to download %s" % url)

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the file.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

    try:
        return etree.fromstring(body)
    except etree.XMLSyntaxError as e:
        raise Exception("XML syntax error in %s: %s" % (url, str(e)))

Author: GPHemsley, Project: congress, Lines: 57, Source: fdsys.py
Example 11: fetch_vote

def fetch_vote(vote_id, options):
    logging.info("\n[%s] Fetching..." % vote_id)

    vote_chamber, vote_number, vote_congress, vote_session_year = utils.split_vote_id(vote_id)

    if vote_chamber == "h":
        url = "http://clerk.house.gov/evs/%s/roll%03d.xml" % (vote_session_year, int(vote_number))
    else:
        session_num = int(vote_session_year) - utils.get_congress_first_year(int(vote_congress)) + 1
        url = "http://www.senate.gov/legislative/LIS/roll_call_votes/vote%d%d/vote_%d_%d_%05d.xml" % (int(vote_congress), session_num, int(vote_congress), session_num, int(vote_number))

    # fetch vote XML page
    body = utils.download(
        url,
        "%s/votes/%s/%s%s/%s%s.xml" % (vote_congress, vote_session_year, vote_chamber, vote_number, vote_chamber, vote_number),
        utils.merge(options, {'binary': True}),
    )

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    if "This vote was vacated" in body:
        # Vacated votes: 2011-484, 2012-327, ...
        # Remove file, since it may previously have existed with data.
        for f in (output_for_vote(vote_id, "json"), output_for_vote(vote_id, "xml")):
            if os.path.exists(f):
                os.unlink(f)
        return {'saved': False, 'ok': True, 'reason': "vote was vacated"}

    dom = etree.fromstring(body)

    vote = {
        'vote_id': vote_id,
        'chamber': vote_chamber,
        'congress': int(vote_congress),
        'session': vote_session_year,
        'number': int(vote_number),
        'updated_at': datetime.datetime.fromtimestamp(time.time()),
        'source_url': url,
    }

    # do the heavy lifting
    if vote_chamber == "h":
        parse_house_vote(dom, vote)
    elif vote_chamber == "s":
        parse_senate_vote(dom, vote)

    # output and return
    output_vote(vote, options)

    return {'ok': True, 'saved': True}

Author: JT5D, Project: congress, Lines: 56, Source: vote_info.py
Example 12: mirror_file

def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name, options)
    if not path: return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    updated_file_types = set()
    for file_type in file_types:
        if file_type not in targets: raise Exception("Invalid file type: %s" % file_type)
        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path): continue  # we already have the current file

        logging.warn("Downloading: " + f_path)
        data = utils.download(f_url, f_path, utils.merge(options, {
            'binary': True,
            'force': force,
            'to_cache': False,
            'needs_content': file_type == "text" and f_path.endswith(".html"),
        }))
        updated_file_types.add(file_type)

        if not data:
            if file_type == "pdf":
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)
            continue

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            # html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                text_content = unicode(html.fromstring(data).text_content())
                f.write(text_content.encode("utf8"))

    if collection == "BILLS" and "mods" in updated_file_types:
        # When we download bill files, also create the text-versions/data.json file
        # which extracts commonly used components of the MODS XML.
        from bill_versions import write_bill_version_metadata
        write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

Author: JT5D, Project: congress, Lines: 54, Source: fdsys.py
Example 13: generate_validation_batch

def generate_validation_batch(required_input_keys, required_output_keys, set="validation"):
    # generate sunny data
    sunny_length = get_lenght_of_set(name="sunny", set=set)
    regular_length = get_lenght_of_set(name="regular", set=set)

    sunny_batches = int(np.ceil(sunny_length / float(_config().sunny_batch_size)))
    regular_batches = int(np.ceil(regular_length / float(_config().batch_size)))

    if "sunny" in required_input_keys or "segmentation" in required_output_keys:
        num_batches = max(sunny_batches, regular_batches)
    else:
        num_batches = regular_batches

    num_chunks = int(np.ceil(num_batches / float(_config().batches_per_chunk)))

    sunny_chunk_size = _config().batches_per_chunk * _config().sunny_batch_size
    regular_chunk_size = _config().batches_per_chunk * _config().batch_size

    for n in xrange(num_chunks):
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = range(n*sunny_chunk_size, (n+1)*sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = range(n*regular_chunk_size, (n+1)*regular_chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set=set,
                                       preprocess_function=_config().preprocess_validation)

        result = utils.merge(result, kaggle_data)
        yield result

Author: 317070, Project: kaggle-heart, Lines: 40, Source: data_loader.py
Example 14: fetch_version

def fetch_version(bill_version_id, options):
    # Download MODS etc.
    logging.info("\n[%s] Fetching..." % bill_version_id)

    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    # bill_id = "%s%s-%s" % (bill_type, number, congress)

    utils.download(
        mods_url_for(bill_version_id),
        document_filename_for(bill_version_id, "mods.xml"),
        utils.merge(options, {'binary': True, 'to_cache': False})
    )

    return write_bill_version_metadata(bill_version_id)

Author: GPHemsley, Project: congress, Lines: 15, Source: bill_versions.py
Example 15: generate_train_batch

def generate_train_batch(required_input_keys, required_output_keys):
    """Creates an iterator that returns train batches."""

    sunny_chunk_size = _config().sunny_batch_size * _config().batches_per_chunk
    chunk_size = _config().batch_size * _config().batches_per_chunk

    while True:
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = _config().rng.randint(0, len(sunny_train_images), sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = _config().rng.randint(0, len(train_patient_folders), chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set="train",
                                       preprocess_function=_config().preprocess_train)

        result = utils.merge(result, kaggle_data)
        yield result

Author: 317070, Project: kaggle-heart, Lines: 24, Source: data_loader.py
Example 16: run

def run(options):
    # Download the TSV file.
    cache_zip_path = "adler-wilkerson-bills.zip"
    utils.download(
        "http://congressionalbills.org/billfiles/bills80-92.zip",
        cache_zip_path,
        utils.merge(options, {'binary': True, 'needs_content': False}))

    # Unzip in memory and process the records.
    zfile = zipfile.ZipFile(utils.cache_dir() + "/" + cache_zip_path)
    csvreader = csv.DictReader(zfile.open("bills80-92.txt"), delimiter="\t")
    for record in csvreader:
        rec = process_bill(record)

        import pprint
        pprint.pprint(rec)

Author: Christopher-Churnick, Project: congress, Lines: 16, Source: adler_wilkerson_bills.py
Example 17: run

def run(options):
    amendment_id = options.get('amendment_id', None)
    bill_id = options.get('bill_id', None)

    search_state = {}

    if amendment_id:
        amendment_type, number, congress = utils.split_bill_id(amendment_id)
        to_fetch = [amendment_id]
    elif bill_id:
        # first, crawl the bill
        bill_type, number, congress = utils.split_bill_id(bill_id)
        bill_status = fetch_bill(bill_id, options)
        if bill_status['ok']:
            bill = json.loads(utils.read(output_for_bill(bill_id, "json")))
            to_fetch = [x["amendment_id"] for x in bill["amendments"]]
        else:
            logging.error("Couldn't download information for that bill.")
            return None
    else:
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                logging.warn("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    # keep record of the last state of all these amendments, for later fast-searching
    save_bill_search_state(saved_amendments, search_state)

Author: GPHemsley, Project: congress, Lines: 45, Source: amendments.py
Example 18: mirror_bulkdata_file

def mirror_bulkdata_file(collection, url, item_path, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file?
    path = "%s/govinfo/%s/%s" % (utils.data_dir(), collection, item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if collection == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(os.path.splitext(os.path.basename(item_path.replace("BILLSTATUS-", "")))[0], with_version=False)
        path = output_for_bill(bill_id, FDSYS_BILLSTATUS_FILENAME, is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(url, path, utils.merge(options, {
        'binary': True,
        'force': True,  # decision to cache was made above
        'to_cache': False,
    }))
    results.append(path)

    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)

    return results

Author: d0tN3t, Project: congress, Lines: 45, Source: govinfo.py
Example 19: start

def start(self):
    """Start up the bot process

    Calls the ``connect`` method and then (if ``stream`` is set) begins the
    event loop.
    """
    login_data = self.connect()
    if not login_data:
        return None
    self.running = True

    for handler in self._hooks[events.SETUP]:
        handler(merge(login_data, {"client": self.client}))

    if self.stream:
        try:
            self.read()
        except:
            self.stop()
            raise

Author: jdost, Project: lazbot, Lines: 20, Source: lazbot.py
Example 20: parse_data

def parse_data(url):
    r = requests.get(url, headers=utils.merge(DEFAULT_HEADERS, {}))
    soup = BeautifulSoup(r.text, "html.parser")
    if r.status_code != 200:
        return None
    full_data = {}
    for t in soup.select('table'):
        section = t.select('th')[0].contents[0]
        h = [get_contents(e.contents) for e in t.select('.ttl > a')]
        c = [get_contents(e.contents) for e in t.select('.nfo')]
        full_data[section] = dict(zip(h, c))
    new_data = {}
    for key, val in full_data.items():
        for subk, subv in val.items():
            new_data["%s:%s" % (key, subk)] = subv
            #print json.dumps({"%s:%s" % (key, subk): subv})
    return new_data

Author: coolkunal64, Project: gsmarena-scraper, Lines: 21, Source: gsmarena.py
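Note that utils.merge(DEFAULT_HEADERS, {}) passes an empty override dict: if merge returns a new dict (as in the sketch at the top of this page, an assumption), the call amounts to a defensive copy of DEFAULT_HEADERS, so any later per-request header changes cannot leak into the shared defaults.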
Note: The utils.merge examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code hosting and documentation platforms. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with those authors; consult each project's License before redistributing or reusing the code. Do not republish without permission.