本文整理汇总了Python中mmh3.hash64函数的典型用法代码示例。如果您正苦于以下问题:Python hash64函数的具体用法?Python hash64怎么用?Python hash64使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了hash64函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: lemmatizeText
def lemmatizeText(text):
    """Count the accepted words of ``text``, keyed by their murmur3 hash.

    ``text`` is scanned character by character.  ASCII letters, the
    apostrophe, the hyphen and the underscore build up a word; any other
    character ends it.  Words are lower-cased before every lookup.  A word
    is accepted when it is listed in ``exceptions``, or when it is longer
    than three characters, is not in ``stopWords`` and ``nltk.pos_tag``
    assigns it a tag contained in ``tags``.

    A word that runs up to the very end of ``text`` is flushed as well, so
    the last word is counted even without a trailing separator.

    :param text: the text to scan (iterated one character at a time).
    :return: dict mapping ``mmh3.hash64(word)[0]`` to ``[word, count]``.
    """
    seen = set()     # every word already classified, accepted or not
    counts = {}      # accepted word -> number of occurrences so far
    hashed = {}      # mmh3 hash of the word -> [word, count]

    def _record(token):
        # Bump the counter and refresh the hash-keyed entry.
        counts[token] = counts.get(token, 0) + 1
        hashed[mmh3.hash64(token)[0]] = [token, counts[token]]

    def _flush(token):
        # Classify a finished word once; later sightings only re-count it.
        token = token.lower()
        if token in seen:
            if token in counts:
                _record(token)
            return
        seen.add(token)
        if token in exceptions:
            _record(token)
        elif len(token) > 3 and token not in stopWords:
            # WARNING: pos_tag is very slow, which is why each distinct
            # word is tagged at most once.
            if nltk.pos_tag([token])[0][1] in tags:
                _record(token)

    pending = []
    for c in text:
        if ('a' <= c <= 'z') or ('A' <= c <= 'Z') or c in ("'", "-", "_"):
            pending.append(c)
        elif pending:
            _flush("".join(pending))
            pending = []
    if pending:
        # The text ended in the middle of a word: do not lose it.
        _flush("".join(pending))
    return hashed
开发者ID:MariusBordeian,项目名称:RIWSearchEngine,代码行数:35,代码来源:tema1.py
示例2: update
def update(self, instance, y):
    """Append the observation ``(self.time, y)`` to every aggregation history.

    For each key group in ``self._keys`` the instance values are joined as
    ``"<key>_<value>"``, the tuple of those strings is hashed with
    ``hash64`` and the hash selects a history in ``self._counter_map``.
    When that history already holds ``self.mem_len`` entries, the oldest
    one is dropped first.
    """
    for aggKey in self._keys:
        labelled = tuple([name + '_' + instance[name] for name in aggKey])
        # Only the hash of the key is stored, to keep memory usage down.
        history = self._counter_map[hash64(str(labelled))]
        if len(history) == self.mem_len:
            history.popleft()
        history.append((self.time, y))
开发者ID:cckk3333,项目名称:rt-pred,代码行数:7,代码来源:FTRLProximal.py
示例3: readHash
def readHash(self):
    """Feed the hash of every stdin line into an ``Hll`` and print its count.

    Reading stops at end of input or at the first empty line, because both
    produce an empty string once the trailing newline is stripped.
    """
    hll = Hll(self.p)
    while True:
        line = sys.stdin.readline().rstrip('\n')
        if not line:
            break
        hll.AddItem64(mmh3.hash64(str(line))[0])
    print(hll.Count64())
开发者ID:MarwanG,项目名称:Graal,代码行数:7,代码来源:client64.py
示例4: insert
def insert(self, codelet):
    """
    Store a codelet, with its symbols and authors, in the database.

    The code row id is the first value of ``mmh3.hash64`` over
    ``"<language>:<utf-8 code>"``.

    :param codelet: The codelet to store.
    :type codelet: :py:class:`.Codelet`
    """
    code_sql = """INSERT INTO code VALUES (?, ?, ?)
ON DUPLICATE KEY UPDATE code_id=code_id"""
    codelet_sql = """INSERT INTO codelets VALUES
(DEFAULT, ?, ?, ?, ?, ?, ?, ?)"""
    author_sql = "INSERT INTO authors VALUES (DEFAULT, ?, ?, ?)"

    code_id = mmh3.hash64(
        str(codelet.language) + ":" + codelet.code.encode("utf8"))[0]

    with self._conn.cursor() as cursor:
        cursor.execute(code_sql, (code_id, codelet.language, codelet.code))
        # NOTE(review): rowcount == 1 presumably means the code row was
        # newly inserted rather than already present -- confirm for the
        # database driver in use.
        if cursor.rowcount == 1:
            for sym_type, symbols in codelet.symbols.iteritems():
                self._insert_symbols(cursor, code_id, sym_type, symbols)

        origin, url = self._decompose_url(cursor, codelet.url)
        codelet_row = (codelet.name, code_id, origin, url, codelet.rank,
                       codelet.date_created, codelet.date_modified)
        cursor.execute(codelet_sql, codelet_row)

        codelet_id = cursor.lastrowid
        author_rows = [(codelet_id, a[0], a[1]) for a in codelet.authors]
        cursor.executemany(author_sql, author_rows)
开发者ID:SuriyaaKudoIsc,项目名称:bitshift,代码行数:28,代码来源:__init__.py
示例5: _indexes
def _indexes(self, key):
    """
    Yield the ``self.num_hashes`` indices that belong to the given key.

    The two values returned by ``mmh3.hash64`` are combined as
    ``h1 + i * h2`` and reduced modulo ``self.num_bytes``.
    """
    first, stride = mmh3.hash64(key)
    for round_no in xrange(self.num_hashes):
        position = first + round_no * stride
        yield position % self.num_bytes
开发者ID:pombredanne,项目名称:fuggetaboutit,代码行数:7,代码来源:counting_bloom_filter.py
示例6: murmur3_64bit
def murmur3_64bit(obj):
    """
    64 bit murmur3 hasher; select it by passing
    hasher=DeepHash.murmur3_64bit

    ``obj`` is UTF-8 encoded and hashed with ``MURMUR_SEED``.
    """
    encoded = obj.encode('utf-8')
    # mmh3.hash64 hands back two 64bit integers; only the first is used.
    pair = mmh3.hash64(encoded, MURMUR_SEED)
    return pair[0]
开发者ID:seperman,项目名称:deepdiff,代码行数:8,代码来源:deephash.py
示例7: gen_features
def gen_features(self, instance, logtime, D):
    """Yield ``(index, 1.0)`` feature pairs built from the stored histories.

    For every key group in ``self._keys`` the matching history is looked up
    in ``self._counter_map`` (keyed by ``hash64`` of the labelled instance
    values).  Each ``(time, lastY)`` entry contributes one feature whose
    index is ``abs(hash("<aggKey>_<position>_<(bucket, lastY)>")) % D``,
    where ``bucket`` is ``int(log(seconds since the entry + 1))``.
    """
    for aggKey in self._keys:
        labelled = tuple([name + '_' + instance[name] for name in aggKey])
        history = self._counter_map[hash64(str(labelled))]
        for idx, (seen_at, lastY) in enumerate(history):
            elapsed = (logtime - seen_at).total_seconds()
            val = int(log(elapsed + 1.))
            token = str(aggKey) + '_' + str(idx) + '_' + str((val, lastY))
            yield abs(hash(token)) % D, 1.
开发者ID:cckk3333,项目名称:rt-pred,代码行数:9,代码来源:FTRLProximal.py
示例8: params_stand
def params_stand(self, infos, res_id):
    """Rewrite ``infos`` in place into the layout used by the stand DB.

    ``create_time`` is renamed to ``issue_time``, ``res_id`` is stored,
    ``mid`` becomes the first ``mmh3.hash64`` value of
    ``"<res_id>naviappfeedback"``, two constant flags are set and
    ``update_time`` is copied from ``commit_time``.
    """
    create_time = infos.pop("create_time")
    infos['issue_time'] = create_time
    infos['res_id'] = res_id
    # Hash the stored res_id so that mid is unique per resource.
    mid_source = "%snaviappfeedback" % infos['res_id']
    infos['mid'] = mmh3.hash64(mid_source)[0]
    infos['intelligence_source'] = 31
    infos['dispatch_flag'] = 0
    infos['update_time'] = infos['commit_time']
开发者ID:GeWu,项目名称:tornado_api_example,代码行数:9,代码来源:handlers.py
示例9: murmurhash3_64
def murmurhash3_64(item, seed = 0):
    """
    Hash ``item`` with ``mmh3.hash64`` and return that call's result as is.

    ``item`` is converted with ``str()`` and ``seed`` with ``int()`` when
    they are not already of those types.

    NOTE(review): an earlier docstring said this returns "the first of a
    tuple of two", but the function has always returned the complete
    ``mmh3.hash64`` result.  The return value is deliberately left
    unchanged so existing callers keep working; callers that need a single
    value must index the result themselves.

    :param item: value to hash.
    :param seed: hash seed, defaults to 0.
    :return: whatever ``mmh3.hash64(item, seed=seed)`` returns.
    """
    if not isinstance(item, str):
        item = str(item)
    if not isinstance(seed, int):
        seed = int(seed)
    return mmh3.hash64(item, seed=seed)
开发者ID:venantius,项目名称:droplet,代码行数:9,代码来源:hash_functions.py
示例10: hash64
def hash64(key, seed):
    """
    Reduce ``mmh3.hash64`` to one unsigned 64-bit value.

    The first value of the pair is packed as a native-order signed 64-bit
    integer and the same bytes are read back as a big-endian unsigned one,
    so results are interpreted the way smhasher used to produce them.
    """
    signed_first = mmh3.hash64(key, seed)[0]
    raw_bytes = struct.pack('q', signed_first)
    (unsigned_value,) = struct.unpack('>Q', raw_bytes)
    return unsigned_value
开发者ID:Parsely,项目名称:python-pds,代码行数:10,代码来源:hashfunctions.py
示例11: go
def go(self):
    """Time how long it takes to add ``self.n`` pre-computed hashes to an Hll.

    The hashes of ``str(0) .. str(n - 1)`` are computed before the clock
    starts, so only the ``AddItem64`` calls are measured.  The estimated
    count and the elapsed milliseconds are printed.
    """
    hll = Hll(self.p)
    hashvalues = [mmh3.hash64(str(x))[0] for x in range(0, int(self.n))]

    debut = int(round(time.time() * 1000))
    for value in hashvalues:
        hll.AddItem64(value)
    fin = int(round(time.time() * 1000))

    print(hll.Count64())
    print("temps = " + str(fin - debut) + "ms")
开发者ID:MarwanG,项目名称:Graal,代码行数:11,代码来源:bench64.py
示例12: find_match
def find_match(threadName, size, ohash):
    """Try random ``size``-byte strings until one hashes to ``ohash``.

    The loop runs while the module-level ``match`` flag is 0.  On a hit the
    candidate is stored in ``solution``, ``match`` is set to 1 and
    ``found_by`` records ``threadName``.
    """
    global match, solution, found_by
    while match == 0:
        candidate = str(bytearray(os.urandom(size)))
        if mmh3.hash64(candidate) == ohash:
            solution = candidate
            match = 1
            found_by = threadName
开发者ID:abemassry,项目名称:crazip,代码行数:11,代码来源:decompress_threads.py
示例13: sim_shi4_mm3
def sim_shi4_mm3(text):
    """Return two simhashes of ``text`` computed over four-word shingles.

    Every shingle spans from the start of one ``WORD_RE`` match to the end
    of the match three positions later.  Each shingle is hashed with
    ``mmh3.hash64``; the first values feed the first simhash and the second
    values feed the second one, each masked to 64 bits.
    """
    # NB: comparing hashes with both 64bit numbers makes little sense: the
    # pairwise Hamming distance on one half is highly correlated with the
    # distance on the other half.  That is expected, but it means summing
    # the two distances is not linear and should be avoided.
    # -- https://gist.github.com/darkk/e2b2762c4fe053a3cf8a299520f0490e
    mask = 0xffffffffffffffff
    leading, trailing = itertools.tee(WORD_RE.finditer(text))
    for _skip in xrange(3):  # 4 words per shingle
        next(trailing, None)

    first_values = []
    second_values = []
    for head, tail in itertools.izip(leading, trailing):
        pair = mmh3.hash64(text[head.start():tail.end()])
        first_values.append(pair[0] & mask)
        second_values.append(pair[1] & mask)

    return (simhash.compute(first_values),
            simhash.compute(second_values))
开发者ID:TheTorProject,项目名称:ooni-pipeline,代码行数:12,代码来源:simhash_seomoz.py
示例14: tx_partition
def tx_partition(app, txid):
    """ Build the blob hash for an application and transaction ID.

    Args:
      app: A string specifying the application ID.
      txid: An integer specifying the transaction ID.
    Returns:
      A bytearray that can be used as the transaction partition key.
    """
    hashed_pair = mmh3.hash64(app + str(txid))
    # Little-endian packing spreads the integer range evenly across the
    # byte ordered token range.
    packed = struct.pack('<q', hashed_pair[0])
    return bytearray(packed)
开发者ID:cdonati,项目名称:appscale,代码行数:12,代码来源:utils.py
示例15: compressor_worker
def compressor_worker():
    """Drain the shared queue ``q`` and record one data chunk per file.

    Each queue item is ``(base, relative base, file name)``.  The file is
    read, LZO-compressed to measure the compression factor, and a dict
    describing it is appended to ``compressed_data_chunks``.  With
    ``options.verbose`` set, a tab-separated summary line is printed.

    Two defects of the earlier version are fixed here:

    * the input file is closed deterministically (``with``) instead of
      being left to the garbage collector;
    * items are fetched with ``get_nowait()``.  Checking ``q.empty()`` and
      then calling a blocking ``q.get()`` lets another worker take the last
      item in between, after which this worker would block forever.
    """
    # Local import so the block works on both Python 3 and Python 2.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty

    while True:
        try:
            w_base, w_rel_base, w_f = q.get_nowait()
        except Empty:
            break

        w_rel_base = '' if w_rel_base == '.' else w_rel_base
        abs_path = os.path.join(w_base, w_f)
        rel_path = os.path.join(w_rel_base, w_f)
        # Extension without its leading dot.
        extension = os.path.splitext(rel_path)[1][1:]

        with open(abs_path) as source_file:
            raw_filestring = source_file.read()
        compressed_filestring = lzo.compress(raw_filestring, options.compression)

        len_raw = len(raw_filestring)
        len_compressed = len(compressed_filestring)
        compression_factor = (float(len_compressed) / len_raw) if len_raw else 0

        compression_used = False
        # NOTE(review): the trailing "and False" makes this condition always
        # false, so the raw data is always stored.  It looks like a
        # deliberate switch-off and is therefore kept as is.
        if compression_factor < options.cutoff and False:
            compression_used = True

        string_final = compressed_filestring if compression_used else raw_filestring
        len_final = len(string_final)
        adler32_final = lzo.adler32(string_final)

        compressed_data_chunks.append({
            'path': rel_path,
            'path_mmh3': mmh3.hash64(rel_path)[0],
            'adler32': adler32_final,
            'size_before': len_raw,
            'size_after': len_final,
            'factor': compression_factor,
            'compression': 1 if compression_used else 0,
            'extension_str': extension,
            'extension': extensions[extension] if extension in extensions else 0,
            'data': string_final
        })

        if options.verbose:
            print('\t'.join((
                'Y' if compression_used else 'N',
                extension,
                '%.02f' % (compression_factor * 100.0),
                str(len_raw / 1024),
                str(len_final / 1024),
                str(adler32_final),
                rel_path
            )))

        q.task_done()
开发者ID:vinther,项目名称:rmit-rendering,代码行数:52,代码来源:pack_directory.py
示例16: hash_line
def hash_line(line, n, size, order=1):
    """Return the sorted, de-duplicated hash slots of a line's n-grams.

    The line is stripped, lower-cased and split on whitespace.  Every
    n-gram of the given ``order`` is hashed once with ``mmh3.hash64`` and
    the two values are combined as ``(h1 + s * h2) % size`` for
    ``s`` in ``range(n)``.
    """
    tokens = line.strip().lower().split()
    slots = set()
    for gram in ngrams(tokens, order):
        base, step = mmh3.hash64(gram)
        for s in range(n):
            slots.add(int((base + s * step) % size))
    return sorted(slots)
开发者ID:christianbuck,项目名称:CorpusMining,代码行数:13,代码来源:nnbloom.py
示例17: test_compute_librarylink_hash
def test_compute_librarylink_hash(inputdata, expected):
    """Check the link hash of ``inputdata`` against ``expected``.

    The hash is built by hand (first ``mmh3.hash64`` value, packed as a
    network-order signed 64-bit integer, URL-safe base64 without padding)
    and then compared with what ``simple_hashstring`` produces.
    """
    bits128 = mmh3.hash64(inputdata)
    bits64 = bits128[0]
    octets = struct.pack('!q', bits64)

    # Intermediate representations, kept only as an aid when debugging.
    hexbits64 = hex(bits64)
    hexbits128 = list(map(hex, bits128))
    octets_raw = list(map(hex, octets))
    octets_rawer = [hex(byte)[2:].zfill(2) for byte in octets]
    encoded_unsafe = base64.b64encode(octets).rstrip(b"=")

    encoded = base64.urlsafe_b64encode(octets).rstrip(b"=")
    assert encoded == expected, (encoded, expected)

    encoded = simple_hashstring(inputdata).encode('ascii')
    assert encoded == expected, (encoded, expected)
开发者ID:anukat2015,项目名称:pybibframe,代码行数:13,代码来源:test_linkhash.py
示例18: map_sketch
def map_sketch(input, options):
    '''
    Build the list of sketches (x, (r, pos, d)) for one input record.

    The record is a tab-separated "id<TAB>sequence" string.
    x = first mmh3.hash64 value of the kmer
    r = ID of original sequence, as an int
    pos = index of kmer starting position in original sequence
    d = count of kmers extracted (sequence length - options.kmer + 1)
    '''
    id_seq = input.split('\t')
    sketches = []
    for kmer in gen_kmers(id_seq[1], options):
        hashed = mmh3.hash64(kmer[0])[0]
        details = (int(id_seq[0]), kmer[1], len(id_seq[1]) - options.kmer + 1)
        sketches.append((hashed, details))
    return sketches
开发者ID:paulkowa,项目名称:BiGPY,代码行数:13,代码来源:bigpy_sketch.py
示例19: __init__
def __init__(self, url):
    """Normalise ``url``, open a Cassandra session and prepare the queries.

    ``self.url`` holds the normalised URL, while the TLD is taken from the
    URL exactly as passed in.  The page id stored in ``self.crawl_data`` is
    the first ``mmh3.hash64`` value of the normalised URL.
    """
    self.url = urlnorm.norm(url)
    self.tld = get_tld(url).encode('ascii','ignore')
    self.crawl_data = {
        'failure': False,
        'url': self.url,
        'id': mmh3.hash64(self.url)[0],
    }

    self.cassandra_cluster = ['127.0.0.1']
    self.keyspace = 'crawlr'
    self.cluster = Cluster(self.cassandra_cluster)
    session = self.cluster.connect(self.keyspace)
    self.session = session
    session.row_factory = dict_factory

    # Prepared Cassandra queries.
    self.check_prepped_stmt = session.prepare(
        """
SELECT id FROM pages WHERE id = ?;
""")
    self.add_fail_prepped_stmt = session.prepare(
        """
UPDATE failure_counts SET failures = failures + 1 WHERE id = ?;
""")
    self.del_fail_prepped_stmt = session.prepare(
        """
DELETE failures FROM failure_counts where id = ?;
""")
    self.add_crawl_prepped_stmt = session.prepare(
        """
INSERT INTO pages (
id,
url,
crawled_at,
failure,
title,
body,
internal_links,
outbound_links)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""")
开发者ID:AugustLONG,项目名称:crawlr,代码行数:41,代码来源:__init__.py
示例20: post
def post(self):
    """Store one feedback record in the source DB and then in the stand DB.

    The record comes from ``self.storinfo()``.  After the source insert its
    result is kept as ``res_id``, ``mid`` is derived from it with
    ``mmh3.hash64``, the geometry fields are converted with
    ``self.form_dict_geom`` and the enriched record goes to the stand DB.
    Each failure is reported through ``self.jsonError`` and ends the
    request; success ends with ``self.jsonOk()``.

    Changes from the earlier version: a dangling, unterminated ``'''`` at
    the end of the method (a syntax error) is gone, the dead upsert code is
    replaced by a note, and the database error is sent as text (``str(e)``)
    instead of as an exception object.
    """
    status, dinfo = self.storinfo()
    if status == "error":
        self.jsonError({"msg": status})
        return

    # Insert into source_db.
    # NOTE: an earlier variant upserted on ugc_id and rejected duplicates;
    # it was replaced by this plain insert.
    try:
        sourcedb_result = yield self.db_s.ns_map_infoplat.rf_naviapp_feedback.insert(dinfo)
    except pymongo.errors.ConnectionFailure:
        self.jsonError({"msg": "connect mongodb timeout"})
        return

    dinfo['res_id'] = sourcedb_result
    hstr = "%snaviappfeedback" % dinfo['res_id']
    dinfo['mid'] = mmh3.hash64(hstr)[0]  # unique
    dinfo['intelligence_source'] = 31
    dinfo['dispatch_flag'] = 0

    # Form geom in mongodb.
    for geo_field in ("siwei_link1_list", "siwei_link2_list", "current_path_list", "current_track_list"):
        dinfo[geo_field] = self.form_dict_geom(dinfo[geo_field])

    # Insert into stand_db.
    try:
        yield self.db_r.info.inte_naviapp_feedback.insert(dinfo)
    except pymongo.errors.PyMongoError as e:
        # NOTE(review): jsonError presumably serialises its argument to
        # JSON, which an exception object would break; send the text.
        self.jsonError({"msg": str(e)})
        return

    self.jsonOk()
开发者ID:GeWu,项目名称:tornado_api_example,代码行数:41,代码来源:api.py
注:本文中的mmh3.hash64函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论