本文整理汇总了Python中mmh3.hash函数的典型用法代码示例。如果您正苦于以下问题:Python hash函数的具体用法?Python hash怎么用?Python hash使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了hash函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _hash_bits
def _hash_bits(self, key):
    """Yield one bit offset per configured hash function for ``key``.

    Two murmur3 hashes are computed once (the second is seeded with the
    first) and combined as ``first + index * second`` for every ``index``
    in ``range(self._hash_funcs)``. Each combination is reduced modulo
    ``self._bits_per_slice`` before being yielded.

    Background on this double-hashing scheme:
    http://spyced.blogspot.com/2009/01/all-you-ever-wanted-to-know-about.html
    """
    first = mmh3.hash(key, 0)
    # NOTE(review): ``first`` is a signed value and is passed as the seed;
    # confirm the pinned mmh3 release accepts negative seeds.
    second = mmh3.hash(key, first)
    for index in range(self._hash_funcs):
        combined = first + index * second
        yield abs(combined % self._bits_per_slice)
开发者ID:jettify,项目名称:aioredis_bloom,代码行数:7,代码来源:bloom.py
示例2: data
def data(path, label_path=None):
    """Stream hashed feature rows from a CSV file, optionally with labels.

    The first line of ``path`` (and of ``label_path``, when given) is
    skipped as a header. For every remaining line the first column is
    parsed as the integer row ID and every other column is one-hot encoded
    with the hash trick: ``abs(mmh3.hash("<column>_<value>")) % D``.
    Pairwise interaction features for the columns in ``hash_cols`` are
    appended after the plain columns.

    ``D`` is a module-level name that is not defined in this function.

    Args:
        path: Path of the CSV file holding the features.
        label_path: Optional path of the CSV file holding the labels; its
            rows are read in lockstep with ``path``.

    Yields:
        ``(ID, x, y)`` when ``label_path`` is given, otherwise ``(ID, x)``.
        ``x`` is the *same* list object on every iteration (it is
        overwritten in place), so copy it if it must outlive the step.
    """
    hash_cols = [3, 4, 34, 35, 61, 64, 65, 91, 94, 95]
    npairs = len(hash_cols)
    # Floor division keeps the multiplier an int on Python 3 as well.
    x = [0] * (146 + npairs * (npairs - 1) // 2)

    # The files are closed when the generator is exhausted or closed.
    with open(path) as fd:
        fd.readline()  # skip headers
        label = open(label_path) if label_path else None
        try:
            if label is not None:
                label.readline()  # skip headers
            for line in fd:
                # parse x
                row = line.rstrip().split(',')
                for m, feat in enumerate(row):
                    if m == 0:
                        ID = int(feat)
                    else:
                        # One-hot encode everything (categorical, boolean
                        # and numerical alike) with the hash trick. The
                        # built-in hash() is fast but not stable across
                        # machines, hence mmh3.
                        x[m] = abs(mmh3.hash(str(m) + '_' + feat)) % D
                # Interaction features continue numbering after the last
                # plain column index left in ``m`` by the loop above.
                for i in range(npairs):
                    for j in range(i + 1, npairs):
                        m += 1
                        x[m] = abs(mmh3.hash(str(m) + '_' + row[hash_cols[i]] + "_x_" + row[hash_cols[j]])) % D
                if label is not None:
                    # float() avoids later type casting; [1:] drops the id.
                    y = [float(value) for value in label.readline().split(',')[1:]]
                    yield (ID, x, y)
                else:
                    yield (ID, x)
        finally:
            if label is not None:
                label.close()
开发者ID:timpalpant,项目名称:KaggleTSTextClassification,代码行数:33,代码来源:fast_solution.py
示例3: authenticate
def authenticate(self, name, password, certificates, certhash, certstrong, current=None):
    """Authenticate a connecting user or guest by name and password.

    ``'SuperUser'`` is never handled here. Otherwise ``name`` is looked up
    as a registered ``User``; when none exists it must be a UUID4 string
    identifying a ``GuestUser``.

    Args:
        name: User id of a registered user, or the UUID of a guest.
        password: Password supplied by the client.
        certificates, certhash, certstrong, current: Accepted for interface
            compatibility; not used by this implementation.

    Returns:
        ``RET_FALLTHROUGH`` for ``'SuperUser'``, ``RET_DENIED`` on any
        failed check, otherwise a ``(user_id, display_name, groups)`` tuple
        where ``user_id`` is a non-negative murmur3-derived integer.
    """
    with self.app.app_context():
        if name == 'SuperUser':
            return RET_FALLTHROUGH

        user = User.query.filter_by(user_id=name).first()
        if not user:
            # Not a registered user: only well-formed UUIDs can be guests.
            try:
                uuid.UUID(name, version=4)
            except ValueError:
                return RET_DENIED
            guest_user = GuestUser.query.get(name)
            if not guest_user:
                return RET_DENIED
            if not guest_user.password == password or guest_user.banned:
                return RET_DENIED
            if guest_user.corporation:
                display_name = '[{}][GUEST] {}'.format(self.get_ticker(guest_user.corporation), guest_user.name)
            else:
                display_name = '[GUEST] {}'.format(guest_user.name)
            user_id = abs(mmh3.hash(guest_user.id.hex))
            groups = [u'Guest']
            self.app.logger.debug('Authenticating guest with: {} {} {}'.format(user_id, display_name, groups))
            return user_id, display_name, groups

        if not user.mumble_password == password:
            return RET_DENIED
        # mmh3.hash() is signed. Use abs() as the guest branch does so the
        # id can never be negative.
        # NOTE(review): presumably negative return values are reserved for
        # status codes such as RET_DENIED -- confirm against the Mumble API.
        user_id = abs(mmh3.hash(user.user_id))
        display_name = '[{}] {}'.format(self.get_ticker(user.corporation_name), user.main_character)
        self.app.logger.debug('Authenticating user with: {} {} {}'.format(user_id, display_name, user.groups))
        return user_id, display_name, user.groups
开发者ID:J4LP,项目名称:mumble,代码行数:26,代码来源:m_api.py
示例4: hash
def hash(self, string):
    """Return the list of ``self.k`` bit indices for ``string``.

    A base murmur3 hash (seed 0) and a step hash (seeded with the base)
    are combined as ``base + n * step`` and reduced modulo ``self.m``.
    """
    base = mmh3.hash(string, 0)
    step = mmh3.hash(string, base)
    return [abs((base + n * step) % self.m) for n in range(self.k)]
开发者ID:dariajung,项目名称:bloom,代码行数:8,代码来源:bloom.py
示例5: get_hash
def get_hash(label,namespace,feature,stride,mask):
    """Compute the masked hash index for a (namespace, feature, label) triple.

    The namespace is hashed with seed 0 (or contributes 0 when falsy).
    Numeric features (as decided by ``is_number``) are offset by the
    namespace hash; any other feature is hashed with the namespace hash as
    seed. The result is scaled by ``stride``, shifted by ``label - 1`` and
    finally masked with ``mask``.
    """
    seed = mmh3.hash(namespace, 0) if namespace else 0
    if is_number(feature):
        base = int(feature) + seed
    else:
        base = mmh3.hash(feature, seed)
    return (base * stride + label - 1) & mask
开发者ID:Faye2014,项目名称:seldon-server,代码行数:11,代码来源:vw_hash.py
示例6: Hashmap_WordVector
def Hashmap_WordVector(self,nbits):
    """Hash every word of ``self.Words_Vector`` into an ``nbits``-bit vector.

    For each word three chained murmur3 hashes are appended to
    ``self.hashmap1`` / ``self.hashmap2`` / ``self.hashmap3`` (each seeded
    with the previous result, all reduced modulo ``nbits``), and the
    matching bits of ``self.bloom_vector`` are set.
    """
    word_count = len(self.Words_Vector)
    self.bl_bits = nbits
    self.bloom_vector = self.bl_bits * bitarray('0')
    for pos in range(word_count):
        word = self.Words_Vector[pos]
        # The hashmap lists are read back by word position, so they are
        # assumed to be empty when this method starts -- TODO confirm.
        self.hashmap1.append(mmh3.hash(word) % self.bl_bits)
        self.hashmap2.append(mmh3.hash(word, self.hashmap1[pos]) % self.bl_bits)
        self.hashmap3.append(mmh3.hash(word, self.hashmap2[pos]) % self.bl_bits)
        for table in (self.hashmap1, self.hashmap2, self.hashmap3):
            self.bloom_vector[table[pos]] = 1
开发者ID:bajib,项目名称:Log_Clustering,代码行数:11,代码来源:sketching_files.py
示例7: parse_block
def parse_block(block):
    """Build index entries for every line of every file in ``block``.

    Each line is stripped and split on single spaces. The hash of the
    first field is paired with ``[hash of the file path, second field]``.

    Returns:
        A list of ``(first_field_hash, [file_path_hash, second_field])``
        tuples in file and line order.
    """
    entries = []
    for file_path in block:
        path_hash = mmh3.hash(file_path)
        with open(file_path, 'r') as input_file:
            for line in input_file:
                fields = line.strip().split(' ')
                entry = (mmh3.hash(fields[0]), [path_hash, fields[1]])
                entries.append(entry)
    return entries
开发者ID:bogdancarpusor,项目名称:html_indexer,代码行数:13,代码来源:bsbi.py
示例8: getHash
def getHash(word):
    '''
    Return the murmur3 hash of ``word`` as an unsigned 32-bit value.

    ``mmh3.hash`` yields a signed integer; masking with 0xffffffff maps it
    into the range 0..2**32-1. The mask is written without the Python 2
    ``L`` suffix so the function also parses on Python 3.
    '''
    return mmh3.hash(word) & 0xffffffff
开发者ID:kenluck2001,项目名称:customHashing,代码行数:7,代码来源:work.py
示例9: add_document_indexes
def add_document_indexes(self, text, url, is_print=False):
    """Register ``url`` as a document and index every word of ``text``.

    The document id is the position of ``url`` in ``self.documents``.
    Each lower-cased word is UTF-8 encoded and routed to the bucket
    ``self.full_index[mmh3.hash(word) % self.count_of_files]``, where the
    document id is appended to that word's ``"docs"`` list.

    Indexing is best effort: a failure on one word is reported on stdout
    with its traceback and the remaining words are still processed.

    Args:
        text: Document text to index.
        url: Identifier stored in ``self.documents``.
        is_print: When true, print every word with its bucket number.
    """
    # TODO: Maybe, it is good idea to change key from string to hash
    self.documents.append(url)
    doc_id = len(self.documents) - 1
    for word in self._split_text(text.lower()):
        try:
            word = word.encode('utf-8')
            w_hash = mmh3.hash(word) % self.count_of_files
            if is_print:
                # Single pre-formatted argument: prints the same text
                # whether ``print`` is a statement or a function.
                print("%s %s" % (word, w_hash))
            r_index = self.full_index[w_hash]
            # ``in`` replaces dict.has_key(), which Python 3 removed.
            if word in r_index:
                r_index[word]["docs"].append(doc_id)
            else:
                r_index[word] = {}
                r_index[word]["docs"] = [doc_id]
            if 'encoding' not in r_index:
                r_index['encoding'] = self._encoding
        except Exception:
            # Deliberately broad: one bad word must not abort the document.
            print("EXCEPTION %s" % word)
            traceback.print_exc()
开发者ID:pashna,项目名称:SearchIndexer,代码行数:28,代码来源:Indexer.py
示例10: readHash
def readHash(self):
    """Feed hashed stdin lines into an ``Hll(self.p)`` and print its count.

    Lines are read one at a time with the trailing newline removed.
    Reading stops at the first empty result, i.e. at end of input or at a
    blank line.
    """
    hll = Hll(self.p)
    # iter() with a sentinel keeps calling the reader until it returns ''.
    for item in iter(lambda: sys.stdin.readline().rstrip('\n'), ''):
        hll.AddItem(mmh3.hash(item))
    print(hll.Count())
开发者ID:MarwanG,项目名称:Graal,代码行数:7,代码来源:client32.py
示例11: save_cursor
def save_cursor(self, cursor_data):
    """Store ``cursor_data`` in the ``lookup`` index.

    The document id is the murmur3 hash of ``self.data_from``. The
    response of ``self.es.index`` is ignored and nothing is returned.
    """
    document_id = mmh3.hash(self.data_from)
    self.es.index(
        index="lookup",
        doc_type="data",
        id=document_id,
        body=cursor_data,
    )
开发者ID:pangbo-1988,项目名称:data_diff,代码行数:7,代码来源:cursor.py
示例12: _bit_offsets
def _bit_offsets(self, value):
    '''Yield the bit offsets to set/check in this Bloom filter for a value.

    One offset is produced per hash function: the encoded value is hashed
    with seeds 0 .. num_hashes() - 1 and each hash is reduced modulo
    size().

    Instantiate a Bloom filter:

        >>> dilberts = BloomFilter(
        ...     num_values=100,
        ...     false_positives=0.01,
        ...     key='dilberts',
        ... )

    Now let's look at a few examples:

        >>> tuple(dilberts._bit_offsets('rajiv'))
        (183, 319, 787, 585, 8, 471, 711)
        >>> tuple(dilberts._bit_offsets('raj'))
        (482, 875, 725, 667, 109, 714, 595)
        >>> tuple(dilberts._bit_offsets('dan'))
        (687, 925, 954, 707, 615, 914, 620)

    To insert 'rajiv', bits 183, 319, 787, 585, 8, 471, and 711 must all
    be set to 1; bits that are already 1 are simply left as they are.

    To test for 'rajiv', the same bits are checked. If even one of them is
    0, 'rajiv' was never inserted. If all of them are 1, 'rajiv' was
    *probably* inserted.
    '''
    payload = self._encode(value)
    for seed in range(self.num_hashes()):
        digest = mmh3.hash(payload, seed=seed)
        yield digest % self.size()
开发者ID:brainix,项目名称:pottery,代码行数:34,代码来源:bloom.py
示例13: add
def add(self, string):
    """Set the ``Bloom.bit`` entries selected for ``string``.

    The string is hashed once per seed in ``range(Bloom.numberofhash)``
    and each hash is reduced modulo 1000000 to obtain a bit position.
    """
    # Collect every position first, then flip the bits.
    positions = []
    for seed in range(Bloom.numberofhash):
        positions.append(mmh3.hash(string, seed=seed) % 1000000)
    for position in positions:
        Bloom.bit[position] = 1
开发者ID:dimitrisdan,项目名称:BigData,代码行数:7,代码来源:7-1.py
示例14: lookup
def lookup(self, string):
    """Return True when every bit selected for ``string`` is set.

    One position is checked per seed in ``range(self.hash_count)``. The
    scan stops at the first position whose ``self.bit_array`` entry
    equals 0, in which case False is returned.
    """
    return all(
        not (self.bit_array[mmh3.hash(string, seed) % self.size] == 0)
        for seed in range(self.hash_count)
    )
开发者ID:datateller,项目名称:ywbserver-py3,代码行数:7,代码来源:bloomfilter.py
示例15: select_hash
def select_hash(hashkind, line):
    """Hash ``line`` with the algorithm named by ``hashkind``.

    :param hashkind: -- (str) The name of the hash: "md5", "sha1", "crc"
        or "murmur".
    :param line: -- (str) The string to hash.
    :return: -- (str) The hex digest for "md5", "sha1" and "crc", or the
        decimal text of the integer hash for "murmur".
    :raises ValueError: if ``hashkind`` is not one of the supported names.
    """
    if hashkind == "md5":
        hashline = hashlib.md5(line).hexdigest()
    elif hashkind == "sha1":
        hashline = hashlib.sha1(line).hexdigest()
    elif hashkind == "crc":
        crc32 = crcmod.Crc(0x104c11db7, initCrc=0, xorOut=0xFFFFFFFF)
        crc32.update(line)
        hashline = crc32.hexdigest()
    elif hashkind == "murmur":
        hashline = mmh3.hash(line)
    else:
        # Previously an unknown name fell through to the return statement
        # and failed with an UnboundLocalError on ``hashline``.
        raise ValueError("Unknown hash kind: %r" % (hashkind,))
    return str(hashline)
开发者ID:caar2000,项目名称:AIL-framework,代码行数:25,代码来源:lib_redis_insert.py
示例16: get_scatter_prop
def get_scatter_prop(element_list):
    """ Builds the scatter property for an entity's key path, if it gets one.

    The names/ids of all path elements are concatenated and hashed. The
    first two bytes of the packed hash are read as an unsigned short, and
    a property is only produced when that value is below
    dbconstants.SCATTER_PROPORTION.

    Args:
        element_list: A list of entity_pb.Path_Element objects.
    Returns:
        An entity_pb.Property object named '__scatter__' whose string value
        is those two bytes, or None.
    """
    def element_identifier(element):
        # Prefer the name, then the stringified id, else contribute nothing.
        if element.has_name():
            return element.name()
        if element.has_id():
            return str(element.id())
        return ''

    key_text = ''.join(element_identifier(element) for element in element_list)
    packed_hash = struct.pack('i', mmh3.hash(key_text))
    hash_bytes = packed_hash[0:2]
    (hash_int,) = struct.unpack('H', hash_bytes)
    if hash_int >= dbconstants.SCATTER_PROPORTION:
        return None

    scatter_property = entity_pb.Property()
    scatter_property.set_name('__scatter__')
    scatter_property.set_meaning(entity_pb.Property.BYTESTRING)
    scatter_property.set_multiple(False)
    scatter_property.mutable_value().set_stringvalue(hash_bytes)
    return scatter_property
开发者ID:tmarballi,项目名称:appscale,代码行数:33,代码来源:utils.py
示例17: last_seen
def last_seen(self, item):
    """Return the largest timestamp recorded for ``item`` across the sketch.

    Row ``i`` of ``self.sketch`` is probed at position
    ``mmh3.hash(item, i) % self.size``; at most ``self.hashes`` rows are
    consulted.

    Raises:
        ValueError: from ``max()`` when no row contributed a timestamp.
    """
    timestamps = []
    for row, seed in zip(self.sketch, range(self.hashes)):
        # The original looped over every cell of the row and appended the
        # same value each time; one lookup gives the same maximum. Empty
        # rows contributed nothing before and are still skipped.
        if len(row) == 0:
            continue
        timestamps.append(row[mmh3.hash(item, seed) % self.size])
    return max(timestamps)
开发者ID:grantholly,项目名称:cms_stream_processing,代码行数:7,代码来源:count_last_seen_sketch.py
示例18: alert_factory
def alert_factory(location=None,
                  bssid=None,
                  channel=None,
                  essid=None,
                  tx=None,
                  intent=None):
    """Build an alert dict from the given fields.

    Every argument is required; the ``None`` defaults only exist so that
    callers can pass them by keyword. The ``id`` is the murmur3 hash (as
    text) of ``bssid``, ``channel`` and ``intent`` concatenated, and
    ``timestamp`` is the current ``time.time()``.
    """
    required = (location, bssid, channel, essid, tx, intent)
    assert all(value is not None for value in required)

    identity = ''.join((bssid, str(channel), intent))
    alert = {
        'id': str(mmh3.hash(identity)),
        'location': location,
        'bssid': bssid,
        'channel': channel,
        'tx': tx,
        'essid': essid,
        'intent': intent,
        'timestamp': time.time(),
    }
    return alert
开发者ID:s0lst1c3,项目名称:sentrygun,代码行数:31,代码来源:sentrygun.py
示例19: contingentParitiesFunction
def contingentParitiesFunction(pop, verbose=False):
    """Score every row of ``pop`` and record the steps where it misstepped.

    ``order``, ``height`` and ``rng`` are module-level names not defined
    here; ``pop`` must have ``order * height`` columns.

    Each row is walked in ``height`` steps. A step reseeds ``rng`` from
    the hash of the trace built so far, draws ``order`` loci (swapping each
    drawn locus to the front of ``loci``) and sums the row's values there.
    An even sum is recorded as a misstep, numbered from 1.

    Args:
        pop: 2-D array indexed as ``pop[row, locus]``.
        verbose: When true, print the sorted (trace, fitness) pairs.

    Returns:
        A tuple ``(fitness, popMissteps)``: ``fitness`` is an array of
        ``height - number_of_missteps`` per row and ``popMissteps`` is the
        list of misstep lists.
    """
    assert(pop.shape[1] == order * height)
    popMissteps = []
    traceAndFitness = []
    for row in range(pop.shape[0]):
        length = pop.shape[1]
        loci = np.arange(length)
        missteps = []
        trace = ""
        for step in range(height):
            # The loci drawn at this step depend on everything seen so far.
            rng.seed(abs(mmh3.hash(trace)))
            parity_sum = 0
            trace += "|"
            for i in range(order):
                target = step * order + i
                idx = rng.randint(length - target) + 1
                picked = loci[-idx]
                loci[-idx] = loci[target]
                loci[target] = picked
                trace += "%2d:%s|" % (picked + 1, int(pop[row, picked]))
                parity_sum += pop[row, picked]
            if parity_sum % 2 == 0:
                missteps.append(step + 1)
        popMissteps.append(missteps)
        traceAndFitness.append((trace, height - len(missteps)))
    if verbose:
        for entry in sorted(traceAndFitness):
            print("%s %s " % entry)
    fitness = np.array([height - len(steps) for steps in popMissteps])
    return fitness, popMissteps
开发者ID:burjorjee,项目名称:royal-roads,代码行数:34,代码来源:royalroads.py
示例20: makeHashFuncs
def makeHashFuncs(key, size, numHashes):
    """Return ``numHashes`` hash values of ``key``, each reduced modulo ``size``.

    The seeds run from 1 to ``numHashes`` inclusive.
    """
    return [mmh3.hash(key, seed) % size for seed in range(1, numHashes + 1)]
开发者ID:xsswfm,项目名称:pypbf,代码行数:7,代码来源:pypbf.py
注:本文中的mmh3.hash函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论