-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhash_utils.py
46 lines (35 loc) · 1.3 KB
/
hash_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import hashlib
import string
# Dropping the vowels makes it unlikely that an encoded string spells a real word ;)
ALPHABET = ''.join((
    '-._~',  # additional unreserved characters allowed by RFC 3986
    'bcdfghjklmnpqrstvwxyz',  # lower case letters without aeiou
    'BCDFGHJKLMNPQRSTVWXYZ',  # upper case letters without AEIOU
    string.digits,
))
def url_safe_encode(data, alphabet=ALPHABET):
    """
    Encode bytes into a URL safe string.

    Each byte of `data` picks one character from `alphabet` at index
    ``byte % len(alphabet)``, so the result has exactly ``len(data)``
    characters. Because of the modulo the mapping cannot be reversed in
    general (several byte values may share one character).

    Note:
        Use a URL safe alphabet (see RFC 3986) without vowels.

    :param data: the raw bytes to encode; must be a `bytes` instance
    :param alphabet: sequence of characters to pick from
    :return: the encoded string
    :raises AssertionError: if `data` is not a `bytes` instance
    :raises ZeroDivisionError: if `alphabet` is empty and `data` is not
    """
    # Explicit raise instead of an `assert` statement: asserts are removed
    # when Python runs with -O, which would silently disable this check.
    # AssertionError is kept so existing callers see the same exception type.
    if not isinstance(data, bytes):
        raise AssertionError(
            f'data must be bytes, not {type(data).__name__}'
        )

    len_alphabet = len(alphabet)  # hoisted: invariant for the whole loop
    return ''.join(alphabet[char % len_alphabet] for char in data)
def url_safe_hash(data, max_size=None, hasher_name='sha3_512', encoding='utf-8'):
    """
    Generate a URL safe hash with `max_size` from given string/bytes.

    The input is hashed with the `hashlib` algorithm named `hasher_name`,
    the digest bytes are converted with `url_safe_encode()` and the result
    is optionally cut down to its first `max_size` characters.

    :param data: text or bytes to hash; `str` is encoded with `encoding` first
    :param max_size: length to truncate the result to;
        a falsy value (``None`` or ``0``) returns the full encoded digest
    :param hasher_name: algorithm name passed to `hashlib.new()`
    :param encoding: encoding used to turn a `str` input into bytes
    :return: the URL safe hash string
    :raises AssertionError: if the encoded digest is shorter than `max_size`

    >>> url_safe_hash('foo', max_size=16)
    'tMXtn6KpcjzTdzTk'
    """
    if isinstance(data, str):
        data = data.encode(encoding)

    # Generate hash digest:
    hash_digest = hashlib.new(hasher_name, data).digest()

    # Convert hash digest bytes into URL safe string:
    safe_hash = url_safe_encode(hash_digest)

    if max_size:
        # Explicit raise instead of an `assert` statement: asserts are removed
        # when Python runs with -O, and a too short hash would then be
        # returned without any error. AssertionError is kept so existing
        # callers see the same exception type and message.
        if len(safe_hash) < max_size:
            raise AssertionError('Hash digest too short for requested max size!')
        safe_hash = safe_hash[:max_size]

    return safe_hash