55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
import hashlib
|
|
import random
|
|
import string
|
|
import time
|
|
|
|
def ds_gen(dsize):
|
|
"""Generate random strings dataset"""
|
|
dataset = []
|
|
for _ in range(dsize):
|
|
length = random.randint(500000, 1000000)
|
|
random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
|
|
dataset.append(random_string)
|
|
return dataset
|
|
|
|
def hash_benchmark(dataset, hash_func):
|
|
"""Benchmark hashing function and detect collisions"""
|
|
start_time = time.time()
|
|
hashes = {}
|
|
collisions = []
|
|
|
|
for data in dataset:
|
|
hash_value = hash_func(data.encode()).hexdigest()
|
|
if hash_value in hashes:
|
|
collisions.append((data, hashes[hash_value]))
|
|
else:
|
|
hashes[hash_value] = data
|
|
|
|
end_time = time.time()
|
|
return end_time - start_time, len(collisions), collisions
|
|
|
|
def main():
|
|
dsize = int(input("Enter data size (50-100): "))
|
|
dsize = max(50, min(100, dsize)) # Ensure range 50-100
|
|
|
|
dataset = ds_gen(dsize)
|
|
|
|
hash_functions = [
|
|
(hashlib.md5, "MD5"),
|
|
(hashlib.sha1, "SHA-1"),
|
|
(hashlib.sha256, "SHA-256")
|
|
]
|
|
|
|
print(f"Testing with {len(dataset)} strings\n")
|
|
|
|
for hash_func, name in hash_functions:
|
|
time_taken, collision_count, collisions = hash_benchmark(dataset, hash_func)
|
|
print(f"{name}:")
|
|
print(f" Time: {time_taken:.6f} seconds")
|
|
print(f" Collisions: {collision_count}")
|
|
if collisions:
|
|
print(f" Collision pairs: {collisions[:3]}") # Show first 3
|
|
print()
|
|
|
|
if __name__ == '__main__':
|
|
main()
|