55 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			55 lines
		
	
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import hashlib
 | 
						|
import random
 | 
						|
import string
 | 
						|
import time
 | 
						|
 | 
						|
def ds_gen(dsize, min_length=500000, max_length=1000000):
    """Generate a dataset of random alphanumeric strings.

    Args:
        dsize: Number of strings to generate. Zero or a negative value
            yields an empty list.
        min_length: Smallest allowed string length (inclusive).
        max_length: Largest allowed string length (inclusive).

    Returns:
        A list of ``dsize`` strings. Each string's length is picked with
        ``random.randint(min_length, max_length)`` and its characters are
        drawn from ASCII letters and digits.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length`` and at
            least one string is requested (raised by ``random.randint``).
    """
    # Build the alphabet once instead of re-concatenating it for every string.
    alphabet = string.ascii_letters + string.digits
    return [
        # The length is drawn before the characters, so a seeded RNG gives
        # reproducible datasets.
        ''.join(
            random.choices(alphabet, k=random.randint(min_length, max_length))
        )
        for _ in range(dsize)
    ]
 | 
						|
 | 
						|
def hash_benchmark(dataset, hash_func):
    """Time a hash function over a dataset and report digest collisions.

    Args:
        dataset: Iterable of ``str`` items to hash.
        hash_func: Callable that accepts ``bytes`` and returns an object with
            a ``hexdigest()`` method (for example ``hashlib.sha256``).

    Returns:
        A tuple ``(elapsed_seconds, collision_count, collisions)``.
        ``collisions`` is a list of ``(data, earlier_data)`` pairs of
        *different* strings that produced the same hex digest; a string that
        merely repeats an earlier input is not counted.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for timing;
    # time.time() follows the wall clock and can jump if it is adjusted.
    # Note: the timed region also covers str->bytes encoding and the
    # bookkeeping below, not only the hash computation.
    start_time = time.perf_counter()
    first_seen = {}  # hex digest -> first string that produced it
    collisions = []

    for data in dataset:
        digest = hash_func(data.encode()).hexdigest()
        # Stores `data` on first sight of this digest; otherwise returns the
        # string recorded earlier for it.
        earlier = first_seen.setdefault(digest, data)
        # Identical input always hashes to the same digest, so a duplicate
        # string is not a collision; only differing inputs count.
        if earlier != data:
            collisions.append((data, earlier))

    elapsed = time.perf_counter() - start_time
    return elapsed, len(collisions), collisions
 | 
						|
 | 
						|
def _preview(text, limit=32):
    """Return ``text`` cut to ``limit`` characters, noting the full length."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}... ({len(text)} chars)"


def main():
    """Prompt for a dataset size and benchmark MD5, SHA-1 and SHA-256.

    The entered size is clamped to the range 50-100, a random dataset of that
    many strings is generated with ``ds_gen``, and each hash function is run
    through ``hash_benchmark``. Timing and collision results are printed to
    standard output.

    Raises:
        SystemExit: If the entered size is not an integer.
    """
    raw_size = input("Enter data size (50-100): ")
    try:
        dsize = int(raw_size)
    except ValueError:
        # Exit with a readable message instead of a raw traceback.
        raise SystemExit(
            f"Invalid data size {raw_size!r}: expected an integer."
        ) from None
    dsize = max(50, min(100, dsize))  # Ensure range 50-100

    dataset = ds_gen(dsize)

    hash_functions = [
        (hashlib.md5, "MD5"),
        (hashlib.sha1, "SHA-1"),
        (hashlib.sha256, "SHA-256"),
    ]

    print(f"Testing with {len(dataset)} strings\n")

    for hash_func, name in hash_functions:
        time_taken, collision_count, collisions = hash_benchmark(dataset, hash_func)
        print(f"{name}:")
        print(f"  Time: {time_taken:.6f} seconds")
        print(f"  Collisions: {collision_count}")
        if collisions:
            # Dataset strings can be very long, so show short previews of the
            # first 3 pairs rather than dumping the full strings.
            shown = [(_preview(a), _preview(b)) for a, b in collisions[:3]]
            print(f"  Collision pairs: {shown}")
        print()
 | 
						|
 | 
						|
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
 |