130 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			130 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os
 | |
| import json
 | |
| from collections import defaultdict
 | |
| from Crypto.Cipher import AES
 | |
| from Crypto.Random import get_random_bytes
 | |
| from Crypto.Util.Padding import pad, unpad
 | |
| 
 | |
# Module-wide AES key shared by encrypt_data() and decrypt_data().
# It is generated fresh each time the module is loaded and this file never
# writes it anywhere, so ciphertext saved by one process presumably cannot
# be decrypted by a later one -- NOTE(review): confirm this is intended.
AES_KEY = get_random_bytes(32)  # 256-bit key
 | |
| 
 | |
| 
 | |
def encrypt_data(data):
    """Encrypt *data* with AES-CBC under the module-level ``AES_KEY``.

    Args:
        data: Plaintext as ``bytes`` or ``str``; a ``str`` is encoded as
            UTF-8 before encryption.

    Returns:
        The IV generated for this call followed by the ciphertext of the
        padded plaintext, as a single ``bytes`` value.
    """
    # No IV is passed in, so the cipher object supplies its own; it is
    # prepended to the output so decrypt_data() can recover it.
    cbc = AES.new(AES_KEY, AES.MODE_CBC)
    plaintext = data.encode('utf-8') if isinstance(data, str) else data
    return cbc.iv + cbc.encrypt(pad(plaintext, AES.block_size))
 | |
| 
 | |
| 
 | |
def decrypt_data(encrypted_data):
    """Reverse :func:`encrypt_data` and return the plaintext bytes.

    Args:
        encrypted_data: ``bytes`` laid out as a 16-byte IV followed by the
            AES-CBC ciphertext.

    Returns:
        The decrypted payload with its padding removed, as ``bytes``
        (not decoded to ``str``).
    """
    # The first 16 bytes are the IV that encrypt_data() prepended.
    iv, ciphertext = encrypted_data[:16], encrypted_data[16:]
    cbc = AES.new(AES_KEY, AES.MODE_CBC, iv)
    return unpad(cbc.decrypt(ciphertext), AES.block_size)
 | |
| 
 | |
| 
 | |
def load_documents(docs_dir):
    """Read every Markdown file directly inside *docs_dir*.

    Args:
        docs_dir: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping each ``*.md`` file name to its full text, inserted
        in sorted file-name order.

    Raises:
        OSError: If *docs_dir* cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    documents = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # resulting dict (and anything built from it) reproducible.
    for filename in sorted(os.listdir(docs_dir)):
        if not filename.endswith(".md"):
            continue
        filepath = os.path.join(docs_dir, filename)
        # A directory whose name ends in ".md" would make open() fail.
        if not os.path.isfile(filepath):
            continue
        # Decode explicitly as UTF-8 instead of the platform default so the
        # same files load identically on every machine.
        with open(filepath, "r", encoding="utf-8") as f:
            documents[filename] = f.read()
    print(f"Loaded {len(documents)} documents")
    return documents
 | |
| 
 | |
| 
 | |
def build_inverted_index(documents):
    """Build a word -> document-ID index from *documents*.

    Each document's text is lower-cased and split on whitespace; every
    token then has its non-alphanumeric characters removed, and tokens
    that end up empty are dropped.

    Args:
        documents: Mapping of document ID to document text.

    Returns:
        A dict mapping each word to a list of the IDs of the documents
        containing it. Each ID appears once per word, in the order the
        documents were iterated, so the result is reproducible for a given
        input (a plain ``set`` would order IDs by randomised string hash).
    """
    # word -> {doc_id: None}; a dict is used as an insertion-ordered set.
    postings = defaultdict(dict)

    for doc_id, content in documents.items():
        # split() with no argument already treats newlines as separators.
        for token in content.lower().split():
            word = ''.join(ch for ch in token if ch.isalnum())
            if word:
                postings[word][doc_id] = None

    inverted_index = {word: list(doc_ids) for word, doc_ids in postings.items()}
    print(f"Built index with {len(inverted_index)} unique words")
    return inverted_index
 | |
| 
 | |
| 
 | |
def encrypt_index(inverted_index, output_path="encrypted_index.bin"):
    """Serialize, encrypt and save the inverted index.

    Args:
        inverted_index: JSON-serializable index (word -> list of doc IDs).
        output_path: File the encrypted blob is written to. Defaults to
            ``"encrypted_index.bin"`` in the current working directory.

    Returns:
        The encrypted index as ``bytes`` (the same data written to disk).

    Raises:
        TypeError: If *inverted_index* is not JSON-serializable.
        OSError: If *output_path* cannot be written.
    """
    # serialize and encrypt
    serialized = json.dumps(inverted_index).encode('utf-8')
    encrypted = encrypt_data(serialized)
    with open(output_path, "wb") as f:
        f.write(encrypted)
    print("Encrypted index saved")
    return encrypted
 | |
| 
 | |
| 
 | |
def decrypt_index(encrypted_index):
    """Decrypt an encrypted index blob and parse it back into a dict.

    Args:
        encrypted_index: ``bytes`` as returned by :func:`encrypt_index`.

    Returns:
        The object parsed from the decrypted UTF-8 JSON text.
    """
    plaintext = decrypt_data(encrypted_index)
    return json.loads(plaintext.decode('utf-8'))
 | |
| 
 | |
| 
 | |
def search(query, encrypted_index_data, documents):
    """Look up *query* in the encrypted index and print matching documents.

    The query is tokenised the same way build_inverted_index() tokenises
    documents: lower-cased, split on whitespace, non-alphanumeric
    characters removed from each token. A document matches when it
    contains every query term, so a single-word query behaves as a plain
    lookup and a multi-word query narrows the result.

    Args:
        query: Search text entered by the user.
        encrypted_index_data: Encrypted index bytes from encrypt_index().
        documents: Mapping of document ID to document text, used to print
            the matches.

    Returns:
        None. Results are printed to stdout.
    """
    print(f"\nSearching for: '{query}'")

    # decrypt index
    inverted_index = decrypt_index(encrypted_index_data)

    # normalize query: one term per whitespace-separated token, so that
    # "hello world" is not collapsed into the unindexed word "helloworld"
    terms = [
        ''.join(c for c in token if c.isalnum())
        for token in query.lower().split()
    ]
    terms = [term for term in terms if term]

    # search: start from the first term's postings and keep only the
    # documents that also appear under every remaining term
    doc_ids = list(inverted_index.get(terms[0], [])) if terms else []
    for term in terms[1:]:
        containing = set(inverted_index.get(term, []))
        doc_ids = [doc_id for doc_id in doc_ids if doc_id in containing]

    # display results
    if not doc_ids:
        print("No documents found")
        return

    print(f"Found {len(doc_ids)} document(s):\n")
    for doc_id in doc_ids:
        if doc_id in documents:
            print(f"{'='*60}")
            print(f"Document: {doc_id}")
            print(f"{'='*60}")
            print(documents[doc_id])
            print(f"{'='*60}\n")
 | |
| 
 | |
| 
 | |
def main():
    """Run the interactive demo.

    Loads the Markdown files from the ``documents`` directory, builds and
    encrypts the inverted index, then prompts for queries until the user
    types ``exit`` or input ends (EOF or Ctrl-C).
    """
    print("\n=== Searchable Symmetric Encryption Demo ===\n")

    docs_dir = "documents"

    # load documents
    documents = load_documents(docs_dir)

    # build inverted index
    inverted_index = build_inverted_index(documents)

    # encrypt index
    encrypted_index = encrypt_index(inverted_index)

    # interactive search
    print("\nInteractive Search (type 'exit' to quit)")

    while True:
        try:
            query = input("\nEnter search query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop instead of crashing
            # with a traceback; the bare print ends the prompt line.
            print()
            break

        if query.lower() == 'exit':
            break

        if query:
            search(query, encrypted_index, documents)

    print("\nDemo Complete\n")
 | |
| 
 | |
| 
 | |
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 |