130 lines
3.5 KiB
Python
130 lines
3.5 KiB
Python
import os
|
|
import json
|
|
from collections import defaultdict
|
|
from Crypto.Cipher import AES
|
|
from Crypto.Random import get_random_bytes
|
|
from Crypto.Util.Padding import pad, unpad
|
|
|
|
# Module-wide symmetric key used by encrypt_data() / decrypt_data().
# It is drawn fresh from get_random_bytes() every time this module is loaded
# and is not written anywhere in the code visible here, so ciphertext produced
# in one run cannot be decrypted with the key from a later run.
AES_KEY = get_random_bytes(32)  # 32 bytes = 256-bit key
|
|
|
|
|
|
def encrypt_data(data):
    """Encrypt *data* with AES-CBC under the module-level ``AES_KEY``.

    Args:
        data: Plaintext to encrypt. A ``str`` is UTF-8 encoded first; any
            other value is handed to ``pad`` unchanged.

    Returns:
        The cipher object's IV immediately followed by the ciphertext of the
        block-padded plaintext. ``decrypt_data`` relies on this layout.
    """
    # No IV is passed in, so the cipher object supplies its own.
    cbc = AES.new(AES_KEY, AES.MODE_CBC)
    plaintext = data.encode('utf-8') if isinstance(data, str) else data
    return cbc.iv + cbc.encrypt(pad(plaintext, AES.block_size))
|
|
|
|
|
|
def decrypt_data(encrypted_data):
    """Reverse ``encrypt_data``: split off the IV, decrypt, and strip padding.

    Args:
        encrypted_data: Bytes laid out as a 16-byte IV followed by the
            AES-CBC ciphertext.

    Returns:
        The unpadded plaintext as returned by ``unpad``. No text decoding and
        no integrity check is performed in this function.
    """
    # The first 16 bytes are the IV; everything after it is ciphertext.
    iv, ciphertext = encrypted_data[:16], encrypted_data[16:]
    padded_plaintext = AES.new(AES_KEY, AES.MODE_CBC, iv).decrypt(ciphertext)
    return unpad(padded_plaintext, AES.block_size)
|
|
|
|
|
|
def load_documents(docs_dir):
    """Read every Markdown file directly inside *docs_dir* into memory.

    Args:
        docs_dir: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping each ``*.md`` file name (not the full path) to its
        text content, inserted in sorted file-name order.

    Raises:
        OSError: If *docs_dir* cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    documents = {}
    # os.listdir() returns entries in arbitrary order; sort so that the
    # resulting dict (and anything derived from it) is reproducible.
    for filename in sorted(os.listdir(docs_dir)):
        if not filename.endswith(".md"):
            continue
        filepath = os.path.join(docs_dir, filename)
        # A directory that happens to be named "*.md" cannot be opened as a file.
        if not os.path.isfile(filepath):
            continue
        # Decode explicitly as UTF-8 instead of the platform's default encoding.
        with open(filepath, "r", encoding="utf-8") as f:
            documents[filename] = f.read()
    print(f"Loaded {len(documents)} documents")
    return documents
|
|
|
|
|
|
def build_inverted_index(documents):
    """Build a word -> document-ID inverted index.

    Each document is lower-cased and split on whitespace; every token then
    has its non-alphanumeric characters removed, and tokens that end up
    empty are dropped.

    Args:
        documents: Mapping of document ID to document text.

    Returns:
        A plain dict mapping each normalised word to a sorted list of the
        IDs of the documents that contain it. Lists (not sets) are used so
        the result can be passed to ``json.dumps``.
    """
    postings = defaultdict(set)

    for doc_id, content in documents.items():
        # split() with no argument already breaks on any run of whitespace,
        # newlines included, so no separate newline replacement is needed.
        for token in content.lower().split():
            word = ''.join(c for c in token if c.isalnum())
            if word:
                postings[word].add(doc_id)

    # Set iteration order is not stable across runs; sort each posting list
    # so the index is deterministic.
    inverted_index = {word: sorted(doc_ids) for word, doc_ids in postings.items()}
    print(f"Built index with {len(inverted_index)} unique words")
    return inverted_index
|
|
|
|
|
|
def encrypt_index(inverted_index, output_path="encrypted_index.bin"):
    """Serialise the index to JSON, encrypt it, and write it to disk.

    Args:
        inverted_index: JSON-serialisable index, as produced by
            ``build_inverted_index``.
        output_path: File that receives the encrypted bytes. Defaults to
            ``"encrypted_index.bin"`` in the current working directory, which
            is the location this function always used before the parameter
            existed.

    Returns:
        The encrypted bytes returned by ``encrypt_data`` (the same bytes that
        were written to *output_path*).
    """
    serialized = json.dumps(inverted_index).encode('utf-8')
    encrypted = encrypt_data(serialized)
    with open(output_path, "wb") as f:
        f.write(encrypted)
    print("Encrypted index saved")
    return encrypted
|
|
|
|
|
|
def decrypt_index(encrypted_index):
    """Decrypt an encrypted index blob and parse it back into a dict.

    Args:
        encrypted_index: Bytes in the format accepted by ``decrypt_data``.

    Returns:
        The object obtained by JSON-decoding the UTF-8 plaintext.
    """
    index_json = decrypt_data(encrypted_index).decode('utf-8')
    return json.loads(index_json)
|
|
|
|
|
|
def search(query, encrypted_index_data, documents):
    """Look up *query* in the encrypted index and print matching documents.

    The query is tokenised the same way the index is built: lower-cased,
    split on whitespace, and stripped of non-alphanumeric characters per
    token. A document matches when it contains every query term. A
    single-word query therefore behaves as a plain index lookup.

    Args:
        query: Search text entered by the user.
        encrypted_index_data: Encrypted index bytes, decrypted on every call
            via ``decrypt_index``.
        documents: Mapping of document ID to document text, used to print
            the content of each hit.

    Returns:
        None. Results are written to stdout.
    """
    print(f"\nSearching for: '{query}'")

    inverted_index = decrypt_index(encrypted_index_data)

    # Normalise per token. Stripping non-alphanumerics from the whole query
    # would glue separate words together ("machine learning" ->
    # "machinelearning"), which never matches the per-word index keys.
    terms = []
    for raw_term in query.lower().split():
        term = ''.join(c for c in raw_term if c.isalnum())
        if term and term not in terms:
            terms.append(term)

    # Start from the first term's posting list (keeping its order) and keep
    # only the documents that also appear under every remaining term.
    doc_ids = []
    if terms:
        doc_ids = list(inverted_index.get(terms[0], []))
        for term in terms[1:]:
            containing = set(inverted_index.get(term, []))
            doc_ids = [doc_id for doc_id in doc_ids if doc_id in containing]

    if not doc_ids:
        print("No documents found")
        return

    # The count reflects index hits; IDs missing from `documents` are
    # skipped silently when printing below.
    print(f"Found {len(doc_ids)} document(s):\n")
    for doc_id in doc_ids:
        if doc_id in documents:
            print(f"{'='*60}")
            print(f"Document: {doc_id}")
            print(f"{'='*60}")
            print(documents[doc_id])
            print(f"{'='*60}\n")
|
|
|
|
|
|
def main():
    """Run the demo: load documents, build and encrypt the index, then search.

    Documents are read from the ``documents`` directory relative to the
    current working directory. After the index is built and encrypted, the
    user is prompted for queries until they type ``exit`` (any letter case),
    stdin is closed, or the prompt is interrupted with Ctrl-C.
    """
    print("\n=== Searchable Symmetric Encryption Demo ===\n")

    docs_dir = "documents"

    documents = load_documents(docs_dir)
    inverted_index = build_inverted_index(documents)
    encrypted_index = encrypt_index(inverted_index)

    print("\nInteractive Search (type 'exit' to quit)")

    while True:
        try:
            query = input("\nEnter search query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin (e.g. piped input ran out) or Ctrl-C at the prompt:
            # finish cleanly like 'exit' instead of dying with a traceback.
            break

        if query.lower() == 'exit':
            break

        # Blank input just re-prompts.
        if query:
            search(query, encrypted_index, documents)

    print("\nDemo Complete\n")
|
|
|
|
|
|
# Run the interactive demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|