diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc1.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc1.md new file mode 100644 index 0000000..2643df4 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc1.md @@ -0,0 +1,5 @@ +# Introduction to Cryptography + +Cryptography is the practice and study of techniques for secure communication. +It involves encryption, decryption, and various security protocols. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc10.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc10.md new file mode 100644 index 0000000..f519336 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc10.md @@ -0,0 +1,5 @@ +# Blockchain and Cryptography + +Blockchain technology relies heavily on cryptographic hash functions. +Bitcoin and other cryptocurrencies use encryption for security. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc2.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc2.md new file mode 100644 index 0000000..fae8fba --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc2.md @@ -0,0 +1,5 @@ +# Symmetric Encryption + +Symmetric encryption uses the same key for encryption and decryption. +AES is a popular symmetric encryption algorithm used worldwide. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc3.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc3.md new file mode 100644 index 0000000..00c1383 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc3.md @@ -0,0 +1,5 @@ +# Asymmetric Encryption + +Asymmetric encryption uses a pair of keys: public and private. +RSA and ECC are examples of asymmetric encryption algorithms. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc4.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc4.md new file mode 100644 index 0000000..49ac46d --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc4.md @@ -0,0 +1,5 @@ +# Hash Functions + +Hash functions create fixed-size outputs from variable-size inputs. +SHA-256 and MD5 are commonly used hash functions in cryptography. 
+ diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc5.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc5.md new file mode 100644 index 0000000..ef14b12 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc5.md @@ -0,0 +1,5 @@ +# Digital Signatures + +Digital signatures provide authentication and non-repudiation. +They use asymmetric encryption to verify the sender's identity. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc6.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc6.md new file mode 100644 index 0000000..1ccf6ae --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc6.md @@ -0,0 +1,5 @@ +# Paillier Cryptosystem + +Paillier is a probabilistic asymmetric algorithm for public key cryptography. +It provides homomorphic encryption properties for secure computation. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc7.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc7.md new file mode 100644 index 0000000..6118304 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc7.md @@ -0,0 +1,5 @@ +# Public Key Infrastructure + +PKI manages digital certificates and public-key encryption. +It provides a framework for secure communication over networks. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc8.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc8.md new file mode 100644 index 0000000..14a5c5c --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc8.md @@ -0,0 +1,5 @@ +# Cryptographic Protocols + +Protocols like TLS and SSL ensure secure communication. +They combine encryption, authentication, and data integrity. + diff --git a/IS/Lab/Lab8_ProMax/PKSE/documents/doc9.md b/IS/Lab/Lab8_ProMax/PKSE/documents/doc9.md new file mode 100644 index 0000000..27be380 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/PKSE/documents/doc9.md @@ -0,0 +1,5 @@ +# Quantum Cryptography + +Quantum cryptography uses quantum mechanics for secure communication. +It provides theoretically unbreakable encryption using quantum key distribution. 
"""Public Key Searchable Encryption (PKSE) demo.

Loads the Markdown documents of a directory, builds a word -> documents
inverted index, attaches a Paillier-encrypted numeric fingerprint to every
indexed word and runs an interactive keyword search over the result.
"""

import hashlib
import json
import os
import pickle
import sys
from collections import defaultdict

# Third-party packages are bound to None when they are not installed so that
# importing this module never fails; _require() reports the missing package
# at the point where it is actually needed.
try:
    from phe import paillier
except ImportError:
    paillier = None
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None
try:
    from docx import Document
except ImportError:
    Document = None
try:
    from pypdf import PdfReader
except ImportError:
    PdfReader = None

# global keys
public_key = None
private_key = None

# Word fingerprints are reduced into the range [0, WORD_HASH_MODULUS).
WORD_HASH_MODULUS = 10**6


def _require(dependency, package):
    """Return *dependency*, or raise ImportError naming the missing *package*."""
    if dependency is None:
        raise ImportError(f"'{package}' must be installed to use this feature")
    return dependency


def _warn_unreadable(path, exc):
    """Report on stderr that *path* could not be converted to text."""
    print(f"Warning: could not extract text from {path}: {exc}", file=sys.stderr)


def _normalize_token(text):
    """Lower-case *text* and drop every character that is not alphanumeric."""
    return "".join(ch for ch in text.lower() if ch.isalnum())


def stable_word_hash(word):
    """Return a fingerprint of *word* that is identical in every process.

    The built-in ``hash()`` of a string is salted per interpreter run, so a
    fingerprint derived from it cannot be compared with one stored by an
    earlier run.  SHA-256 is used instead.

    Args:
        word: The (already normalized) word to fingerprint.

    Returns:
        An integer in ``range(WORD_HASH_MODULUS)``.
    """
    digest = hashlib.sha256(word.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], "big") % WORD_HASH_MODULUS


def generate_keys(n_length=512):
    """Generate a Paillier keypair and store it in the module-level globals.

    Args:
        n_length: Bit length of the Paillier modulus.  The default of 512
            keeps the demo fast; it is too small to protect real data.

    Raises:
        ImportError: If the ``phe`` package is not installed.
    """
    global public_key, private_key
    keygen = _require(paillier, "phe")
    public_key, private_key = keygen.generate_paillier_keypair(n_length=n_length)
    print("Generated Paillier keypair")


def encrypt_number(number):
    """Encrypt *number* with the module-level Paillier public key.

    Raises:
        RuntimeError: If ``generate_keys()`` has not been called yet.
    """
    if public_key is None:
        raise RuntimeError("Paillier keys not generated; call generate_keys() first")
    return public_key.encrypt(number)


def decrypt_number(encrypted_number):
    """Decrypt *encrypted_number* with the module-level Paillier private key.

    Raises:
        RuntimeError: If ``generate_keys()`` has not been called yet.
    """
    if private_key is None:
        raise RuntimeError("Paillier keys not generated; call generate_keys() first")
    return private_key.decrypt(encrypted_number)


def extract_text(path):
    """Return the plain text of the file at *path*, chosen by its extension.

    Supported extensions are .md, .txt, .pdf, .docx, .html and .htm
    (case-insensitive).  Text files are decoded as UTF-8 and undecodable
    bytes are dropped.

    Args:
        path: Path of the file to read.

    Returns:
        The extracted text, or ``""`` for an unsupported extension or for a
        PDF/DOCX file that the parser could not process.

    Raises:
        ImportError: If the parser package for the file type is missing.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext in (".md", ".txt"):
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    if ext == ".pdf":
        reader_cls = _require(PdfReader, "pypdf")
        try:
            reader = reader_cls(path)
            return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as exc:
            # Best effort: a damaged file is skipped, but not silently.
            _warn_unreadable(path, exc)
            return ""
    if ext == ".docx":
        document_cls = _require(Document, "python-docx")
        try:
            doc = document_cls(path)
            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as exc:
            _warn_unreadable(path, exc)
            return ""
    if ext in (".html", ".htm"):
        soup_cls = _require(BeautifulSoup, "beautifulsoup4")
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            soup = soup_cls(f.read(), "html.parser")
        return soup.get_text(" ")
    return ""


def convert_all_to_md(docs_dir):
    """Write a ``<base>.md`` text copy of every non-Markdown file in *docs_dir*.

    Sub-directories, files that already have a .md extension and files that
    yield no text are skipped.  An existing ``<base>.md`` with the same base
    name as a converted file is overwritten.

    Args:
        docs_dir: Directory whose files are converted in place.
    """
    for name in sorted(os.listdir(docs_dir)):
        path = os.path.join(docs_dir, name)
        if os.path.isdir(path):
            continue
        base, ext = os.path.splitext(name)
        if ext.lower() == ".md":
            continue
        text = extract_text(path)
        if not text:
            continue
        md_path = os.path.join(docs_dir, base + ".md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(text)


def load_documents(docs_dir):
    """Convert the files of *docs_dir* to Markdown and load every .md file.

    The extension test is case-insensitive, matching ``convert_all_to_md``,
    and directories whose name happens to end in .md are ignored.

    Args:
        docs_dir: Directory containing the documents.

    Returns:
        A dict mapping each Markdown file name to its text content.
    """
    documents = {}
    convert_all_to_md(docs_dir)
    for filename in sorted(os.listdir(docs_dir)):
        filepath = os.path.join(docs_dir, filename)
        if os.path.splitext(filename)[1].lower() != ".md":
            continue
        if not os.path.isfile(filepath):
            continue
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            documents[filename] = f.read()
    print(f"Loaded {len(documents)} documents")
    return documents


def build_inverted_index(documents):
    """Build a word -> document-ids index from *documents*.

    Words are the whitespace-separated pieces of each document, lower-cased
    and stripped of non-alphanumeric characters (so ``"Public-key"`` is
    indexed as ``"publickey"``); pieces that end up empty are dropped.

    Args:
        documents: Mapping of document id to text content.

    Returns:
        A dict mapping each word to the sorted list of ids of the documents
        that contain it.
    """
    postings = defaultdict(set)

    for doc_id, content in documents.items():
        for piece in content.split():
            word = _normalize_token(piece)
            if word:
                postings[word].add(doc_id)

    # Sorted so the index does not depend on set iteration order.
    inverted_index = {word: sorted(doc_ids) for word, doc_ids in postings.items()}
    print(f"Built index with {len(inverted_index)} unique words")
    return inverted_index


def encrypt_index(inverted_index, output_path="encrypted_index.pkl"):
    """Attach an encrypted fingerprint to every index entry and pickle the result.

    NOTE: this is a teaching simplification, not a secure scheme: the words
    stay as plaintext dict keys and the document ids stay in plaintext; only
    the numeric fingerprint of each word is Paillier-encrypted.

    Args:
        inverted_index: Mapping of word to list of document ids.
        output_path: File the pickled index is written to.

    Returns:
        A dict mapping each word to
        ``{'encrypted_hash': <ciphertext>, 'doc_ids': <list>}``.

    Raises:
        RuntimeError: If ``generate_keys()`` has not been called yet.
    """
    encrypted_index = {}

    for word, doc_ids in inverted_index.items():
        encrypted_index[word] = {
            'encrypted_hash': encrypt_number(stable_word_hash(word)),
            'doc_ids': doc_ids,
        }

    # save to file
    with open(output_path, "wb") as f:
        pickle.dump(encrypted_index, f)

    print("Encrypted index saved")
    return encrypted_index


def decrypt_index(encrypted_index):
    """Return a copy of *encrypted_index* with every fingerprint decrypted.

    Args:
        encrypted_index: The structure produced by ``encrypt_index``.

    Returns:
        A dict mapping each word to ``{'hash': <int>, 'doc_ids': <list>}``.
    """
    return {
        word: {
            'hash': decrypt_number(data['encrypted_hash']),
            'doc_ids': data['doc_ids'],
        }
        for word, data in encrypted_index.items()
    }


def encrypt_query(query):
    """Normalize *query* into the form used for index keys.

    Despite its name this performs no encryption: it lower-cases the query
    and removes every non-alphanumeric character, whitespace included, so a
    multi-word query collapses into a single token.
    """
    return _normalize_token(query)


def search(query, encrypted_index, documents):
    """Look *query* up in *encrypted_index* and print the matching documents.

    Args:
        query: The raw search term typed by the user.
        encrypted_index: The structure produced by ``encrypt_index``.
        documents: Mapping of document id to text content.

    Returns:
        The list of matching document ids that are present in *documents*
        (empty when nothing matches).
    """
    print(f"\nSearching for: '{query}'")

    entry = encrypted_index.get(encrypt_query(query))
    candidates = entry['doc_ids'] if entry is not None else []
    # Only ids that can actually be displayed count as results.
    doc_ids = [doc_id for doc_id in candidates if doc_id in documents]

    if not doc_ids:
        print("No documents found")
        return []

    print(f"Found {len(doc_ids)} document(s):\n")
    rule = '=' * 60
    for doc_id in doc_ids:
        print(rule)
        print(f"Document: {doc_id}")
        print(rule)
        print(documents[doc_id])
        print(f"{rule}\n")
    return doc_ids


def main():
    """Run the interactive PKSE demo over the ``documents`` directory."""
    print("\n=== Public Key Searchable Encryption (PKSE) Demo ===\n")

    # generate Paillier keys
    generate_keys()

    docs_dir = "documents"

    # load documents
    documents = load_documents(docs_dir)

    # build inverted index
    inverted_index = build_inverted_index(documents)

    # encrypt index with public key
    encrypted_index = encrypt_index(inverted_index)

    # interactive search
    print("\nInteractive Search (type 'exit' to quit)")

    while True:
        query = input("\nEnter search query: ").strip()

        if query.lower() == 'exit':
            break

        if query:
            search(query, encrypted_index, documents)

    print("\nDemo Complete\n")


if __name__ == "__main__":
    main()
+ diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc10.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc10.md new file mode 100644 index 0000000..f519336 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc10.md @@ -0,0 +1,5 @@ +# Blockchain and Cryptography + +Blockchain technology relies heavily on cryptographic hash functions. +Bitcoin and other cryptocurrencies use encryption for security. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc2.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc2.md new file mode 100644 index 0000000..fae8fba --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc2.md @@ -0,0 +1,5 @@ +# Symmetric Encryption + +Symmetric encryption uses the same key for encryption and decryption. +AES is a popular symmetric encryption algorithm used worldwide. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc3.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc3.md new file mode 100644 index 0000000..00c1383 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc3.md @@ -0,0 +1,5 @@ +# Asymmetric Encryption + +Asymmetric encryption uses a pair of keys: public and private. +RSA and ECC are examples of asymmetric encryption algorithms. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc4.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc4.md new file mode 100644 index 0000000..49ac46d --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc4.md @@ -0,0 +1,5 @@ +# Hash Functions + +Hash functions create fixed-size outputs from variable-size inputs. +SHA-256 and MD5 are commonly used hash functions in cryptography. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc5.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc5.md new file mode 100644 index 0000000..ef14b12 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc5.md @@ -0,0 +1,5 @@ +# Digital Signatures + +Digital signatures provide authentication and non-repudiation. +They use asymmetric encryption to verify the sender's identity. 
+ diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc6.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc6.md new file mode 100644 index 0000000..169f546 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc6.md @@ -0,0 +1,5 @@ +# AES Encryption Standard + +AES stands for Advanced Encryption Standard. +It supports key sizes of 128, 192, and 256 bits for encryption. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc7.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc7.md new file mode 100644 index 0000000..6118304 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc7.md @@ -0,0 +1,5 @@ +# Public Key Infrastructure + +PKI manages digital certificates and public-key encryption. +It provides a framework for secure communication over networks. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc8.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc8.md new file mode 100644 index 0000000..14a5c5c --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc8.md @@ -0,0 +1,5 @@ +# Cryptographic Protocols + +Protocols like TLS and SSL ensure secure communication. +They combine encryption, authentication, and data integrity. + diff --git a/IS/Lab/Lab8_ProMax/SSE/documents/doc9.md b/IS/Lab/Lab8_ProMax/SSE/documents/doc9.md new file mode 100644 index 0000000..27be380 --- /dev/null +++ b/IS/Lab/Lab8_ProMax/SSE/documents/doc9.md @@ -0,0 +1,5 @@ +# Quantum Cryptography + +Quantum cryptography uses quantum mechanics for secure communication. +It provides theoretically unbreakable encryption using quantum key distribution. 
"""Searchable Symmetric Encryption (SSE) demo.

Loads the Markdown documents of a directory, builds a word -> documents
inverted index, encrypts the serialized index with AES-256-CBC and runs an
interactive keyword search that decrypts the index for every query.
"""

import json
import os
import sys
from collections import defaultdict

# Third-party packages are bound to None when they are not installed so that
# importing this module never fails; _require() reports the missing package
# at the point where it is actually needed.
try:
    from Crypto.Cipher import AES
    from Crypto.Random import get_random_bytes
    from Crypto.Util.Padding import pad, unpad
except ImportError:
    AES = get_random_bytes = pad = unpad = None
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None
try:
    from docx import Document
except ImportError:
    Document = None
try:
    from pypdf import PdfReader
except ImportError:
    PdfReader = None

# 256-bit key.  It is regenerated on every run and never stored, so an index
# file written by one process cannot be decrypted by a later one.
AES_KEY = get_random_bytes(32) if get_random_bytes is not None else os.urandom(32)


def _require(dependency, package):
    """Return *dependency*, or raise ImportError naming the missing *package*."""
    if dependency is None:
        raise ImportError(f"'{package}' must be installed to use this feature")
    return dependency


def _warn_unreadable(path, exc):
    """Report on stderr that *path* could not be converted to text."""
    print(f"Warning: could not extract text from {path}: {exc}", file=sys.stderr)


def _normalize_token(text):
    """Lower-case *text* and drop every character that is not alphanumeric."""
    return "".join(ch for ch in text.lower() if ch.isalnum())


def encrypt_data(data):
    """Encrypt *data* with AES-CBC under ``AES_KEY`` using a fresh random IV.

    NOTE: CBC provides confidentiality only; the output carries no
    authentication tag, so modifications of the ciphertext are not detected.

    Args:
        data: ``bytes``, or a ``str`` that is encoded as UTF-8 first.

    Returns:
        The IV followed by the PKCS#7-padded ciphertext.

    Raises:
        ImportError: If the ``pycryptodome`` package is not installed.
    """
    cipher_mod = _require(AES, "pycryptodome")
    if isinstance(data, str):
        data = data.encode('utf-8')
    cipher = cipher_mod.new(AES_KEY, cipher_mod.MODE_CBC)
    return cipher.iv + cipher.encrypt(pad(data, cipher_mod.block_size))


def decrypt_data(encrypted_data):
    """Decrypt a blob produced by ``encrypt_data``.

    Args:
        encrypted_data: The IV followed by the ciphertext.

    Returns:
        The unpadded plaintext as ``bytes``.

    Raises:
        ImportError: If the ``pycryptodome`` package is not installed.
        ValueError: If the blob is not an IV plus at least one whole block,
            or if the padding is invalid after decryption.
    """
    cipher_mod = _require(AES, "pycryptodome")
    block = cipher_mod.block_size
    if len(encrypted_data) < 2 * block or len(encrypted_data) % block:
        raise ValueError(
            "Encrypted data must be an IV followed by at least one whole AES block"
        )
    iv, ciphertext = encrypted_data[:block], encrypted_data[block:]
    cipher = cipher_mod.new(AES_KEY, cipher_mod.MODE_CBC, iv)
    return unpad(cipher.decrypt(ciphertext), block)


def extract_text(path):
    """Return the plain text of the file at *path*, chosen by its extension.

    Supported extensions are .md, .txt, .pdf, .docx, .html and .htm
    (case-insensitive).  Text files are decoded as UTF-8 and undecodable
    bytes are dropped.

    Args:
        path: Path of the file to read.

    Returns:
        The extracted text, or ``""`` for an unsupported extension or for a
        PDF/DOCX file that the parser could not process.

    Raises:
        ImportError: If the parser package for the file type is missing.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext in (".md", ".txt"):
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    if ext == ".pdf":
        reader_cls = _require(PdfReader, "pypdf")
        try:
            reader = reader_cls(path)
            return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as exc:
            # Best effort: a damaged file is skipped, but not silently.
            _warn_unreadable(path, exc)
            return ""
    if ext == ".docx":
        document_cls = _require(Document, "python-docx")
        try:
            doc = document_cls(path)
            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as exc:
            _warn_unreadable(path, exc)
            return ""
    if ext in (".html", ".htm"):
        soup_cls = _require(BeautifulSoup, "beautifulsoup4")
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            soup = soup_cls(f.read(), "html.parser")
        return soup.get_text(" ")
    return ""


def convert_all_to_md(docs_dir):
    """Write a ``<base>.md`` text copy of every non-Markdown file in *docs_dir*.

    Sub-directories, files that already have a .md extension and files that
    yield no text are skipped.  An existing ``<base>.md`` with the same base
    name as a converted file is overwritten.

    Args:
        docs_dir: Directory whose files are converted in place.
    """
    for name in sorted(os.listdir(docs_dir)):
        path = os.path.join(docs_dir, name)
        if os.path.isdir(path):
            continue
        base, ext = os.path.splitext(name)
        if ext.lower() == ".md":
            continue
        text = extract_text(path)
        if not text:
            continue
        md_path = os.path.join(docs_dir, base + ".md")
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(text)


def load_documents(docs_dir):
    """Convert the files of *docs_dir* to Markdown and load every .md file.

    The extension test is case-insensitive, matching ``convert_all_to_md``,
    and directories whose name happens to end in .md are ignored.

    Args:
        docs_dir: Directory containing the documents.

    Returns:
        A dict mapping each Markdown file name to its text content.
    """
    documents = {}
    convert_all_to_md(docs_dir)
    for filename in sorted(os.listdir(docs_dir)):
        filepath = os.path.join(docs_dir, filename)
        if os.path.splitext(filename)[1].lower() != ".md":
            continue
        if not os.path.isfile(filepath):
            continue
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            documents[filename] = f.read()
    print(f"Loaded {len(documents)} documents")
    return documents


def build_inverted_index(documents):
    """Build a word -> document-ids index from *documents*.

    Words are the whitespace-separated pieces of each document, lower-cased
    and stripped of non-alphanumeric characters (so ``"Public-key"`` is
    indexed as ``"publickey"``); pieces that end up empty are dropped.

    Args:
        documents: Mapping of document id to text content.

    Returns:
        A dict mapping each word to the sorted list of ids of the documents
        that contain it.
    """
    postings = defaultdict(set)

    for doc_id, content in documents.items():
        for piece in content.split():
            word = _normalize_token(piece)
            if word:
                postings[word].add(doc_id)

    # Sorted so the index does not depend on set iteration order.
    inverted_index = {word: sorted(doc_ids) for word, doc_ids in postings.items()}
    print(f"Built index with {len(inverted_index)} unique words")
    return inverted_index


def encrypt_index(inverted_index, output_path="encrypted_index.bin"):
    """Serialize *inverted_index* as JSON, encrypt it and write it to disk.

    Args:
        inverted_index: Mapping of word to list of document ids.
        output_path: File the encrypted index is written to.

    Returns:
        The encrypted index as ``bytes`` (IV followed by ciphertext).
    """
    serialized = json.dumps(inverted_index).encode('utf-8')
    encrypted = encrypt_data(serialized)
    with open(output_path, "wb") as f:
        f.write(encrypted)
    print("Encrypted index saved")
    return encrypted


def decrypt_index(encrypted_index):
    """Decrypt and deserialize an index produced by ``encrypt_index``.

    Args:
        encrypted_index: The encrypted index ``bytes``.

    Returns:
        The inverted index as a dict of word to list of document ids.
    """
    return json.loads(decrypt_data(encrypted_index).decode('utf-8'))


def search(query, encrypted_index_data, documents):
    """Decrypt the index, look *query* up and print the matching documents.

    The query is lower-cased and stripped of every non-alphanumeric
    character, whitespace included, so a multi-word query collapses into a
    single token.

    Args:
        query: The raw search term typed by the user.
        encrypted_index_data: The ``bytes`` returned by ``encrypt_index``.
        documents: Mapping of document id to text content.

    Returns:
        The list of matching document ids that are present in *documents*
        (empty when nothing matches).
    """
    print(f"\nSearching for: '{query}'")

    # The whole index is decrypted for every query.
    inverted_index = decrypt_index(encrypted_index_data)

    candidates = inverted_index.get(_normalize_token(query), [])
    # Only ids that can actually be displayed count as results.
    doc_ids = [doc_id for doc_id in candidates if doc_id in documents]

    if not doc_ids:
        print("No documents found")
        return []

    print(f"Found {len(doc_ids)} document(s):\n")
    rule = '=' * 60
    for doc_id in doc_ids:
        print(rule)
        print(f"Document: {doc_id}")
        print(rule)
        print(documents[doc_id])
        print(f"{rule}\n")
    return doc_ids


def main():
    """Run the interactive SSE demo over the ``documents`` directory."""
    print("\n=== Searchable Symmetric Encryption Demo ===\n")

    docs_dir = "documents"

    # load documents
    documents = load_documents(docs_dir)

    # build inverted index
    inverted_index = build_inverted_index(documents)

    # encrypt index
    encrypted_index = encrypt_index(inverted_index)

    # interactive search
    print("\nInteractive Search (type 'exit' to quit)")

    while True:
        query = input("\nEnter search query: ").strip()

        if query.lower() == 'exit':
            break

        if query:
            search(query, encrypted_index, documents)

    print("\nDemo Complete\n")


if __name__ == "__main__":
    main()