Files
zvml-python-sdk/examples/ransomware/create_dataset.py
T
Kosta Mushkin ba16ef9a08 initial commit
2025-04-12 14:33:32 -04:00

79 lines
2.8 KiB
Python

#!/usr/bin/env python3
import os
import argparse
import logging
import shutil
def create_dataset(base_dir: str, number_of_files: int, *,
                   file_size: int = 1024 * 1024) -> None:
    """Create numbered test files, each filled with its own repeated name.

    File ``i`` is written to ``<base_dir>/file{i:04d}.txt`` and consists of
    the line ``file{i:04d}`` repeated until the file holds at least
    ``file_size`` bytes (rounded up to a whole number of lines). Existing
    files with the same names are overwritten.

    Args:
        base_dir: Directory to create the files in; created if missing.
        number_of_files: How many files to create; must be positive.
        file_size: Minimum size of each file in bytes (default 1 MiB);
            must be positive.

    Raises:
        ValueError: If ``number_of_files`` or ``file_size`` is not positive,
            or if the target filesystem lacks the required free space.
        OSError: If the directory or a file cannot be created or written.
    """
    try:
        # Validate input before touching the filesystem.
        if number_of_files <= 0:
            raise ValueError("Number of files must be positive")
        if file_size <= 0:
            raise ValueError("File size must be positive")

        # Create directory if it doesn't exist
        os.makedirs(base_dir, exist_ok=True)

        # Approximate space check: each file is file_size bytes plus at most
        # one partial line of overshoot.
        required_space = number_of_files * file_size
        free_space = shutil.disk_usage(base_dir).free
        if free_space < required_space:
            raise ValueError(
                f"Not enough disk space. Need {required_space / (1024**3):.2f} GB, "
                f"but only {free_space / (1024**3):.2f} GB available"
            )

        # Cap the size of a single write so a large file_size does not
        # require building the whole file content in memory.
        lines_per_write = 65536
        total_size = 0

        for i in range(number_of_files):
            line = f"file{i:04d}\n"
            # The line is pure ASCII, so len(line) is its size in bytes.
            # Ceiling division: enough whole lines to reach file_size.
            remaining = -(-file_size // len(line))
            file_path = os.path.join(base_dir, f"file{i:04d}.txt")

            # newline="\n" disables platform newline translation so the
            # byte count matches the calculation above on every OS.
            with open(file_path, "w", encoding="utf-8", newline="\n") as f:
                while remaining > 0:
                    batch = min(remaining, lines_per_write)
                    f.write(line * batch)
                    remaining -= batch

            # Count only files created here; base_dir may hold other entries.
            total_size += os.path.getsize(file_path)

            # Log every 100 files
            if (i + 1) % 100 == 0:
                logging.info("Created %d files...", i + 1)

        logging.info("Created dataset in %s", base_dir)
        logging.info("Total files created: %d", number_of_files)
        logging.info("Total dataset size: %.2f MB", total_size / (1024 * 1024))
        logging.info("Average file size: %.2f MB",
                     total_size / (number_of_files * 1024 * 1024))
    except Exception as e:
        # Record the failure for the operator, then let the caller decide.
        logging.error("Failed to create dataset: %s", e)
        raise
def main():
    """Parse the command-line options and generate the dataset.

    Reads ``--base_dir`` (optional) and ``--number_of_files`` (required),
    sets up INFO-level logging, expands a leading ``~`` in the directory,
    and hands both values to ``create_dataset``.
    """
    cli = argparse.ArgumentParser(description="Create test dataset")
    cli.add_argument(
        "--base_dir",
        default="~/encryption_test",
        help="Base directory for test files (default: ~/encryption_test)",
    )
    cli.add_argument(
        "--number_of_files",
        type=int,
        required=True,
        help="Number of files to create",
    )
    options = cli.parse_args()

    # Logging is configured only after the arguments parsed successfully.
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Resolve "~" to the user's home directory before creating anything.
    target_dir = os.path.expanduser(options.base_dir)
    create_dataset(target_dir, options.number_of_files)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()