Delete examples/ransomware/create_dataset.py
delete create_dataset.py
This commit is contained in:
@@ -1,78 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import os
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
def create_dataset(base_dir: str, number_of_files: int):
|
|
||||||
"""Create test files filled with specific content."""
|
|
||||||
try:
|
|
||||||
# Validate input
|
|
||||||
if number_of_files <= 0:
|
|
||||||
raise ValueError("Number of files must be positive")
|
|
||||||
|
|
||||||
# Create directory if it doesn't exist
|
|
||||||
os.makedirs(base_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# Calculate required disk space (approximate)
|
|
||||||
required_space = number_of_files * 1024 * 1024 # 1MB per file
|
|
||||||
free_space = shutil.disk_usage(base_dir).free
|
|
||||||
|
|
||||||
if free_space < required_space:
|
|
||||||
raise ValueError(
|
|
||||||
f"Not enough disk space. Need {required_space / (1024**3):.2f} GB, "
|
|
||||||
f"but only {free_space / (1024**3):.2f} GB available"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate how many lines we need for ~1MB file
|
|
||||||
# Each line is about 6 bytes (5 chars + newline)
|
|
||||||
# 1MB = 1048576 bytes
|
|
||||||
# Actual calculation: 1048576 / 6 = 174762.67
|
|
||||||
lines_per_file = 174763
|
|
||||||
|
|
||||||
# Create files
|
|
||||||
for i in range(number_of_files):
|
|
||||||
file_path = os.path.join(base_dir, f"file{i:04d}.txt")
|
|
||||||
with open(file_path, 'w') as f:
|
|
||||||
for _ in range(lines_per_file):
|
|
||||||
f.write(f'file{i:04d}\n')
|
|
||||||
|
|
||||||
# Log every 100 files
|
|
||||||
if (i + 1) % 100 == 0:
|
|
||||||
logging.info(f"Created {i + 1} files...")
|
|
||||||
|
|
||||||
logging.info(f"Created dataset in {base_dir}")
|
|
||||||
logging.info(f"Total files created: {number_of_files}")
|
|
||||||
|
|
||||||
# Log total size of the dataset
|
|
||||||
total_size = sum(os.path.getsize(os.path.join(base_dir, f))
|
|
||||||
for f in os.listdir(base_dir))
|
|
||||||
logging.info(f"Total dataset size: {total_size / (1024*1024):.2f} MB")
|
|
||||||
logging.info(f"Average file size: {total_size / (number_of_files * 1024*1024):.2f} MB")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Failed to create dataset: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description="Create test dataset")
|
|
||||||
parser.add_argument("--base_dir", default="~/encryption_test",
|
|
||||||
help="Base directory for test files (default: ~/encryption_test)")
|
|
||||||
parser.add_argument("--number_of_files", type=int, required=True,
|
|
||||||
help="Number of files to create")
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Expand user path (~/...)
|
|
||||||
base_dir = os.path.expanduser(args.base_dir)
|
|
||||||
|
|
||||||
create_dataset(base_dir, args.number_of_files)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
Reference in New Issue
Block a user