initial commit
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import logging
|
||||
import shutil
|
||||
|
||||
def create_dataset(base_dir: str, number_of_files: int):
|
||||
"""Create test files filled with specific content."""
|
||||
try:
|
||||
# Validate input
|
||||
if number_of_files <= 0:
|
||||
raise ValueError("Number of files must be positive")
|
||||
|
||||
# Create directory if it doesn't exist
|
||||
os.makedirs(base_dir, exist_ok=True)
|
||||
|
||||
# Calculate required disk space (approximate)
|
||||
required_space = number_of_files * 1024 * 1024 # 1MB per file
|
||||
free_space = shutil.disk_usage(base_dir).free
|
||||
|
||||
if free_space < required_space:
|
||||
raise ValueError(
|
||||
f"Not enough disk space. Need {required_space / (1024**3):.2f} GB, "
|
||||
f"but only {free_space / (1024**3):.2f} GB available"
|
||||
)
|
||||
|
||||
# Calculate how many lines we need for ~1MB file
|
||||
# Each line is about 6 bytes (5 chars + newline)
|
||||
# 1MB = 1048576 bytes
|
||||
# Actual calculation: 1048576 / 6 = 174762.67
|
||||
lines_per_file = 174763
|
||||
|
||||
# Create files
|
||||
for i in range(number_of_files):
|
||||
file_path = os.path.join(base_dir, f"file{i:04d}.txt")
|
||||
with open(file_path, 'w') as f:
|
||||
for _ in range(lines_per_file):
|
||||
f.write(f'file{i:04d}\n')
|
||||
|
||||
# Log every 100 files
|
||||
if (i + 1) % 100 == 0:
|
||||
logging.info(f"Created {i + 1} files...")
|
||||
|
||||
logging.info(f"Created dataset in {base_dir}")
|
||||
logging.info(f"Total files created: {number_of_files}")
|
||||
|
||||
# Log total size of the dataset
|
||||
total_size = sum(os.path.getsize(os.path.join(base_dir, f))
|
||||
for f in os.listdir(base_dir))
|
||||
logging.info(f"Total dataset size: {total_size / (1024*1024):.2f} MB")
|
||||
logging.info(f"Average file size: {total_size / (number_of_files * 1024*1024):.2f} MB")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to create dataset: {str(e)}")
|
||||
raise
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Create test dataset")
|
||||
parser.add_argument("--base_dir", default="~/encryption_test",
|
||||
help="Base directory for test files (default: ~/encryption_test)")
|
||||
parser.add_argument("--number_of_files", type=int, required=True,
|
||||
help="Number of files to create")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
|
||||
# Expand user path (~/...)
|
||||
base_dir = os.path.expanduser(args.base_dir)
|
||||
|
||||
create_dataset(base_dir, args.number_of_files)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user