import boto3
import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

# AWS credentials are read from the environment instead of being hard-coded.
# SECURITY: a previous revision embedded real-looking keys in source control;
# those keys must be rotated. Never commit secrets. If these env vars are
# unset the values are None, in which case boto3 falls back to its default
# credential chain (env vars, shared config files, instance role).
aws_access_key = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

# AWS Pricing (as of December 2024)
LIST_REQUEST_COST_PER_1000 = 0.005  # $0.005 per 1,000 LIST requests

def process_bucket(bucket_name):
    """
    Fetch object metadata for one S3 bucket and save it to '<bucket>.json'.

    Returns the number of LIST requests issued (one per paginated page),
    or 0 when the bucket was skipped or an error occurred.
    """
    json_file_name = f"{bucket_name}.json"

    # Skip if data for this bucket already exists — the JSON file acts as a
    # resume marker across runs. Checked before building a client so the
    # skip path does no network/credential work at all.
    if os.path.exists(json_file_name):
        print(f"Skipping bucket '{bucket_name}' - JSON file already exists.")
        return 0  # No additional LIST requests

    try:
        # One client per worker call; boto3 clients are created per thread
        # here rather than shared across the pool.
        s3 = boto3.client(
            's3',
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
        )

        print(f"Fetching data for bucket: {bucket_name}")

        bucket_data = {
            "BucketName": bucket_name,
            "Files": []
        }

        # The paginator follows continuation tokens for us; each yielded
        # page corresponds to one billable LIST request.
        paginator = s3.get_paginator('list_objects_v2')

        list_requests = 0
        for page in paginator.paginate(Bucket=bucket_name):
            list_requests += 1  # Each paginated page is a LIST request
            # 'Contents' is absent on empty pages; .get() handles that.
            for obj in page.get('Contents', []):
                bucket_data['Files'].append({
                    "Key": obj['Key'],
                    "LastModified": str(obj['LastModified']),
                    "Size": obj['Size'],
                    "StorageClass": obj.get('StorageClass', 'STANDARD'),
                })

        # Write atomically: dump to a temp file, then rename into place.
        # A crash mid-dump would otherwise leave a truncated JSON file that
        # the existence check above treats as "done", skipping this bucket
        # on every future run.
        tmp_file_name = json_file_name + ".tmp"
        with open(tmp_file_name, 'w', encoding='utf-8') as json_file:
            json.dump(bucket_data, json_file, indent=4)
        os.replace(tmp_file_name, json_file_name)

        print(f"Data for bucket '{bucket_name}' saved to '{json_file_name}'")
        return list_requests

    except Exception as e:
        # Best-effort per bucket: report and return 0 so one bad bucket
        # doesn't abort the whole crawl.
        print(f"Error processing bucket '{bucket_name}': {e}")
        return 0

def list_buckets_and_objects():
    """
    Enumerate every S3 bucket visible to the configured credentials, dump
    each bucket's object metadata to a JSON file via a thread pool, and
    print a summary with the estimated LIST-request cost.
    """
    try:
        # Client used only for the initial bucket enumeration.
        client = boto3.client(
            's3',
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
        )

        # All bucket names returned by ListBuckets.
        bucket_names = [entry['Name'] for entry in client.list_buckets()['Buckets']]

        total_list_requests = 0  # Running count across all workers

        # Fan out across 15 worker threads; bucket listing is I/O-bound,
        # so the threads overlap network waits.
        with ThreadPoolExecutor(max_workers=15) as pool:
            pending = {
                pool.submit(process_bucket, name): name
                for name in bucket_names
            }
            for finished in as_completed(pending):
                name = pending[finished]
                try:
                    total_list_requests += finished.result()
                except Exception as e:
                    print(f"Error processing bucket '{name}': {e}")

        # LIST requests are billed per thousand.
        estimated_cost = total_list_requests / 1000 * LIST_REQUEST_COST_PER_1000
        print("\nSummary:")
        print(f" - Total LIST requests made: {total_list_requests}")
        print(f" - Estimated LIST request cost: ${estimated_cost:.4f}")

    except Exception as e:
        print(f"Error: {e}")

# Entry point: run the full crawl only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    list_buckets_and_objects()