Query resource AWS Config

Bảng Athena:

CREATE EXTERNAL TABLE `aws_config_snapshot`(
  `fileversion` string COMMENT 'from deserializer', 
  `configsnapshotid` string COMMENT 'from deserializer', 
  `configurationitems` array<struct<configurationitemversion:string,configurationitemcapturetime:string,configurationstateid:string,awsaccountid:string,configurationitemstatus:string,resourcetype:string,resourceid:string,resourcename:string,arn:string,awsregion:string,availabilityzone:string,configurationstatemd5hash:string,resourcecreationtime:string,tags:map<string,string>,relatedevents:array<string>,relationships:array<struct<resourcetype:string,resourceid:string,resourcename:string>>,configuration:string>> COMMENT 'from deserializer')
ROW FORMAT SERDE 
  'org.openx.data.jsonserde.JsonSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION
  's3://config-bucket-830427153490/AWSLogs/830427153490/Config'
TBLPROPERTIES (
  'transient_lastDdlTime'='1702627437')

Bảng quotas:

CREATE EXTERNAL TABLE `aws_quotas`(
  `servicecode` string COMMENT 'from deserializer', 
  `servicename` string COMMENT 'from deserializer', 
  `quotaarn` string COMMENT 'from deserializer', 
  `quotacode` string COMMENT 'from deserializer', 
  `quotaname` string COMMENT 'from deserializer', 
  `value` float COMMENT 'from deserializer', 
  `unit` string COMMENT 'from deserializer', 
  `adjustable` boolean COMMENT 'from deserializer', 
  `globalquota` boolean COMMENT 'from deserializer', 
  `usagemetric` struct<metricnamespace:string,metricname:string,metricdimensions:struct<class:string,resource:string,service:string,type:string>,metricstatisticrecommendation:string> COMMENT 'from deserializer', 
  `quotaappliedatlevel` string COMMENT 'from deserializer')
ROW FORMAT SERDE 
  'org.openx.data.jsonserde.JsonSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION
  's3://yen-testing-bucket/test-quotas'
TBLPROPERTIES (
  'transient_lastDdlTime'='1702874923')

Lambda function lấy quota của AWS để feed cho bảng quota:

import boto3
import json
import time

def lambda_handler(event, context):
    quotas_client = boto3.client('service-quotas')
    s3_client = boto3.client('s3')
    bucket_name = 'yen-testing-bucket'
    file_name = 'test-quotas/quotas_data.json'

    def get_all_services():
        services = []
        pagination_token = None
        while True:
            try:
                if pagination_token:
                    response = quotas_client.list_services(NextToken=pagination_token)
                else:
                    response = quotas_client.list_services()

                services.extend(response.get('Services', []))
                pagination_token = response.get('NextToken')

                if not pagination_token:
                    break

                time.sleep(1)  # Độ trễ 1 giây giữa các yêu cầu

            except quotas_client.exceptions.TooManyRequestsException:
                time.sleep(5)  # Đợi 5 giây trước khi thử lại
                continue

        return services

    def get_all_quotas(service_code):
        quotas = []
        pagination_token = None
        while True:
            try:
                if pagination_token:
                    response = quotas_client.list_service_quotas(ServiceCode=service_code, NextToken=pagination_token)
                else:
                    response = quotas_client.list_service_quotas(ServiceCode=service_code)

                quotas.extend(response.get('Quotas', []))
                pagination_token = response.get('NextToken')

                if not pagination_token:
                    break

                time.sleep(1)  # Độ trễ 1 giây giữa các yêu cầu

            except quotas_client.exceptions.TooManyRequestsException:
                time.sleep(5)  # Đợi 5 giây trước khi thử lại
                continue

        return quotas

    services = get_all_services()

    all_quotas = []
    for service in services:
        service_code = service['ServiceCode']
        quotas = get_all_quotas(service_code)
        all_quotas.extend(quotas)

    json_data = '\n'.join(json.dumps(quota) for quota in all_quotas)

    # Ghi dữ liệu vào S3
    s3_client.put_object(Bucket=bucket_name, Key=file_name, Body=json_data)

    return {
        'statusCode': 200,
        'body': json.dumps('Quotas retrieved and saved to S3 successfully!')
    }

Join và tính utilization cho 1 quotas:

WITH attachment_count AS (
    SELECT COUNT(*) as total_attachments
    FROM aws_config_snapshot config
    CROSS JOIN UNNEST(config.configurationitems) AS t(item)
    WHERE t.item.resourcetype = 'AWS::EC2::TransitGatewayAttachment'
),
quota_values AS (
    SELECT Value as quota_limit
    FROM aws_quotas_edit
    WHERE ServiceCode = 'ec2' AND QuotaName = 'Attachments per transit gateway'
)
SELECT ac.total_attachments, qv.quota_limit, 
       (ac.total_attachments / qv.quota_limit) * 100 as utilization_percentage
FROM attachment_count ac
CROSS JOIN quota_values qv;

count instance theo instance type:

SELECT 
    SPLIT_PART(json_extract_scalar(t.item.configuration, '$.instancetype'), '.', 1) AS instanceTypePrefix,
    COUNT(DISTINCT t.item.resourceid) AS unique_instance_count
FROM 
    aws_config_snapshot,
    UNNEST(configurationitems) t(item)
WHERE 
    t.item.resourcetype = 'AWS::EC2::Instance'
    AND json_extract_scalar(t.item.configuration, '$.instancetype') IS NOT NULL
GROUP BY 
    SPLIT_PART(json_extract_scalar(t.item.configuration, '$.instancetype'), '.', 1);

gdgdhgf