Python Lambda function code to automate OpenSearch snapshot creation and deletion

Create snapshots

import os
import boto3
import requests
from logging import getLogger, StreamHandler, DEBUG
from requests_aws4auth import AWS4Auth
from datetime import datetime

# Attach a DEBUG-level stream handler to urllib3's logger so every HTTP
# request/response made by `requests` shows up in CloudWatch logs.
logger = getLogger("urllib3")
handler = StreamHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
logger.addHandler(handler)
# Avoid duplicate log lines from Lambda's root-logger handler.
logger.propagate = False


# configuration -- all settings come from the Lambda environment variables
host  = os.environ.get("DOMAIN_HOST") ## For eg: "https://vpc-test-osearch-zmqj43i234onqwhempvqdh3xmi.ap-northeast-1.es.amazonaws.com/" (must include the trailing slash)
region = os.environ.get("DOMAIN_REGION", "ap-northeast-1")
repository_path = os.environ.get("REPOSITORY_PATH", "_snapshot/manual")
snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX","manual-snapshot-test")
bucket_name = os.environ.get("BUCKET_NAME")
role_arn = os.environ.get("DOMAIN_SNAPSHOT_ROLE_ARN")

# Note: Please refer to this document for snapshot role setting: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-snapshots.html#managedomains-snapshot-prerequisites
## If using fine-grained access control, you will have to map the snapshot role in OpenSearch Dashboards

# AWS OpenSearch service identifier (SigV4 service name for signing)
service = "es"

# Lambda execution starts here.
def lambda_handler(event, context):
    """Entry point: sign with the execution role, ensure the snapshot
    repository exists, then take a new snapshot."""
    print("OpenSearch backup Lambda - Get OpenSearch snapshot and save to S3 bucket.")
    print(f"Domain host: {host}")
    print(f"Repository path: {repository_path}")
    print(f"Snapshot prefix: {snapshot_prefix}")
    print(f"Bucket name: {bucket_name}")
    print(f"Role ARN: {role_arn}")

    signer = authentication()
    register_snapshot_repository(signer)
    take_snapshot(signer)

    return event

# Functions
def authentication() -> AWS4Auth:
    """Build a SigV4 request signer from the Lambda execution role's credentials."""
    creds = boto3.Session().get_credentials()
    return AWS4Auth(
        creds.access_key,
        creds.secret_key,
        region,
        service,
        session_token=creds.token,
    )


def register_snapshot_repository(awsauth: AWS4Auth):
    """Register the S3 snapshot repository.

    Idempotent on the OpenSearch side: re-registering an existing repository
    with the same settings is a no-op.

    Raises:
        Exception: if OpenSearch returns a non-200 status.
    """
    url = host + repository_path

    payload = {
        "type": "s3",
        "settings": {
            "bucket": bucket_name,
            "base_path": repository_path,
            # NOTE(review): the global S3 endpoint is hard-coded; for buckets
            # outside us-east-1 the documented approach is a regional
            # "region" setting -- confirm this works for your bucket's region.
            "endpoint": "s3.amazonaws.com", 
            "role_arn": role_arn
        }
    }

    headers = {"Content-Type": "application/json"}

    # A request timeout keeps the Lambda from silently burning its whole
    # execution budget when the domain is unreachable (VPC/SG misconfig).
    r = requests.put(url, auth=awsauth, json=payload, headers=headers, timeout=60)
    logger.info(r.text)

    if r.status_code != 200:
        raise Exception(
            f"Cannot register the snapshot repository. Details: {r.text}"
        )
    print(r.text)

    return {"statusCode": r.status_code, "body": r.text}


def take_snapshot(awsauth: AWS4Auth):
    """Take a snapshot named <prefix>-<timestamp> in the registered repository.

    Raises:
        Exception: if OpenSearch returns a non-200 status.
    """
    print("===================================================")
    # Lowercase 't' separator: OpenSearch snapshot names must be lowercase.
    snapshot_name = snapshot_prefix + "-" + datetime.now().strftime("%Y-%m-%dt%H-%M-%S")

    ## Important note: the trailing slash is appended here, not in the env
    ## var, so repository_path stays usable as the bare repository URL.
    path = repository_path + "/" + snapshot_name

    url = host + path

    # Timeout prevents an unreachable domain from hanging the Lambda.
    r = requests.put(url, auth=awsauth, timeout=60)

    logger.info(r.text)
    if r.status_code != 200:
        raise Exception(
            f"Cannot take snapshot {snapshot_name}. Details: {r.text}"
        )
    print(r.text)
    print("===================================================")
    print(f"Snapshot name: {snapshot_name}")

    return {"statusCode": r.status_code, "body": r.text}

requirements:

certifi==2021.5.30
chardet==4.0.0
idna==2.10
requests
requests-aws4auth==1.0.1
urllib3==1.26.3
charset-normalizer==2.1.1

Delete snapshots

import os
import boto3
import curator
import logging
from logging import getLogger, StreamHandler, DEBUG
from requests_aws4auth import AWS4Auth
from elasticsearch import Elasticsearch, RequestsHttpConnection
from datetime import datetime

# Attach a DEBUG-level stream handler to curator's logger so snapshot
# filtering/deletion progress shows up in CloudWatch logs.
logger = getLogger("curator")
handler = StreamHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
logger.addHandler(handler)
# Avoid duplicate log lines from Lambda's root-logger handler.
logger.propagate = False

# Also surface the underlying HTTP traffic from the elasticsearch client.
url_logger = getLogger("urllib3")
url_logger.setLevel(DEBUG)

# configuration -- all settings come from the Lambda environment variables
host  = os.environ.get("DOMAIN_HOST") ## For eg: "vpc-test-osearch-zmqj43i234onqwhempvqdh3xmi.ap-northeast-1.es.amazonaws.com" (do not include https:// and / at the end)
region = os.environ.get("DOMAIN_REGION", "ap-northeast-1")
repository_path = os.environ.get("REPOSITORY_PATH", "_snapshot/manual")
snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX","manual-snapshot-test")

# AWS OpenSearch service identifier (SigV4 service name for signing)
service = "es"

repository_name = os.environ.get("REPOSITORY_NAME","manual")
unit = os.environ.get("UNIT") # Valid: seconds, minutes, hours, days, weeks, months, years
unit_count = int(os.environ.get("UNIT_COUNT")) # For example {unit:weeks, unit_count:2}, it will take 2 weeks. NOTE: raises TypeError at import time if UNIT_COUNT is unset.

# Note: This Lambda function assumes that you have configured the snapshot role setting when you take the snapshot.
# Please refer to this document for snapshot role setting: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-snapshots.html#managedomains-snapshot-prerequisites
## If using fine-grained access control, you will have to map the snapshot role in OpenSearch Dashboards
## To delete snapshots, you have to map a backend role to the "readall_and_monitor" permission

def authentication() -> AWS4Auth:
    """Create a SigV4 signer from the credentials of the Lambda IAM role."""
    session_creds = boto3.Session().get_credentials()
    signer = AWS4Auth(
        session_creds.access_key,
        session_creds.secret_key,
        region,
        service,
        session_token=session_creds.token,
    )
    return signer

def delete_snapshot(awsauth: AWS4Auth):
    """Delete every snapshot in the repository older than unit/unit_count.

    Best-effort: curator errors (snapshot in progress, no snapshots, failed
    execution) are printed, not raised, so one bad run does not page anyone.
    """
    # Build the OpenSearch client.
    client = Elasticsearch(
        hosts = [{'host': host, 'port': 443}],
        http_auth = awsauth,
        use_ssl = True,
        verify_certs = True,
        connection_class = RequestsHttpConnection,
        timeout = 120 # Deleting snapshots can take a while, so keep the connection open for long enough to get a response.
    )
    # Logged for debugging only -- the client above handles the connection.
    url = "https://" + host + "/"
    print(url)

    try:
        # Get all snapshots in the repository.
        snapshot_list = curator.SnapshotList(client, repository=repository_name)

        # Keep only snapshots older than the configured age.
        snapshot_list.filter_by_age(source='creation_date', direction='older', unit=unit, unit_count=unit_count)

        # Delete the old snapshots (retry because deletion is slow).
        curator.DeleteSnapshots(snapshot_list, retry_interval=30, retry_count=3).do_action()
    except (curator.exceptions.SnapshotInProgress, curator.exceptions.NoSnapshots, curator.exceptions.FailedExecution) as e:
        print(e)

# Lambda execution starts here.
def lambda_handler(event, context):
    """Entry point: sign with the execution role and prune old snapshots."""
    print("Lambda function to delete OpenSearch snapshot")
    print(f"Domain host: {host}")
    print(f"Repository path: {repository_path}")
    print(f"Snapshot prefix: {snapshot_prefix}")
    print(f"Repository_name: {repository_name}")
    print(f"Time unit: {unit}")
    print(f"Time amount: {unit_count}")

    delete_snapshot(authentication())

requirements:

certifi==2021.5.30
chardet==4.0.0
idna==2.10
requests
requests-aws4auth==1.0.1
urllib3==1.26.3
charset-normalizer==2.1.1
click==6.7
elasticsearch==7.9.1
elasticsearch-curator==5.7.0
voluptuous==0.13.1
pyyaml==5.4.1
six>=1.16.0

Some notes:

get_snapshot.py and delete_snapshot.py use Curator and the elasticsearch-py client; the delete function removes any snapshot older than the configured age (UNIT / UNIT_COUNT).
You have to set environments for Lambda functions (docs.aws.amazon.com/lambda/latest/dg/config..)
Because taking and deleting snapshots can take a while, this code is more sensitive to connection and Lambda timeouts—hence the extra logging code. If the DeleteSnapshots function takes longer to get a response from the OpenSearch Service domain, you might need to increase the client timeout (120 seconds in delete_snapshot). You must also increase the Lambda function timeout from its default value of three seconds.

Basic Lambda setting values:

Memory – 128 MB; Timeout: 3 minutes

Triggers:

Rather than reacting to some event (such as a file upload to Amazon S3), these functions are meant to be scheduled.

Permissions:

Both Lambda functions in this section need the basic logging permissions that all Lambda functions need, plus HTTP method permissions for the OpenSearch Service domain:

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": "logs:CreateLogGroup",
      "Resource": "arn:aws:logs:ap-northeast-1:123456789012:*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": [
        "arn:aws:logs:ap-northeast-1:123456789012:log-group:/aws/lambda/your-lambda-function:*"
      ]
    },
    {
      "Effect": "Allow",
      "Action": [
        "es:ESHttpPost",
        "es:ESHttpGet",
        "es:ESHttpPut",
        "es:ESHttpDelete"
      ],
      "Resource": "arn:aws:es:ap-northeast-1:123456789012:domain/my-domain/*"
    }
  ]
}

Take snapshot reference:

https://opensearch.org/docs/2.1/opensearch/snapshots/snapshot-restore/#take-snapshots
Delete snapshot:

https://opensearch.org/docs/latest/api-reference/snapshots/delete-snapshot/#delete-snapshot

To make Lambda layers:

pip install -r requirements.txt -t python

aws lambda publish-layer-version --layer-name <name> --description "<description>" --license-info "MIT" --zip-file fileb://python.zip --compatible-runtimes python3.8 python3.9 --compatible-architectures "arm64" "x86_64"

Setup inside OpenSearch dashboard

If you want to access OpenSearch dashboard with your browser, please refer to this guide to make a reverse proxy instance:

https://shinchan.asia/2022/12/13/reverse-proxy-with-nginx-amz-linux-2-nat-instance/

=================================================

Terraform code to deploy testing resources

vpc.tf

#####################################
# VPC
#####################################
# Main VPC. DNS support/hostnames are enabled so that interface VPC
# endpoints and the VPC-only OpenSearch endpoint resolve correctly.
resource "aws_vpc" "vpc" {
  cidr_block           = var.root_cidr
  enable_dns_hostnames = "true"
  enable_dns_support   = "true"

}

#####################################
# Internet gateway
#####################################
# Internet gateway; public subnets route 0.0.0.0/0 here.
resource "aws_internet_gateway" "igw" {
  vpc_id = aws_vpc.vpc.id
  tags = {
    Name = "${var.app_name}-${var.env_name}-igw"
  }
}

#####################################
# Public subnet
#####################################
# One public subnet per entry in var.public_subnets (AZ => CIDR map).
resource "aws_subnet" "public" {
  count                   = length(var.public_subnets)
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = element(values(var.public_subnets), count.index)
  map_public_ip_on_launch = true
  availability_zone       = element(keys(var.public_subnets), count.index)
  depends_on = [
    aws_internet_gateway.igw
  ]

  tags = {
    # substr(..., -1, -1) takes the last character of the AZ name (a/c/d).
    # The original length of 0 asked for a zero-length string, so the
    # subnet Name suffix was always empty.
    Name = "${var.app_name}-${var.env_name}-subnet-public-${substr(element(keys(var.public_subnets), count.index), -1, -1)}"
  }
}

#####################################
# Private subnet
#####################################
# One private subnet per entry in var.private_subnets (AZ => CIDR map).
resource "aws_subnet" "private" {
  count             = length(var.private_subnets)
  vpc_id            = aws_vpc.vpc.id
  cidr_block        = element(values(var.private_subnets), count.index)
  availability_zone = element(keys(var.private_subnets), count.index)

  tags = {
    # NOTE(review): substr(..., -1, 0) requests a zero-length string, so the
    # AZ suffix is always empty; substr(..., -1, -1) was probably intended.
    Name = "${var.app_name}-${var.env_name}-subnet-private-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
  }
}

#####################################
# Nat Gateway Settings
#####################################
# NAT gateways for private-subnet outbound internet access (one per
# private subnet when enabled).
# A NAT gateway must itself live in a PUBLIC subnet — it needs a route to
# the internet gateway to forward traffic. The original placed it in the
# private subnet, which leaves the private subnets with no working egress.
resource "aws_nat_gateway" "natgw" {
  count         = var.enable_natgw ? length(var.private_subnets) : 0
  allocation_id = element(aws_eip.natgw_eip.*.id, count.index)
  subnet_id     = element(aws_subnet.public.*.id, count.index)

  tags = {
    Name = "${var.app_name}-${var.env_name}-ngw-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
  }

  depends_on = [
    aws_internet_gateway.igw
  ]
}

#####################################
# EIP
#####################################
## NAT gateway eip
## NAT gateway EIPs (one per NAT gateway).
# NOTE(review): `vpc = true` is deprecated in newer AWS providers in
# favour of `domain = "vpc"` — confirm against the pinned provider version.
resource "aws_eip" "natgw_eip" {
  count = var.enable_natgw ? length(var.private_subnets) : 0
  vpc   = true
  tags = {
    Name = "${var.app_name}-${var.env_name}-eip-nat-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
  }
}

## NAT instance EIP (single instance, only when the NAT instance is used).
resource "aws_eip" "nat_instance_eip" {
  count = var.enable_nat_instance ? 1 : 0
  vpc   = true
  tags = {
    Name = "${var.app_name}-${var.env_name}-eip-nat"
  }
}

# Binds the EIP to the NAT instance's primary network interface.
resource "aws_eip_association" "nat_instance_eip" {
  count         = var.enable_nat_instance ? 1 : 0
  instance_id   = aws_instance.nat_instance[0].id
  allocation_id = element(aws_eip.nat_instance_eip.*.id, count.index)
}

#####################################
# Public route table
#####################################
# Single shared route table for all public subnets.
resource "aws_route_table" "public_rtb" {
  vpc_id = aws_vpc.vpc.id

}

# Associate every public subnet with the public route table.
resource "aws_route_table_association" "public_rta" {
  count          = length(var.public_subnets)
  subnet_id      = element(aws_subnet.public.*.id, count.index)
  route_table_id = aws_route_table.public_rtb.id
}

# Make the public route table the VPC's main route table, so subnets
# without an explicit association default to it.
resource "aws_main_route_table_association" "main_rt_association" {
  vpc_id         = aws_vpc.vpc.id
  route_table_id = aws_route_table.public_rtb.id
}

# Default route to the internet through the IGW.
resource "aws_route" "public_rtb_default" {
  route_table_id         = aws_route_table.public_rtb.id
  destination_cidr_block = "0.0.0.0/0"
  gateway_id             = aws_internet_gateway.igw.id
}

#####################################
# Private route table - NAT instance
#####################################
# Single private route table used when egress goes through the NAT instance.
resource "aws_route_table" "private_rtb_nat_ins" {
  count  = var.enable_nat_instance ? 1 : 0
  vpc_id = aws_vpc.vpc.id
}

# All private subnets share the one NAT-instance route table.
resource "aws_route_table_association" "private_nat_ins_rta" {
  count          = var.enable_nat_instance ? length(var.private_subnets) : 0
  subnet_id      = element(aws_subnet.private.*.id, count.index)
  route_table_id = aws_route_table.private_rtb_nat_ins[0].id

  depends_on = [
    aws_route_table.private_rtb_nat_ins
  ]
}

# Default route via the NAT instance's primary ENI.
resource "aws_route" "private_route_nat_ins" {
  count                  = var.enable_nat_instance ? 1 : 0
  route_table_id         = aws_route_table.private_rtb_nat_ins[0].id
  destination_cidr_block = "0.0.0.0/0"
  network_interface_id   = element(aws_instance.nat_instance.*.primary_network_interface_id, count.index)
}

#####################################
# Private route table - NAT gateway
# There are as many routing tables as the number of NAT gateways
#####################################
# Private route tables for NAT-gateway routing: one per private subnet so
# each subnet can point at its zone-local NAT gateway.
resource "aws_route_table" "private_rtb_natgw" {
  count  = var.enable_natgw ? length(var.private_subnets) : 0
  vpc_id = aws_vpc.vpc.id
  tags = {
    # The original Name expression contained a literal line break inside the
    # quoted string, which is invalid HCL; joined back onto one line here.
    Name = "${var.app_name}-${var.env_name}-private-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
  }
}

# Associate each private subnet with its matching NAT-gateway route table.
resource "aws_route_table_association" "private_natgw_rta" {
  count          = var.enable_natgw ? length(var.private_subnets) : 0
  subnet_id      = element(aws_subnet.private.*.id, count.index)
  route_table_id = element(aws_route_table.private_rtb_natgw.*.id, count.index)
  depends_on = [
    aws_route_table.private_rtb_natgw
  ]
}

# Default route of each private route table via its zone's NAT gateway.
resource "aws_route" "private_route_natgw" {
  count                  = var.enable_natgw ? length(var.private_subnets) : 0
  route_table_id         = element(aws_route_table.private_rtb_natgw.*.id, count.index)
  destination_cidr_block = "0.0.0.0/0"
  nat_gateway_id         = element(aws_nat_gateway.natgw.*.id, count.index)
}

#####################################
# DHCP Option Set
#####################################
# Optional custom DHCP option set (custom domain name, Amazon DNS).
resource "aws_vpc_dhcp_options" "dhcp_option_set" {
  count               = var.create_custom_dhcp ? 1 : 0
  domain_name         = var.dhcp_option_domain
  domain_name_servers = ["AmazonProvidedDNS"]
}

resource "aws_vpc_dhcp_options_association" "dhcp_option_set_association" {
  count           = var.create_custom_dhcp ? 1 : 0
  vpc_id          = aws_vpc.vpc.id
  dhcp_options_id = aws_vpc_dhcp_options.dhcp_option_set[0].id
}

#####################################
# VPC endpoint
#####################################
## Gateway endpoint 
## Gateway endpoints (S3/DynamoDB style) — one per service in the list.
resource "aws_vpc_endpoint" "vpce_gateway" {
  count             = var.create_gateway_vpce ? length(var.gateway_endpoint_services) : 0
  vpc_id            = aws_vpc.vpc.id
  service_name      = var.gateway_endpoint_services[count.index]
  vpc_endpoint_type = "Gateway"
}

# Attach each gateway endpoint to the NAT-instance private route table.
# NOTE(review): element() pairs endpoint N with route table N (wrapping);
# if counts differ, each endpoint is attached to only one route table —
# verify this matches the intended topology.
resource "aws_vpc_endpoint_route_table_association" "vpce_gateway_attach_nat_ins" {
  count           = var.create_gateway_vpce && var.enable_nat_instance ? length(var.gateway_endpoint_services) : 0
  route_table_id  = element(aws_route_table.private_rtb_nat_ins.*.id, count.index)
  vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)

  depends_on = [
    aws_route_table.private_rtb_nat_ins
  ]
}

# Same attachment for the NAT-gateway route tables (same pairing caveat).
resource "aws_vpc_endpoint_route_table_association" "vpce_gateway_attach_natgw" {
  count           = var.create_gateway_vpce && var.enable_natgw ? length(var.gateway_endpoint_services) : 0
  route_table_id  = element(aws_route_table.private_rtb_natgw.*.id, count.index)
  vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)

  depends_on = [
    aws_route_table.private_rtb_natgw
  ]
}

# Wide-open endpoint policy for testing; tighten for production use.
resource "aws_vpc_endpoint_policy" "vpce_gateway_policy_attach" {
  count           = var.create_gateway_vpce ? length(var.gateway_endpoint_services) : 0
  vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)
  policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      {
        "Sid" : "AllowAll",
        "Effect" : "Allow",
        "Principal" : {
          "AWS" : "*"
        },
        "Action" : [
          "*"
        ],
        "Resource" : "*"
      }
    ]
  })
}

# ## Interface endpoint 
# ## Interface endpoints — one ENI per private subnet, private DNS enabled
# so SDK calls resolve to the endpoint without code changes.
resource "aws_vpc_endpoint" "vpce_interface" {
  count             = var.create_interface_vpce ? length(var.interface_endpoint_services) : 0
  vpc_id            = aws_vpc.vpc.id
  service_name      = var.interface_endpoint_services[count.index]
  vpc_endpoint_type = "Interface"
  subnet_ids        = aws_subnet.private[*].id

  security_group_ids = [
    aws_security_group.vpce_sg.id
  ]

  private_dns_enabled = true
}

# Wide-open endpoint policy for testing; tighten for production use.
resource "aws_vpc_endpoint_policy" "vpce_interface_policy_attach" {
  count           = var.create_interface_vpce ? length(var.interface_endpoint_services) : 0
  vpc_endpoint_id = element(aws_vpc_endpoint.vpce_interface.*.id, count.index)
  policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      {
        "Sid" : "AllowAll",
        "Effect" : "Allow",
        "Principal" : {
          "AWS" : "*"
        },
        "Action" : [
          "*"
        ],
        "Resource" : "*"
      }
    ]
  })
}

sg.tf

# Lookup maps translating rule-variable keys to live security-group IDs.
locals {
  ############## Ingress rules ##############

  # OpenSearch accepts ingress from the Lambda security group.
  opensearch_sg_ingress_source = {
    security_group_lambda = aws_security_group.lambda_sg.id
  }

  ############## Egress rules ##############

  # Lambda egress is restricted to the OpenSearch security group.
  lambda_sg_outbound_destination = {
    security_group_opensearch = aws_security_group.opensearch_sg.id
  }

}

#####################################
# Security Group EC2
#####################################

## EC2 NAT SG
## EC2 NAT instance SG — ingress/egress rules are driven entirely by vars.
resource "aws_security_group" "ec2_nat_sg" {
  count       = var.enable_nat_instance ? 1 : 0
  name        = "${var.app_name}-${var.env_name}-ec2"
  vpc_id      = aws_vpc.vpc.id
  description = "${var.app_name}-${var.env_name}-ec2"
  dynamic "egress" {
    for_each = var.security_group_common_egress
    content {
      from_port   = egress.value["from_port"]
      to_port     = egress.value["to_port"]
      protocol    = egress.value["protocol"]
      cidr_blocks = egress.value["cidr_blocks"]
      description = egress.value["description"]
    }
  }
  dynamic "ingress" {
    for_each = var.ec2_nat_sg_cidr_ingress
    content {
      from_port   = ingress.value["from_port"]
      to_port     = ingress.value["to_port"]
      protocol    = ingress.value["protocol"]
      cidr_blocks = ingress.value["cidr_blocks"]
      description = ingress.value["description"]
    }
  }
}

#####################################
# Security Group VPC endpoint
#####################################
# SG for the interface VPC endpoints; ingress (typically 443 from the VPC
# CIDR) is driven by var.vpce_sg_cidr_ingress. SGs are stateful, so no
# explicit egress is needed for responses.
resource "aws_security_group" "vpce_sg" {
  name        = "${var.app_name}-${var.env_name}-vpce"
  vpc_id      = aws_vpc.vpc.id
  description = "${var.app_name}-${var.env_name}-vpce"
  dynamic "ingress" {
    for_each = var.vpce_sg_cidr_ingress
    content {
      from_port   = ingress.value["from_port"]
      to_port     = ingress.value["to_port"]
      protocol    = ingress.value["protocol"]
      cidr_blocks = ingress.value["cidr_blocks"]
      description = ingress.value["description"]
    }
  }
}

#####################################
# Security Group OpenSearch
#####################################

# SG for the OpenSearch domain; ingress sources are resolved through
# local.opensearch_sg_ingress_source (currently only the Lambda SG).
resource "aws_security_group" "opensearch_sg" {
  name        = "${var.app_name}-${var.env_name}-os"
  vpc_id      = aws_vpc.vpc.id
  description = "${var.app_name}-${var.env_name}-os"
  dynamic "ingress" {
    for_each = var.opensearch_sg_sgid_ingress
    content {
      from_port       = ingress.value["from_port"]
      to_port         = ingress.value["to_port"]
      protocol        = ingress.value["protocol"]
      security_groups = [local.opensearch_sg_ingress_source[ingress.value["security_group"]]]
      description     = ingress.value["description"]
    }
  }
}

#####################################
# Security Group Lambda
#####################################
# SG for the snapshot Lambdas; rules are attached separately below so the
# SG itself carries no inline rules.
resource "aws_security_group" "lambda_sg" {
  name        = "${var.app_name}-${var.env_name}-lambda"
  vpc_id      = aws_vpc.vpc.id
  description = "${var.app_name}-${var.env_name}-lambda"
}

# Egress from the Lambda SG to the OpenSearch SG only (no open egress).
resource "aws_security_group_rule" "lambda_sg_outbound" {
  for_each                 = var.lambda_sg_outbound_destination
  type                     = "egress"
  from_port                = each.value["from_port"]
  to_port                  = each.value["to_port"]
  protocol                 = each.value["protocol"]
  source_security_group_id = local.lambda_sg_outbound_destination[each.value["security_group"]]
  description              = each.value["description"]
  security_group_id        = aws_security_group.lambda_sg.id
}

s3.tf

## OpenSearch Snapshot
# OSearch snapshot s3 log bucket
## OpenSearch Snapshot
# OSearch snapshot s3 log bucket — renders the log-bucket policy template.
# NOTE(review): the template_file data source is deprecated; the built-in
# templatefile() function is the modern replacement.
data "template_file" "bucket_log" {
  template = file("s3_policies/bucketlog.json")
  vars = {
    osearch_snapshot_bucket = aws_s3_bucket.osearch_snapshot.bucket
    bucket                  = aws_s3_bucket.bucket_log.bucket
    account_id              = var.account_id
  }
}

# S3 access-log bucket (created in the Osaka-region provider alias).
resource "aws_s3_bucket" "bucket_log" {
  provider = aws.osaka
  bucket   = "${var.app_name}-${var.env_name}-logs"

  tags = {
    Name        = "${var.app_name}-${var.env_name}-logs"
    Environment = "${var.env_name}"
    System      = "${var.app_name}"
  }
}

# NOTE(review): bucket ACLs are legacy; provider v4+ prefers bucket
# policies / ownership controls — confirm against the pinned provider.
resource "aws_s3_bucket_acl" "bucket_log" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.bucket_log.id
  acl      = "private"
}

# Placeholder "s3/" prefix object so log delivery has a target folder.
resource "aws_s3_object" "bucket_log" {
  provider     = aws.osaka
  bucket       = aws_s3_bucket.bucket_log.id
  key          = "s3/"
  content_type = "application/x-directory"
}


# Attach the rendered log-delivery policy to the log bucket.
resource "aws_s3_bucket_policy" "bucket_log" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.bucket_log.id

  policy = data.template_file.bucket_log.rendered
  depends_on = [
    aws_s3_object.bucket_log
  ]
}

resource "aws_s3_bucket_ownership_controls" "bucket_log" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.bucket_log.id

  rule {
    object_ownership = "BucketOwnerPreferred"
  }
}
# OpenSearch Snapshot bucket
# OpenSearch Snapshot bucket — destination for the manual snapshots.
resource "aws_s3_bucket" "osearch_snapshot" {
  provider = aws.osaka
  bucket   = "${var.app_name}-${var.env_name}-snapshot"

  tags = {
    Name        = "${var.app_name}-${var.env_name}-snapshot"
    Environment = "${var.env_name}"
    System      = "${var.app_name}"
  }
}

resource "aws_s3_bucket_acl" "osearch_snapshot" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.osearch_snapshot.id
  acl      = "private"
}

# SSE-KMS with a bucket key to cut KMS request costs. The snapshot role
# must also be allowed to use this KMS key or snapshot uploads will fail.
resource "aws_s3_bucket_server_side_encryption_configuration" "osearch_snapshot" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.osearch_snapshot.bucket

  rule {
    apply_server_side_encryption_by_default {
      kms_master_key_id = aws_kms_key.kms_s3_osaka[0].arn
      sse_algorithm     = "aws:kms"
    }

    bucket_key_enabled = true
  }
}

resource "aws_s3_bucket_ownership_controls" "osearch_snapshot" {
  provider = aws.osaka
  bucket   = aws_s3_bucket.osearch_snapshot.id

  rule {
    object_ownership = "BucketOwnerPreferred"
  }
}

# Exported for the Lambda BUCKET_NAME environment variable.
output "s3_osearch_snapshot_bucket_name" {
  value = aws_s3_bucket.osearch_snapshot.bucket
}

opensearch.tf

# Current region/account, used to build the access-policy ARN.
data "aws_region" "current" {}
data "aws_caller_identity" "current" {}
# Hosted zone for the optional custom endpoint (looked up only when enabled).
data "aws_route53_zone" "opensearch" {
  count = var.custom_endpoint_enabled ? 1 : 0
  name  = var.cluster_domain
}

# Service-linked role OpenSearch needs for VPC domains; created once per
# account, hence the toggle.
resource "aws_iam_service_linked_role" "osearch" {
  count            = var.create_opensearch && var.create_iam_service_linked_role ? 1 : 0
  aws_service_name = "opensearchservice.amazonaws.com"
}

# OpenSearch domain. Most sub-blocks are toggled by variables so the same
# module serves minimal test domains and fuller configurations.
# NOTE(review): the access policy below allows es:* from any principal on
# every domain in the account — acceptable for a VPC-only test domain
# (network-isolated), but tighten it before production use.
resource "aws_opensearch_domain" "opensearch" {
  count           = var.create_opensearch ? 1 : 0
  domain_name     = "${var.app_name}-${var.env_name}-domain"
  engine_version  = var.osearch_engine_version
  access_policies = <<CONFIG
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Action": "es:*",
            "Principal": "*",
            "Effect": "Allow",
            "Resource": "arn:aws:es:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:domain/*"
        }
    ]
}
CONFIG
  cluster_config {
    dedicated_master_enabled = var.master_instance_enabled
    dedicated_master_count   = var.master_instance_enabled ? var.master_instance_count : null
    dedicated_master_type    = var.master_instance_enabled ? var.master_instance_type : null

    instance_count = var.hot_instance_count
    instance_type  = var.hot_instance_type

    warm_enabled = var.warm_instance_enabled
    warm_count   = var.warm_instance_enabled ? var.warm_instance_count : null
    warm_type    = var.warm_instance_enabled ? var.warm_instance_type : null

    # Multi-AZ only when more than one AZ is requested.
    zone_awareness_enabled = (var.availability_zones > 1) ? true : false

    dynamic "zone_awareness_config" {
      for_each = (var.availability_zones > 1) ? [var.availability_zones] : []
      content {
        availability_zone_count = zone_awareness_config.value
      }
    }

    cold_storage_options {
      enabled = var.enable_cold_storage_options
    }
  }

  ### If you need anonymous authentication: on the first `terraform apply`, set enabled in advanced_security_options to `false` and use null_resource.enable_aunonymous_auth to enable anonymous authentication.
  ### After the first successful apply, change enabled back to `true` so that Terraform does not recreate the OpenSearch domain!
  ### Reason: anonymous authentication can only be enabled on a domain that was NOT created with advanced_security_options enabled.
  ### Please change it to false in the first apply to avoid errors and recreation.
  advanced_security_options {
    enabled                        = true
    internal_user_database_enabled = true

    master_user_options {
      master_user_name     = var.master_user_name
      master_user_password = var.master_user_password
    }
  }

  domain_endpoint_options {
    enforce_https       = var.enforce_https
    tls_security_policy = var.tls_security_policy

    custom_endpoint_enabled         = var.custom_endpoint_enabled
    custom_endpoint                 = var.custom_endpoint_enabled ? "${var.cluster_name}.${data.aws_route53_zone.opensearch[0].name}" : ""
    custom_endpoint_certificate_arn = var.custom_endpoint_enabled ? var.custom_endpoint_certificate_arn : ""
  }

  node_to_node_encryption {
    enabled = true
  }

  encrypt_at_rest {
    enabled    = true
    kms_key_id = var.encrypt_kms_key_id
  }

  # VPC-only domain when enabled; single subnet, so not zone-redundant.
  dynamic "vpc_options" {
    for_each = var.vpc_enabled ? [true] : []
    content {
      security_group_ids = ["${aws_security_group.opensearch_sg.id}"]
      subnet_ids         = ["${aws_subnet.private[0].id}"]
    }
  }

  # Note that the values for these configuration options must be strings (wrapped in quotes) 
  # or they may be wrong and cause a perpetual diff, causing Terraform to want to recreate your OpenSearch domain on every apply.
  advanced_options = {
    "rest.action.multi.allow_explicit_index" = "true"
  }

  dynamic "ebs_options" {
    for_each = var.ebs_enabled ? [true] : []
    content {
      ebs_enabled = true
      volume_size = var.ebs_volume_size
      volume_type = var.ebs_volume_type
      throughput  = var.ebs_throughput
      iops        = var.ebs_iops
    }
  }

  dynamic "log_publishing_options" {
    for_each = var.log_publishing_options_enabled ? [true] : []
    content {
      enabled                  = true
      cloudwatch_log_group_arn = var.osearch_log_cw_log_group_arn
      log_type                 = var.osearch_log_publishing_type
    }
  }

  dynamic "auto_tune_options" {
    for_each = var.enable_auto_tune_options ? [true] : []
    content {
      desired_state       = var.oseach_autotune_desired_state
      rollback_on_disable = var.oseach_autotune_rollback
      maintenance_schedule {
        start_at = var.autotune_maintenance_start
        duration {
          value = var.autotune_maintenance_duration_value
          unit  = var.autotune_maintenance_duration_unit
        }
        cron_expression_for_recurrence = var.autotune_maintenance_cron_expression
      }
    }
  }

  dynamic "cognito_options" {
    for_each = var.enable_cognito_options ? [true] : []
    content {
      enabled          = true
      identity_pool_id = var.osearch_identity_pool_id
      role_arn         = var.osearch_cognito_role_arn
      user_pool_id     = var.osearch_user_pool_id
    }
  }
  depends_on = [aws_iam_service_linked_role.osearch]
}

# Out-of-band step to enable anonymous auth via the AWS CLI, since the API
# rejects enabling it during domain creation.
# NOTE(review): the resource name has a typo ("aunonymous"); left unchanged
# because renaming would change the Terraform address and is referenced in
# comments above.
resource "null_resource" "enable_aunonymous_auth" {
  ## To avoid: Error creating OpenSearch domain: ValidationException: Cannot enable anonymous auth during domain creation.
  ## Command to enable: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html
  ### and: https://docs.aws.amazon.com/cli/latest/reference/opensearch/update-domain-config.html
  count = var.create_opensearch && var.enable_anonymous_auth ? 1 : 0
  provisioner "local-exec" {
    command    = "aws opensearch update-domain-config --domain-name ${var.app_name}-${var.env_name}-domain --advanced-security-options file://config_files/osearch.json"
    on_failure = continue
  }

  depends_on = [
    aws_opensearch_domain.opensearch
  ]
}

# The domain uses count, so indexing [0] fails with an error when
# create_opensearch = false. try() returns null instead, keeping
# `terraform apply` working whether or not the domain exists.
output "cluster_name" {
  description = "The name of the OpenSearch cluster."
  value       = try(aws_opensearch_domain.opensearch[0].domain_name, null)
}

output "cluster_endpoint" {
  description = "The endpoint URL of the OpenSearch cluster."
  value       = try("https://${aws_opensearch_domain.opensearch[0].endpoint}", null)
}

output "dashboard_endpoint" {
  description = "The endpoint URL of the OpenSearch dashboards."
  value       = try("https://${aws_opensearch_domain.opensearch[0].endpoint}/_dashboards/", null)
}

lambda.tf

data "archive_file" "ossnapshot_get" {
  type        = "zip"
  source_file = "lambda/ossnapshot_get/lambda_function.py"
  output_path = "lambda/upload/ossnapshot_get.zip"
}

# Lambda that registers the S3 snapshot repository and takes a manual
# OpenSearch snapshot on a schedule (triggered by
# aws_cloudwatch_event_rule.ossnapshot_get).
resource "aws_lambda_function" "ossnapshot_get" {
  filename         = data.archive_file.ossnapshot_get.output_path
  function_name    = "${var.app_name}-${var.env_name}-get"
  role             = aws_iam_role.snapshot_role.arn
  handler          = var.options_ossnapshot_get["handler"]
  source_code_hash = data.archive_file.ossnapshot_get.output_base64sha256
  runtime          = var.options_ossnapshot_get["runtime"]
  memory_size      = var.options_ossnapshot_get["memory_size"]
  timeout          = var.options_ossnapshot_get["timeout"]
  # Direct references instead of the deprecated "${...}"-only interpolation.
  layers           = [aws_lambda_layer_version.ossnapshot_get.arn]

  # Runs inside the VPC so it can reach the VPC-only OpenSearch endpoint.
  vpc_config {
    subnet_ids         = aws_subnet.private[*].id
    security_group_ids = [aws_security_group.lambda_sg.id]
  }

  # Only emit the environment block when variables were actually supplied.
  dynamic "environment" {
    for_each = length(keys(var.get_environment_variables)) == 0 ? [] : [true]
    content {
      variables = var.get_environment_variables
    }
  }

  depends_on = [
    aws_iam_role_policy_attachment.snapshot_role
  ]
}

# Allow EventBridge (CloudWatch Events) to invoke the "get" Lambda.
resource "aws_lambda_permission" "ossnapshot_get" {
  statement_id  = var.options_ossnapshot_get["statement_id"]
  action        = var.options_ossnapshot_get["action"]
  function_name = aws_lambda_function.ossnapshot_get.function_name
  principal     = var.options_ossnapshot_get["principal"]
  source_arn    = var.options_ossnapshot_get["source_arn"]
}

# Zip the "delete snapshot" Lambda source for deployment.
data "archive_file" "ossnapshot_delete" {
  type        = "zip"
  source_file = "lambda/ossnapshot_delete/lambda_function.py"
  output_path = "lambda/upload/ossnapshot_delete.zip"
}

# Lambda that deletes old OpenSearch snapshots on a schedule (triggered by
# aws_cloudwatch_event_rule.ossnapshot_delete).
resource "aws_lambda_function" "ossnapshot_delete" {
  filename         = data.archive_file.ossnapshot_delete.output_path
  function_name    = "${var.app_name}-${var.env_name}-delete"
  role             = aws_iam_role.snapshot_role.arn
  handler          = var.options_ossnapshot_delete["handler"]
  source_code_hash = data.archive_file.ossnapshot_delete.output_base64sha256
  runtime          = var.options_ossnapshot_delete["runtime"]
  memory_size      = var.options_ossnapshot_delete["memory_size"]
  timeout          = var.options_ossnapshot_delete["timeout"]

  # Direct references instead of the deprecated "${...}"-only interpolation.
  layers = [aws_lambda_layer_version.ossnapshot_delete.arn]

  # Runs inside the VPC so it can reach the VPC-only OpenSearch endpoint.
  vpc_config {
    subnet_ids         = aws_subnet.private[*].id
    security_group_ids = [aws_security_group.lambda_sg.id]
  }

  # Only emit the environment block when variables were actually supplied.
  dynamic "environment" {
    for_each = length(keys(var.delete_environment_variables)) == 0 ? [] : [true]
    content {
      variables = var.delete_environment_variables
    }
  }

  depends_on = [
    aws_iam_role_policy_attachment.snapshot_role
  ]
}

# Allow EventBridge (CloudWatch Events) to invoke the "delete" Lambda.
resource "aws_lambda_permission" "ossnapshot_delete" {
  statement_id  = var.options_ossnapshot_delete["statement_id"]
  action        = var.options_ossnapshot_delete["action"]
  function_name = aws_lambda_function.ossnapshot_delete.function_name
  principal     = var.options_ossnapshot_delete["principal"]
  source_arn    = var.options_ossnapshot_delete["source_arn"]
}

lambda-layer.tf

# Dependency layer for the delete Lambda (presumably requests /
# requests_aws4auth, matching the Lambda source's imports — confirm the
# contents of python.zip).
resource "aws_lambda_layer_version" "ossnapshot_delete" {
  filename            = "lambda/requirements/oss_delete/python.zip"
  layer_name          = "ossnapshot_delete_packages"
  description = "ossnapshot_delete_packages"
  compatible_runtimes = ["python3.8", "python3.9"]
  compatible_architectures = ["arm64", "x86_64"]
}

# Dependency layer for the get Lambda.
# NOTE(review): unlike the delete layer, no compatible_architectures is
# declared here — confirm whether that asymmetry is intentional.
resource "aws_lambda_layer_version" "ossnapshot_get" {
  filename            = "lambda/requirements/oss_get/python.zip"
  layer_name          = "ossnapshot_get_packages"
  description = "ossnapshot_get_packages"
  compatible_runtimes = ["python3.8", "python3.9"]
}

# Layer ARNs and versions, exposed for reference by other configurations.
output "ossnapshot_get_layer_arn" {
  value = aws_lambda_layer_version.ossnapshot_get.arn
}

output "ossnapshot_get_layer_version" {
  value = aws_lambda_layer_version.ossnapshot_get.version
}

output "ossnapshot_delete_layer_arn" {
  value = aws_lambda_layer_version.ossnapshot_delete.arn
}

output "ossnapshot_delete_layer_version" {
  value = aws_lambda_layer_version.ossnapshot_delete.version
}

kms.tf

# Renders the KMS key policy for the snapshot-bucket key from a JSON template.
# NOTE(review): the template_file data source is deprecated; the built-in
# templatefile() function is the modern replacement (switching would also
# require updating the reference in aws_kms_key.kms_s3_osaka).
data "template_file" "kms_s3_policy" {
  count    = var.create_kms_s3 ? 1 : 0
  template = file("kms_policies/kms_s3_policy.json")
  vars = {
    app_name   = var.app_name
    env_name   = var.env_name
    account_id = var.account_id
    region     = var.region
  }
}

# KMS key for the snapshot bucket replica in the DR region (Osaka).
resource "aws_kms_key" "kms_s3_osaka" {
  provider            = aws.osaka
  count               = var.create_kms_s3 && var.create_in_osaka ? 1 : 0
  is_enabled          = true
  enable_key_rotation = true
  policy              = data.template_file.kms_s3_policy[0].rendered
}
# Fix: the closing brace above was missing, so this resource swallowed the
# following aws_kms_alias resource and the file failed to parse.

# Alias for the Osaka KMS key.
resource "aws_kms_alias" "kms_s3_osaka" {
  provider      = aws.osaka
  # Must mirror the key's count condition: with only create_kms_s3, the
  # alias would reference kms_s3_osaka[0] even when create_in_osaka = false
  # and the key does not exist, failing the plan.
  count         = var.create_kms_s3 && var.create_in_osaka ? 1 : 0
  name          = "alias/${var.app_name}-${var.env_name}-kms-s3"
  target_key_id = aws_kms_key.kms_s3_osaka[0].id
}

iam.tf

#####################################
# IAM Policies
#####################################
# Allows HTTP calls against the OpenSearch domain (register repository,
# take / list / delete snapshots).
resource "aws_iam_policy" "osearch_snapshot_operations" {
  name   = "${var.app_name}-${var.env_name}-os"
  # NOTE(review): indexes opensearch[0] unconditionally; this fails to plan
  # when var.create_opensearch = false — confirm the module is only applied
  # with the domain enabled.
  policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "es:ESHttpPost",
        "es:ESHttpGet",
        "es:ESHttpPut",
        "es:ESHttpDelete"
      ],
      "Resource": "${aws_opensearch_domain.opensearch[0].arn}/*"
    }
  ]
}
EOF
}

# Lets the Lambda pass the snapshot role to the OpenSearch service when
# registering the S3 snapshot repository.
resource "aws_iam_policy" "passrole" {
  # SECURITY(review): iam:PassRole on Resource "*" is overly broad —
  # consider scoping Resource to the snapshot role's ARN.
  name   = "${var.app_name}-${var.env_name}-passrole"
  policy = <<EOF
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
              "iam:GetRole",
              "iam:PassRole"
            ],
            "Resource": "*"
        }
    ]
}
EOF
}

# CloudWatch Logs permissions for the snapshot Lambdas.
resource "aws_iam_policy" "logs" {
  name   = "${var.app_name}-${var.env_name}-logs"
  # Fix: "logs:DescribeLogStreams" was listed twice in the Action array.
  policy = <<EOF
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "logs:DescribeLogStreams",
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
                "logs:GetLogEvents"
            ],
            "Resource": "*"
        }
    ]
}
EOF
}

# AWS-managed policy granting the ENI permissions VPC-attached Lambdas need.
data "aws_iam_policy" "aws_lambda_vpc_access_execution" {
  name = "AWSLambdaVPCAccessExecutionRole"
}

# S3 + KMS access to the snapshot bucket for the snapshot role.
resource "aws_iam_policy" "bucket_snapshot" {
  name   = "${var.app_name}-${var.env_name}-bucket-snapshot"
  # Fix: both statements carried Sid "VisualEditor0"; Sids must be unique
  # within a policy document.
  policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
      {
        "Sid": "VisualEditor0",
        "Effect": "Allow",
        "Action": [
          "s3:Get*",
          "s3:List*",
          "s3:Put*",
          "s3:DeleteObject",
          "s3:AbortMultipartUpload"
        ],
        "Resource": [
          "arn:aws:s3:::${aws_s3_bucket.osearch_snapshot.bucket}",
          "arn:aws:s3:::${aws_s3_bucket.osearch_snapshot.bucket}/*"
        ]
      },
      {
        "Sid": "VisualEditor1",
        "Effect": "Allow",
        "Action": [
          "kms:Decrypt",
          "kms:GenerateDataKey"
        ],
        "Resource": [
          "*"
        ]
    }
  ]
}
EOF
}

#####################################
# IAM Roles
#####################################
locals {
  # Every policy attached to the Lambda snapshot role.
  snapshot_role_policy_arn_list = [
    data.aws_iam_policy.aws_lambda_vpc_access_execution.arn,
    aws_iam_policy.logs.arn,
    aws_iam_policy.bucket_snapshot.arn,
    aws_iam_policy.osearch_snapshot_operations.arn,
    aws_iam_policy.passrole.arn,
  ]
}
# Fix: the locals block was never closed, which made the following
# data "aws_iam_policy_document" block a parse error.

# Trust policy for the snapshot role: assumable by Lambda (the snapshot
# functions), SSM, and the OpenSearch service itself (es.amazonaws.com must
# be trusted so the domain can use this role to write snapshots to S3).
data "aws_iam_policy_document" "snapshot_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"

      identifiers = ["lambda.amazonaws.com", "ssm.amazonaws.com","es.amazonaws.com"]

    }
  }
}

# Shared execution role for both snapshot Lambdas; also passed to the
# OpenSearch domain as the snapshot role (see the passrole policy).
resource "aws_iam_role" "snapshot_role" {
  name               = "${var.app_name}-${var.env_name}-snapshot-role"
  assume_role_policy = data.aws_iam_policy_document.snapshot_role.json
}

# Attach every policy in the list to the snapshot role.
resource "aws_iam_role_policy_attachment" "snapshot_role" {
  count      = length(local.snapshot_role_policy_arn_list)
  role       = aws_iam_role.snapshot_role.name
  # Plain index lookup instead of element(): element() wraps around the
  # list, which is never wanted here and can mask off-by-one errors.
  policy_arn = local.snapshot_role_policy_arn_list[count.index]
}

# Looks up the OpenSearch service-linked role.
# NOTE(review): this data source appears unused within the visible file —
# confirm an external consumer exists before removing it.
data "aws_iam_role" "osearch_service_role" {
  name = "AWSServiceRoleForAmazonOpenSearchService"
}

# ARN the Lambdas pass as DOMAIN_SNAPSHOT_ROLE_ARN.
output "osearch_snapshot_lambda_role_arn" {
  value = aws_iam_role.snapshot_role.arn
}

nat-instance.tf

#####################################
# EC2 
#####################################
## NAT instance
## NAT instance in the public subnet, used instead of a NAT gateway.
resource "aws_instance" "nat_instance" {
  count                   = var.enable_nat_instance ? 1 : 0
  ami                     = var.nat_instance_ami
  availability_zone       = var.az_a
  instance_type           = var.nat_instance_type
  # Native booleans instead of the quoted "true"/"false" strings.
  disable_api_termination = true
  key_name                = "${var.app_name}-${var.env_name}-key"
  subnet_id               = aws_subnet.public[0].id
  # NAT instances must forward traffic not addressed to themselves.
  source_dest_check       = false
  credit_specification {
    cpu_credits = "standard"
  }
  vpc_security_group_ids = [
    aws_security_group.ec2_nat_sg[0].id,
  ]
}

logs.tf

#################################
# CloudWatch Logs & event lambda
#################################
## ossnapshot_get
## Schedule that triggers the "get" (create snapshot) Lambda.
resource "aws_cloudwatch_event_rule" "ossnapshot_get" {
  name                = "${var.app_name}-${var.env_name}-get"
  description         = "${var.app_name}-${var.env_name}-get"
  schedule_expression = var.options_ossnapshot_get["schedule_expression"]
}

resource "aws_cloudwatch_event_target" "ossnapshot_get" {
  rule      = aws_cloudwatch_event_rule.ossnapshot_get.name
  target_id = aws_lambda_function.ossnapshot_get.id
  arn       = aws_lambda_function.ossnapshot_get.arn
}

resource "aws_cloudwatch_log_group" "ossnapshot_get" {
  # Lambda logs to /aws/lambda/<function_name>; the function is named
  # "${app}-${env}-get" (see aws_lambda_function.ossnapshot_get). The old
  # "-get-snapshot" suffix created an unused group, so the retention policy
  # never applied to the real one.
  name              = "/aws/lambda/${var.app_name}-${var.env_name}-get"
  retention_in_days = var.options_ossnapshot_get["retention_in_days"]
}

## ossnapshot_delete
## Schedule that triggers the "delete snapshot" Lambda.
resource "aws_cloudwatch_event_rule" "ossnapshot_delete" {
  name                = "${var.app_name}-${var.env_name}-delete"
  description         = "${var.app_name}-${var.env_name}-delete"
  schedule_expression = var.options_ossnapshot_delete["schedule_expression"]
}

resource "aws_cloudwatch_event_target" "ossnapshot_delete" {
  rule      = aws_cloudwatch_event_rule.ossnapshot_delete.name
  target_id = aws_lambda_function.ossnapshot_delete.id
  arn       = aws_lambda_function.ossnapshot_delete.arn
}

resource "aws_cloudwatch_log_group" "ossnapshot_delete" {
  # Match the delete function's actual name "${app}-${env}-delete" (see
  # aws_lambda_function.ossnapshot_delete) so retention applies to the log
  # group Lambda really writes to.
  name              = "/aws/lambda/${var.app_name}-${var.env_name}-delete"
  retention_in_days = var.options_ossnapshot_delete["retention_in_days"]
}

variables.tf

#####################################
# Provider Settings
#####################################
# Default provider (primary region) plus an aliased provider for the DR
# region, used by the Osaka KMS resources.
provider "aws" {
  region = var.region
}

provider "aws" {
  region = var.dr_region
  alias  = "osaka"
}

#####################################
# Variable Settings
#####################################

#####################################
# AWS Settings
#####################################
variable "region" {
  type    = string
  default = "ap-northeast-1"
}

variable "dr_region" {
  type    = string
  default = "ap-northeast-3"
}
#####################################
# App Name
#####################################
variable "app_name" {
}

#####################################
# Env Name
#####################################
variable "env_name" {
}

#####################################
# Account ID
#####################################
variable "account_id" {
}

#####################################
# Segment Settings
#####################################
variable "root_cidr" {
}

#####################################
# AZ
#####################################
variable "az_a" {
}

variable "az_b" {
}

variable "az_c" {
}

variable "az_d" {
}

#####################################
# Subnet
#####################################
variable "public_subnets" {
  type = map(any)
  default = {
  }
}

variable "private_subnets" {
  type = map(any)
  default = {
  }
}

#####################################
# NAT
#####################################
variable "enable_natgw" {
  type    = bool
  default = false
}

variable "enable_nat_instance" {
  type    = bool
  default = true
}

#####################################
# DHCP Option Set
#####################################
variable "create_custom_dhcp" {
  type    = bool
  default = false
}

variable "dhcp_option_domain" {
}

#####################################
# VPC endpoint
#####################################
variable "create_gateway_vpce" {
  type    = bool
  default = true
}

variable "gateway_endpoint_services" {
}

variable "create_interface_vpce" {
  type    = bool
  default = false
}

variable "interface_endpoint_services" {
}

#####################################
# EC2
#####################################
variable "nat_instance_type" {
}

variable "nat_instance_ami" {
}

#####################################
# SG
#####################################
variable "security_group_common_egress" {
}

variable "ec2_nat_sg_cidr_ingress" {
}

variable "vpce_sg_cidr_ingress" {
}

variable "opensearch_sg_sgid_ingress" {
  default = {}
}

variable "lambda_sg_outbound_destination" {
  default = {}
}

#####################################
# KMS
#####################################
variable "create_kms_s3" {
  type    = bool
  default = true
}

variable "create_in_osaka" {
  type    = bool
  default = false
}

#####################################
# OpenSearch
#####################################
variable "cluster_name" {
  type    = string
  default = "opensearch"
}

variable "create_opensearch" {
  type    = bool
  default = true
}

variable "custom_endpoint_enabled" {
  type    = bool
  default = false
}

variable "create_iam_service_linked_role" {
  type    = bool
  default = true
}

variable "master_instance_enabled" {
  type    = bool
  default = false
}

variable "warm_instance_enabled" {
  type    = bool
  default = false
}

variable "enforce_https" {
  type    = bool
  default = true
}

variable "vpc_enabled" {
  description = "Indicates whether the cluster is running inside a VPC."
  type        = bool
  default     = true
}

variable "ebs_enabled" {
  description = "Indicates whether attach EBS volumes to the data nodes."
  type        = bool
  default     = true
}

variable "osearch_engine_version" {
  type    = string
  default = "OpenSearch_2.3"
}

variable "cluster_domain" {
  type    = string
  default = "opensearch"
}

variable "master_instance_count" {
  description = "The number of dedicated master nodes in the cluster."
  type        = number
  default     = 3
}

variable "master_instance_type" {
  description = "The type of EC2 instances to run for each master node. "
  type        = string
  default     = "m6g.large.search"
}

variable "hot_instance_count" {
  description = "The number of dedicated hot nodes in the cluster."
  type        = number
  default     = 3
}

variable "hot_instance_type" {
  description = "The type of EC2 instances to run for each hot node."
  type        = string
  default     = "t3.medium.search"
}

variable "warm_instance_count" {
  description = "The number of dedicated warm nodes in the cluster."
  type        = number
  default     = 3
}

variable "warm_instance_type" {
  type    = string
  default = "c6g.large.search" ## The T3 instance types do not support UltraWarm storage, cold storage, or Auto-Tune.
}

variable "availability_zones" {
  description = "The number of availability zones for the OpenSearch cluster. Valid values: 1, 2 or 3."
  type        = number
  default     = 3
}

variable "master_user_name" {
  type    = string
  default = ""
}

variable "master_user_password" {
  type    = string
  default = ""
}

variable "tls_security_policy" {
  type    = string
  default = "Policy-Min-TLS-1-2-2019-07"
}

variable "custom_endpoint_certificate_arn" {
  type    = string
  default = ""
}

variable "encrypt_kms_key_id" {
  description = "The KMS key ID to encrypt the OpenSearch cluster with. If not specified, then it defaults to using the AWS OpenSearch Service KMS key."
  type        = string
  default     = ""
}

variable "ebs_volume_size" {
  description = "The size of EBS volumes attached to data nodes (in GiB)."
  type        = number
  default     = 10
}

variable "ebs_volume_type" {
  description = "The type of EBS volumes attached to data nodes."
  type        = string
  default     = "gp3"
}

variable "ebs_throughput" {
  description = "The throughput (in MiB/s) of the EBS volumes attached to data nodes. Valid values are between 125 and 1000."
  type        = number
  default     = 125
}

variable "ebs_iops" {
  description = "The baseline input/output (I/O) performance of EBS volumes attached to data nodes."
  type        = number
  default     = 3000
}

variable "enable_anonymous_auth" {
  type    = bool
  default = false
}

variable "log_publishing_options_enabled" {
  type    = bool
  default = false
}

variable "osearch_log_cw_log_group_arn" {
  type    = string
  default = ""
}

variable "osearch_log_publishing_type" {
  description = " Type of OpenSearch log. Valid values: INDEX_SLOW_LOGS, SEARCH_SLOW_LOGS, ES_APPLICATION_LOGS, AUDIT_LOGS."
  type        = string
  default     = "AUDIT_LOGS"
}

variable "enable_auto_tune_options" {
  type    = bool
  default = false
}

variable "oseach_autotune_desired_state" {
  # NOTE(review): name misspells "osearch"; kept as-is because references
  # elsewhere in the module use this exact name.
  description = " Auto-Tune desired state for the domain. Valid values: ENABLED or DISABLED."
  default     = "DISABLED"
}

variable "autotune_maintenance_start" {
  description = "Date and time at which to start the Auto-Tune maintenance schedule in RFC3339 format. (e.g. 2021-12-19)"
  type        = string
  default     = ""
}

variable "autotune_maintenance_duration_value" {
  description = "An integer specifying the value of the duration of an Auto-Tune maintenance window."
  type        = number
  default     = 1
}

variable "autotune_maintenance_duration_unit" {
  description = "Unit of time specifying the duration of an Auto-Tune maintenance window. Valid values: HOURS"
  type        = string
  default     = "HOURS"
}

variable "autotune_maintenance_cron_expression" {
  description = "A cron expression specifying the recurrence pattern for an Auto-Tune maintenance schedule. E.g. cron(0 12 * * ? *)"
  type        = string
  default     = ""
}

variable "oseach_autotune_rollback" {
  # NOTE(review): name misspells "osearch"; kept as-is because references
  # elsewhere in the module use this exact name.
  description = "Whether to roll back to default Auto-Tune settings when disabling Auto-Tune. Valid values: DEFAULT_ROLLBACK or NO_ROLLBACK"
  type        = string
  default     = "NO_ROLLBACK"
}

variable "enable_cognito_options" {
  type    = bool
  default = false
}

variable "osearch_identity_pool_id" {
  description = "ID of the Cognito Identity Pool to use"
  type        = string
  default     = ""
}

variable "osearch_cognito_role_arn" {
  # Renamed from "search_cognito_role_arn": the cognito_options block
  # references var.osearch_cognito_role_arn, which was otherwise an
  # undeclared variable and failed validation.
  description = "ARN of the IAM role that has the AmazonOpenSearchServiceCognitoAccess policy attached."
  type        = string
  default     = ""
}

variable "osearch_user_pool_id" {
  description = "ID of the Cognito User Pool to use."
  type        = string
  default     = ""
}

variable "enable_cold_storage_options" {
  description = "Boolean to enable cold storage for an OpenSearch domain. Defaults to false. Master and ultrawarm nodes must be enabled for cold storage."
  type        = bool
  default     = false
}

#####################################
# Lambda
#####################################

variable "options_ossnapshot_get" {
  default = {}
}

variable "options_ossnapshot_delete" {
  default = {}
}

variable "get_environment_variables" {
  default = {}
}

variable "delete_environment_variables" {
  default = {}
}

s3_polices/bucketlog.json

{
  "Version": "2012-10-17",
  "Statement": [
      {
          "Sid": "S3ServerAccessLogsPolicy",
          "Effect": "Allow",
          "Principal": {
              "Service": "logging.s3.amazonaws.com"
          },
          "Action": [
              "s3:PutObject"
          ],
          "Resource": "arn:aws:s3:::${bucket}/s3/*",
          "Condition": {
              "ArnLike": {
                  "aws:SourceArn": "arn:aws:s3:::${osearch_snapshot_bucket}"
              },
              "StringEquals": {
                  "aws:SourceAccount": "${account_id}"
              }
          }
      }
  ]
}

kms_policies/kms_s3_policy.json

{
  "Version": "2012-10-17",
  "Id": "${app_name}-${env_name}-kms-s3",
  "Statement": [
    {
      "Sid": "Allow access through S3 for all principals in the account that are authorized to use S3",
      "Effect": "Allow",
      "Principal": {
        "AWS": "*"
      },
      "Action": [
        "kms:Encrypt",
        "kms:Decrypt",
        "kms:ReEncrypt*",
        "kms:GenerateDataKey*",
        "kms:DescribeKey"
      ],
      "Resource": "*",
      "Condition": {
        "StringEquals": {
          "kms:ViaService": "s3.${region}.amazonaws.com",
          "kms:CallerAccount": "${account_id}"
        }
      }
    },
    {
      "Sid": "Allow direct access to key metadata to the account",
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::${account_id}:root"
      },
      "Action": [
        "kms:*"
      ],
      "Resource": "*"
    }
  ]
}

sample terraform.tfvars

#####################################
# AWS Settings
#####################################
region            = "ap-northeast-1"

dr_region         = "ap-northeast-3"
#####################################
# App Name
#####################################
app_name = "xxxx"

#####################################
# Env Name
#####################################
env_name = "xxxx"

#####################################
# Account ID
#####################################
account_id = "xxxxxxxxxxx"

#####################################
# Segment Settings
#####################################
root_cidr = "xx.xx.xx.xx/xx"

public_subnets = {
  ap-northeast-1a = "xx.xx.xx.xx/xx"
  ap-northeast-1c = "xx.xx.xx.xx/xx"
}

private_subnets = {
  ap-northeast-1a = "xx.xx.xx.xx/xx"
  ap-northeast-1c = "xx.xx.xx.xx/xx"
}

#####################################
# AZ
#####################################
az_a = "ap-northeast-1a"
az_b = "ap-northeast-1b"
az_c = "ap-northeast-1c"
az_d = "ap-northeast-1d"

#####################################
# NAT
#####################################
enable_natgw        = false
enable_nat_instance = true

#####################################
# DHCP Option Set
#####################################
create_custom_dhcp = false
dhcp_option_domain = ""

#####################################
# VPC endpoint
#####################################
create_gateway_vpce       = true
gateway_endpoint_services = ["com.amazonaws.ap-northeast-1.s3"]

create_interface_vpce       = true
interface_endpoint_services = ["com.amazonaws.ap-northeast-1.logs", "com.amazonaws.ap-northeast-1.monitoring", "com.amazonaws.ap-northeast-1.kms"]

#####################################
# EC2
#####################################
nat_instance_type   = "t3.nano"
nat_instance_ami    = "ami-04f90b458b992deda"

#####################################
# SG
#####################################
## common egress
security_group_common_egress = {                                                         
  security_group_common_egress_001 = {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
    description = "all"
  }
}

## nat instance
ec2_nat_sg_cidr_ingress = {
  ec2_nat_sg_cidr_ingress_001 = {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "Private subnet CIDR"
  }

  ec2_nat_sg_cidr_ingress_002 = {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
    description = "Private subnet CIDR"
  }

  ec2_nat_sg_cidr_ingress_003 = {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
    description = "Private subnet CIDR"
  }

  ec2_nat_sg_cidr_ingress_004 = {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "Private subnet CIDR"
  }
}

## vpce
vpce_sg_cidr_ingress = {
  vpce_sg_cidr_ingress_001 = {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
    description = "Private subnet CIDR"
  }

  vpce_sg_cidr_ingress_002 = {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
    description = "Private subnet CIDR"
  }
}

## OpenSearch
opensearch_sg_sgid_ingress = {
  opensearch_sg_sgid_ingress_01 = {
    from_port      = 443
    to_port        = 443
    protocol       = "tcp"
    security_group = "security_group_lambda"
    description    = "security group lambda"
  }
}

# Lambda
lambda_sg_outbound_destination = {
  lambda_sg_outbound_destination_001 = {
    from_port      = 443
    to_port        = 443
    protocol       = "tcp"
    security_group = "security_group_opensearch"
    description    = "domain sg"
  }
}

#####################################
# KMS
#####################################
create_kms_s3 = true
create_in_osaka = true

#####################################
# OpenSearch
#####################################
create_iam_service_linked_role = false
create_opensearch     = true
hot_instance_count    = 1
hot_instance_type     = "t3.medium.search"
availability_zones    = 1
master_user_name      = "xxxxx"
master_user_password  = "xxxxxxxxxx"
enforce_https         = true
tls_security_policy   = "Policy-Min-TLS-1-2-2019-07"
ebs_enabled           = true
ebs_volume_size       = 10
enable_anonymous_auth = false

#####################################
# Lambda
#####################################
options_ossnapshot_get = {
  handler             = "lambda_function.lambda_handler"
  runtime             = "python3.9"
  schedule_expression = "cron(00 01 ? * MON-FRI *)"
  memory_size         = 512
  timeout             = 900
  retention_in_days   = 1827
  statement_id        = "get-permission"
  action              = "lambda:InvokeFunction"
  principal           = "events.amazonaws.com"
  source_arn          = "arn:aws:events:ap-northeast-1:xxxxxxxxxxxx:rule/*"
}

options_ossnapshot_delete = {
  handler             = "lambda_function.lambda_handler"
  runtime             = "python3.9"
  schedule_expression = "cron(00 01 ? * MON-FRI *)"
  memory_size         = 512
  timeout             = 900
  retention_in_days   = 1827
  statement_id        = "delete-permission"
  action              = "lambda:InvokeFunction"
  principal           = "events.amazonaws.com"
  source_arn          = "arn:aws:events:ap-northeast-1:xxxxxxxxxxxx:rule/*"
}

get_environment_variables = {
  DOMAIN_HOST = "https://xxxxxxxx-ogx3cplo22kfe6gdffgdhfghdfghpq.ap-northeast-1.es.amazonaws.com/"
  DOMAIN_REGION = "ap-northeast-1"
  REPOSITORY_PATH = "_snapshot/manual"
  SNAPSHOT_PREFIX = "manual-snapshot-test"
  BUCKET_NAME = "snapshot"
  DOMAIN_SNAPSHOT_ROLE_ARN = "arn:aws:iam::xxxxxxx:role/xxxxxxxxxxxx"
}

delete_environment_variables = {
  DOMAIN_HOST = "https://xxxxxx-ogx3cplo22kfe662aa62w4afpq.ap-northeast-1.es.amazonaws.com"
  DOMAIN_REGION = "ap-northeast-1"
  REPOSITORY_PATH = "_snapshot/manual/"
  SNAPSHOT_PREFIX = "manual-snapshot-test"
  REPOSITORY_NAME = "manual"
  UNIT = "minutes"
  UNIT_COUNT = 1
}

How to resolve the error: “groupSize must be greater than 0 but was -2” when trying to delete OpenSearch snapshot

The first time I create the create snapshot Lambda function, I have set the REPOSITORY_PATH = “_snapshot/manual/” in the environment variable of the Lambda function. And the payload:

payload = {
        "type": "s3",
        "settings": {
            "bucket": bucket_name,
            "base_path": repository_path,
            "endpoint": "s3.amazonaws.com", 
            "role_arn": role_arn
        }
    }

actually will become:

payload = {
        "type": "s3",
        "settings": {
            "bucket": bucket_name,
            "base_path": "_snapshot/manual/",
            "endpoint": "s3.amazonaws.com", 
            "role_arn": role_arn
        }
    }

You can see that there is a trailing slash after the repository name “manual”. This will not cause any error with the get snapshot function.

However, when I tried to delete the snapshot:

There is an error “groupSize must be greater than 0 but was -2”, even though the snapshot is actually deleted. The error does not appear when there is only one snapshot in the repository, but it does appear when there are more than one.

After several rounds of debugging, it turned out that the trailing slash in the payload of the get-snapshot function was the culprit. I removed it (“_snapshot/manual/” -> “_snapshot/manual”) and the function then worked fine: