Python Lambda function code to automate taking and deleting OpenSearch snapshots
Create snapshots
import os
import boto3
import requests
from logging import getLogger, StreamHandler, DEBUG
from requests_aws4auth import AWS4Auth
from datetime import datetime
logger = getLogger("urllib3")
handler = StreamHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
logger.addHandler(handler)
logger.propagate = False
# configuration
host = os.environ.get("DOMAIN_HOST") ## e.g. "https://vpc-test-osearch-zmqj43i234onqwhempvqdh3xmi.ap-northeast-1.es.amazonaws.com/" (keep the trailing slash)
region = os.environ.get("DOMAIN_REGION", "ap-northeast-1")
repository_path = os.environ.get("REPOSITORY_PATH", "_snapshot/manual")
snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX","manual-snapshot-test")
bucket_name = os.environ.get("BUCKET_NAME")
role_arn = os.environ.get("DOMAIN_SNAPSHOT_ROLE_ARN")
# Note: Please refer to this document for snapshot role setting: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-snapshots.html#managedomains-snapshot-prerequisites
## If using fine-grained access control, you will have to map the snapshot role in OpenSearch Dashboards
# AWS OpenSearch service identifier
service = "es"
# Lambda execution starts here.
def lambda_handler(event, context):
print("OpenSearch backup Lambda - Get OpenSearch snapshot and save to S3 bucket.")
print(f"Domain host: {host}")
print(f"Repository path: {repository_path}")
print(f"Snapshot prefix: {snapshot_prefix}")
print(f"Bucket name: {bucket_name}")
print(f"Role ARN: {role_arn}")
# Authenticate
auth = authentication()
# Register snapshot repository
register_snapshot_repository(auth)
# Take snapshot
take_snapshot(auth)
return event
# Functions
def authentication() -> AWS4Auth:
"""Sign requests to AWS OpenSearch with the credentials taken from Lambda function IAM role"""
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(
credentials.access_key,
credentials.secret_key,
region,
service,
session_token=credentials.token,
)
return awsauth
def register_snapshot_repository(awsauth: AWS4Auth):
"""Register a new snapshot repository, if already done this function will not do anything"""
path = repository_path
url = host + path
payload = {
"type": "s3",
"settings": {
"bucket": bucket_name,
"base_path": repository_path,
"endpoint": "s3.amazonaws.com",
"role_arn": role_arn
}
}
headers = {"Content-Type": "application/json"}
r = requests.put(url, auth=awsauth, json=payload, headers=headers)
logger.info(r.text)
if r.status_code != 200:
raise Exception(
f"Cannot register the snapshot repository. Details: {r.text}"
)
else:
print(r.text)
return {"statusCode": r.status_code, "body": r.text}
def take_snapshot(awsauth: AWS4Auth):
"""Take a snapshot of the OpenSearch domain by appending a date to the basename given in the environment"""
print("===================================================")
snapshot_name = snapshot_prefix + "-" + datetime.now().strftime("%Y-%m-%dt%H-%M-%S")
path = (
repository_path + "/" ## Important note: need to append trailing slash here, do not append it to the env var!
+ snapshot_name
)
url = host + path
r = requests.put(url, auth=awsauth)
logger.info(r.text)
if r.status_code != 200:
raise Exception(
f"Cannot take snapshot {snapshot_name}. Details: {r.text}"
)
else:
print(r.text)
print("===================================================")
print(f"Snapshot name: {snapshot_name}")
return {"statusCode": r.status_code, "body": r.text}
requirements:
certifi==2021.5.30
chardet==4.0.0
idna==2.10
requests
requests-aws4auth==1.0.1
urllib3==1.26.3
charset-normalizer==2.1.1
Delete snapshots
import os
import boto3
import curator
from logging import getLogger, StreamHandler, DEBUG
from requests_aws4auth import AWS4Auth
from elasticsearch import Elasticsearch, RequestsHttpConnection
logger = getLogger("curator")
handler = StreamHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
logger.addHandler(handler)
logger.propagate = False
url_logger = getLogger("urllib3")
url_logger.setLevel(DEBUG)
host = os.environ.get("DOMAIN_HOST") ## e.g. "vpc-test-osearch-zmqj43i234onqwhempvqdh3xmi.ap-northeast-1.es.amazonaws.com" (do not include https:// or a trailing slash)
region = os.environ.get("DOMAIN_REGION", "ap-northeast-1")
repository_path = os.environ.get("REPOSITORY_PATH", "_snapshot/manual")
snapshot_prefix = os.environ.get("SNAPSHOT_PREFIX","manual-snapshot-test")
# AWS OpenSearch service identifier
service = "es"
repository_name = os.environ.get("REPOSITORY_NAME","manual")
unit = os.environ.get("UNIT") # Valid: seconds, minutes, hours, days, weeks, months, years
unit_count = int(os.environ.get("UNIT_COUNT")) # e.g. unit=weeks, unit_count=2 deletes snapshots older than 2 weeks
# Note: This Lambda function assumes that you already configured the snapshot role when you set up the snapshot-taking function.
# Please refer to this document for snapshot role setting: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-snapshots.html#managedomains-snapshot-prerequisites
## If using fine-grained access control, you will have to map the snapshot role in OpenSearch Dashboards
## To delete snapshots, you also have to map the backend role to the "readall_and_monitor" role (see the role-mapping sketch after this script)
def authentication() -> AWS4Auth:
"""Sign requests to AWS OpenSearch with the credentials taken from Lambda function IAM role"""
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(
credentials.access_key,
credentials.secret_key,
region,
service,
session_token=credentials.token,
)
return awsauth
def delete_snapshot(awsauth: AWS4Auth):
    """Delete snapshots older than the configured age using Curator"""
# Build the OpenSearch client.
client = Elasticsearch(
hosts = [{'host': host, 'port': 443}],
http_auth = awsauth,
use_ssl = True,
verify_certs = True,
connection_class = RequestsHttpConnection,
timeout = 120 # Deleting snapshots can take a while, so keep the connection open for long enough to get a response.
)
    url = "https://" + host + "/"
    print(url)
try:
# Get all snapshots in the repository.
snapshot_list = curator.SnapshotList(client, repository=repository_name)
        # Filter by age: any snapshot older than the configured unit_count x unit.
snapshot_list.filter_by_age(source='creation_date', direction='older', unit=unit, unit_count=unit_count)
# Delete the old snapshots.
curator.DeleteSnapshots(snapshot_list, retry_interval=30, retry_count=3).do_action()
        #curator.DeleteSnapshots(snapshot_list, retry_interval=30, retry_count=3).do_dry_run()  # dry run: log what would be deleted
except (curator.exceptions.SnapshotInProgress, curator.exceptions.NoSnapshots, curator.exceptions.FailedExecution) as e:
print(e)
# Lambda execution starts here.
def lambda_handler(event, context):
print("Lambda function to delete OpenSearch snapshot")
print(f"Domain host: {host}")
print(f"Repository path: {repository_path}")
print(f"Snapshot prefix: {snapshot_prefix}")
print(f"Repository_name: {repository_name}")
print(f"Time unit: {unit}")
print(f"Time amount: {unit_count}")
auth = authentication()
delete_snapshot(auth)
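If fine-grained access control is enabled, the role mappings mentioned in the comments above (the snapshot role to manage_snapshots, and the backend role to readall_and_monitor) can also be scripted against the security plugin REST API instead of clicking through OpenSearch Dashboards. A hedged sketch (map_backend_role is my own helper; the signing identity must itself be allowed to manage security settings):
import requests
from requests_aws4auth import AWS4Auth

def map_backend_role(host: str, awsauth: AWS4Auth, os_role: str, backend_role_arn: str):
    """Map an IAM role ARN as a backend role of an OpenSearch security role.
    Note: PUT replaces any existing mapping for that role."""
    url = f"https://{host}/_plugins/_security/api/rolesmapping/{os_role}"
    r = requests.put(url, auth=awsauth, json={"backend_roles": [backend_role_arn]})
    r.raise_for_status()
    return r.json()
For example: map_backend_role(host, auth, "manage_snapshots", role_arn), and again with "readall_and_monitor" for the delete function.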
requirements:
certifi==2021.5.30
chardet==4.0.0
idna==2.10
requests
requests-aws4auth==1.0.1
urllib3==1.26.3
charset-normalizer==2.1.1
click==6.7
elasticsearch==7.9.1
elasticsearch-curator==5.7.0
voluptuous==0.13.1
pyyaml==5.4.1
six>=1.16.0
Some notes:
get_snapshot.py registers the snapshot repository and takes snapshots with signed HTTP requests; delete_snapshot.py uses Curator and the elasticsearch-py client to delete any snapshot older than the configured age.
You have to set environment variables for the Lambda functions (docs.aws.amazon.com/lambda/latest/dg/config..)
Because taking and deleting snapshots can take a while, this code is more sensitive to connection and Lambda timeouts, hence the extra logging code. If DeleteSnapshots takes longer to get a response from the OpenSearch Service domain, you might need to increase the client timeout (120 seconds in the code above), as sketched below. You must also increase the Lambda function timeout from its default value of three seconds.
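For example, the requests calls in the scripts above accept an explicit timeout parameter; a small sketch with illustrative values (tune them to your domain):
import requests

def put_with_timeout(url, auth, payload=None):
    # (connect timeout, read timeout) in seconds; taking or deleting snapshots
    # in a large repository can take minutes, so give the read side room.
    return requests.put(url, auth=auth, json=payload, timeout=(10, 300))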
Basic Lambda settings:
Memory: 128 MB; Timeout: 3 minutes
Triggers:
Rather than reacting to some event (such as a file upload to Amazon S3), these functions are meant to run on a schedule, for example from the EventBridge rules in the Terraform code below.
Permissions:
Both Lambda functions in this section need the basic logging permissions that all Lambda functions need, plus HTTP method permissions for the OpenSearch Service domain:
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": "logs:CreateLogGroup",
"Resource": "arn:aws:logs:ap-northeast-1:123456789012:*"
},
{
"Effect": "Allow",
"Action": [
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:ap-northeast-1:123456789012:log-group:/aws/lambda/your-lambda-function:*"
]
},
{
"Effect": "Allow",
"Action": [
"es:ESHttpPost",
"es:ESHttpGet",
"es:ESHttpPut",
"es:ESHttpDelete"
],
"Resource": "arn:aws:es:ap-northeast-1:123456789012:domain/my-domain/*"
}
]
}
Take snapshot reference:
https://opensearch.org/docs/2.1/opensearch/snapshots/snapshot-restore/#take-snapshots
Delete snapshot reference:
https://opensearch.org/docs/latest/api-reference/snapshots/delete-snapshot/#delete-snapshot
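That delete API can also be called directly with a signed request, which is handy for removing one named snapshot without Curator. A minimal sketch (repository and snapshot names are whatever you used above):
import requests
from requests_aws4auth import AWS4Auth

def delete_one_snapshot(host: str, awsauth: AWS4Auth, repository: str, snapshot: str):
    """DELETE _snapshot/<repository>/<snapshot> removes a single named snapshot"""
    url = f"https://{host}/_snapshot/{repository}/{snapshot}"
    r = requests.delete(url, auth=awsauth, timeout=(10, 300))
    r.raise_for_status()
    return r.json()  # {"acknowledged": true} on success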
To make Lambda layers:
pip install -r requirements.txt -t python
zip -r python.zip python
aws lambda publish-layer-version --layer-name <name> --description "<description>" --license-info "MIT" --zip-file fileb://python.zip --compatible-runtimes python3.8 python3.9 --compatible-architectures "arm64" "x86_64"
Setup inside OpenSearch Dashboards
If you want to access OpenSearch Dashboards from your browser, please refer to this guide to set up a reverse proxy instance:
https://shinchan.asia/2022/12/13/reverse-proxy-with-nginx-amz-linux-2-nat-instance/
=================================================
Terraform code to deploy testing resources
#####################################
# VPC
#####################################
resource "aws_vpc" "vpc" {
cidr_block = var.root_cidr
enable_dns_hostnames = "true"
enable_dns_support = "true"
}
#####################################
# Internet gateway
#####################################
resource "aws_internet_gateway" "igw" {
vpc_id = aws_vpc.vpc.id
tags = {
Name = "${var.app_name}-${var.env_name}-igw"
}
}
#####################################
# Public subnet
#####################################
resource "aws_subnet" "public" {
count = length(var.public_subnets)
vpc_id = aws_vpc.vpc.id
cidr_block = element(values(var.public_subnets), count.index)
map_public_ip_on_launch = true
availability_zone = element(keys(var.public_subnets), count.index)
depends_on = [
aws_internet_gateway.igw
]
tags = {
Name = "${var.app_name}-${var.env_name}-subnet-public-${substr(element(keys(var.public_subnets), count.index), -1, 0)}"
}
}
#####################################
# Private subnet
#####################################
resource "aws_subnet" "private" {
count = length(var.private_subnets)
vpc_id = aws_vpc.vpc.id
cidr_block = element(values(var.private_subnets), count.index)
availability_zone = element(keys(var.private_subnets), count.index)
tags = {
Name = "${var.app_name}-${var.env_name}-subnet-private-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
}
}
#####################################
# Nat Gateway Settings
#####################################
resource "aws_nat_gateway" "natgw" {
count = var.enable_natgw ? length(var.private_subnets) : 0
allocation_id = element(aws_eip.natgw_eip.*.id, count.index)
  subnet_id     = element(aws_subnet.public.*.id, count.index) # NAT gateways must be placed in a public subnet
tags = {
Name = "${var.app_name}-${var.env_name}-ngw-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
}
depends_on = [
aws_internet_gateway.igw
]
}
#####################################
# EIP
#####################################
## NAT gateway eip
resource "aws_eip" "natgw_eip" {
count = var.enable_natgw ? length(var.private_subnets) : 0
vpc = true
tags = {
Name = "${var.app_name}-${var.env_name}-eip-nat-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
}
}
## NAT instance eip
resource "aws_eip" "nat_instance_eip" {
count = var.enable_nat_instance ? 1 : 0
vpc = true
tags = {
Name = "${var.app_name}-${var.env_name}-eip-nat"
}
}
resource "aws_eip_association" "nat_instance_eip" {
count = var.enable_nat_instance ? 1 : 0
instance_id = aws_instance.nat_instance[0].id
allocation_id = element(aws_eip.nat_instance_eip.*.id, count.index)
}
#####################################
# Public route table
#####################################
resource "aws_route_table" "public_rtb" {
vpc_id = aws_vpc.vpc.id
}
resource "aws_route_table_association" "public_rta" {
count = length(var.public_subnets)
subnet_id = element(aws_subnet.public.*.id, count.index)
route_table_id = aws_route_table.public_rtb.id
}
resource "aws_main_route_table_association" "main_rt_association" {
vpc_id = aws_vpc.vpc.id
route_table_id = aws_route_table.public_rtb.id
}
resource "aws_route" "public_rtb_default" {
route_table_id = aws_route_table.public_rtb.id
destination_cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.igw.id
}
#####################################
# Private route table - NAT instance
#####################################
resource "aws_route_table" "private_rtb_nat_ins" {
count = var.enable_nat_instance ? 1 : 0
vpc_id = aws_vpc.vpc.id
}
resource "aws_route_table_association" "private_nat_ins_rta" {
count = var.enable_nat_instance ? length(var.private_subnets) : 0
subnet_id = element(aws_subnet.private.*.id, count.index)
route_table_id = aws_route_table.private_rtb_nat_ins[0].id
depends_on = [
aws_route_table.private_rtb_nat_ins
]
}
resource "aws_route" "private_route_nat_ins" {
count = var.enable_nat_instance ? 1 : 0
route_table_id = aws_route_table.private_rtb_nat_ins[0].id
destination_cidr_block = "0.0.0.0/0"
network_interface_id = element(aws_instance.nat_instance.*.primary_network_interface_id, count.index)
}
#####################################
# Private route table - NAT gateway
# There are as many routing tables as the number of NAT gateways
#####################################
resource "aws_route_table" "private_rtb_natgw" {
count = var.enable_natgw ? length(var.private_subnets) : 0
vpc_id = aws_vpc.vpc.id
tags = {
Name = "${var.app_name}-${var.env_name}-
private-${substr(element(keys(var.private_subnets), count.index), -1, 0)}"
}
}
resource "aws_route_table_association" "private_natgw_rta" {
count = var.enable_natgw ? length(var.private_subnets) : 0
subnet_id = element(aws_subnet.private.*.id, count.index)
route_table_id = element(aws_route_table.private_rtb_natgw.*.id, count.index)
depends_on = [
aws_route_table.private_rtb_natgw
]
}
resource "aws_route" "private_route_natgw" {
count = var.enable_natgw ? length(var.private_subnets) : 0
route_table_id = element(aws_route_table.private_rtb_natgw.*.id, count.index)
destination_cidr_block = "0.0.0.0/0"
nat_gateway_id = element(aws_nat_gateway.natgw.*.id, count.index)
}
#####################################
# DHCP Option Set
#####################################
resource "aws_vpc_dhcp_options" "dhcp_option_set" {
count = var.create_custom_dhcp ? 1 : 0
domain_name = var.dhcp_option_domain
domain_name_servers = ["AmazonProvidedDNS"]
}
resource "aws_vpc_dhcp_options_association" "dhcp_option_set_association" {
count = var.create_custom_dhcp ? 1 : 0
vpc_id = aws_vpc.vpc.id
dhcp_options_id = aws_vpc_dhcp_options.dhcp_option_set[0].id
}
#####################################
# VPC endpoint
#####################################
## Gateway endpoint
resource "aws_vpc_endpoint" "vpce_gateway" {
count = var.create_gateway_vpce ? length(var.gateway_endpoint_services) : 0
vpc_id = aws_vpc.vpc.id
service_name = var.gateway_endpoint_services[count.index]
vpc_endpoint_type = "Gateway"
}
resource "aws_vpc_endpoint_route_table_association" "vpce_gateway_attach_nat_ins" {
count = var.create_gateway_vpce && var.enable_nat_instance ? length(var.gateway_endpoint_services) : 0
route_table_id = element(aws_route_table.private_rtb_nat_ins.*.id, count.index)
vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)
depends_on = [
aws_route_table.private_rtb_nat_ins
]
}
resource "aws_vpc_endpoint_route_table_association" "vpce_gateway_attach_natgw" {
count = var.create_gateway_vpce && var.enable_natgw ? length(var.gateway_endpoint_services) : 0
route_table_id = element(aws_route_table.private_rtb_natgw.*.id, count.index)
vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)
depends_on = [
aws_route_table.private_rtb_natgw
]
}
resource "aws_vpc_endpoint_policy" "vpce_gateway_policy_attach" {
count = var.create_gateway_vpce ? length(var.gateway_endpoint_services) : 0
vpc_endpoint_id = element(aws_vpc_endpoint.vpce_gateway.*.id, count.index)
policy = jsonencode({
"Version" : "2012-10-17",
"Statement" : [
{
"Sid" : "AllowAll",
"Effect" : "Allow",
"Principal" : {
"AWS" : "*"
},
"Action" : [
"*"
],
"Resource" : "*"
}
]
})
}
## Interface endpoint
resource "aws_vpc_endpoint" "vpce_interface" {
count = var.create_interface_vpce ? length(var.interface_endpoint_services) : 0
vpc_id = aws_vpc.vpc.id
service_name = var.interface_endpoint_services[count.index]
vpc_endpoint_type = "Interface"
subnet_ids = aws_subnet.private[*].id
security_group_ids = [
aws_security_group.vpce_sg.id
]
private_dns_enabled = true
}
resource "aws_vpc_endpoint_policy" "vpce_interface_policy_attach" {
count = var.create_interface_vpce ? length(var.interface_endpoint_services) : 0
vpc_endpoint_id = element(aws_vpc_endpoint.vpce_interface.*.id, count.index)
policy = jsonencode({
"Version" : "2012-10-17",
"Statement" : [
{
"Sid" : "AllowAll",
"Effect" : "Allow",
"Principal" : {
"AWS" : "*"
},
"Action" : [
"*"
],
"Resource" : "*"
}
]
})
}
locals {
############## Ingress rules ##############
opensearch_sg_ingress_source = {
security_group_lambda = aws_security_group.lambda_sg.id
}
############## Egress rules ##############
lambda_sg_outbound_destination = {
security_group_opensearch = aws_security_group.opensearch_sg.id
}
}
#####################################
# Security Group EC2
#####################################
## EC2 NAT SG
resource "aws_security_group" "ec2_nat_sg" {
count = var.enable_nat_instance ? 1 : 0
name = "${var.app_name}-${var.env_name}-ec2"
vpc_id = aws_vpc.vpc.id
description = "${var.app_name}-${var.env_name}-ec2"
dynamic "egress" {
for_each = var.security_group_common_egress
content {
from_port = egress.value["from_port"]
to_port = egress.value["to_port"]
protocol = egress.value["protocol"]
cidr_blocks = egress.value["cidr_blocks"]
description = egress.value["description"]
}
}
dynamic "ingress" {
for_each = var.ec2_nat_sg_cidr_ingress
content {
from_port = ingress.value["from_port"]
to_port = ingress.value["to_port"]
protocol = ingress.value["protocol"]
cidr_blocks = ingress.value["cidr_blocks"]
description = ingress.value["description"]
}
}
}
#####################################
# Security Group VPC endpoint
#####################################
resource "aws_security_group" "vpce_sg" {
name = "${var.app_name}-${var.env_name}-vpce"
vpc_id = aws_vpc.vpc.id
description = "${var.app_name}-${var.env_name}-vpce"
dynamic "ingress" {
for_each = var.vpce_sg_cidr_ingress
content {
from_port = ingress.value["from_port"]
to_port = ingress.value["to_port"]
protocol = ingress.value["protocol"]
cidr_blocks = ingress.value["cidr_blocks"]
description = ingress.value["description"]
}
}
}
#####################################
# Security Group OpenSearch
#####################################
resource "aws_security_group" "opensearch_sg" {
name = "${var.app_name}-${var.env_name}-os"
vpc_id = aws_vpc.vpc.id
description = "${var.app_name}-${var.env_name}-os"
dynamic "ingress" {
for_each = var.opensearch_sg_sgid_ingress
content {
from_port = ingress.value["from_port"]
to_port = ingress.value["to_port"]
protocol = ingress.value["protocol"]
security_groups = [local.opensearch_sg_ingress_source[ingress.value["security_group"]]]
description = ingress.value["description"]
}
}
}
#####################################
# Security Group Lambda
#####################################
resource "aws_security_group" "lambda_sg" {
name = "${var.app_name}-${var.env_name}-lambda"
vpc_id = aws_vpc.vpc.id
description = "${var.app_name}-${var.env_name}-lambda"
}
resource "aws_security_group_rule" "lambda_sg_outbound" {
for_each = var.lambda_sg_outbound_destination
type = "egress"
from_port = each.value["from_port"]
to_port = each.value["to_port"]
protocol = each.value["protocol"]
source_security_group_id = local.lambda_sg_outbound_destination[each.value["security_group"]]
description = each.value["description"]
security_group_id = aws_security_group.lambda_sg.id
}
## OpenSearch Snapshot
# OSearch snapshot s3 log bucket
data "template_file" "bucket_log" {
template = file("s3_policies/bucketlog.json")
vars = {
osearch_snapshot_bucket = aws_s3_bucket.osearch_snapshot.bucket
bucket = aws_s3_bucket.bucket_log.bucket
account_id = var.account_id
}
}
resource "aws_s3_bucket" "bucket_log" {
provider = aws.osaka
bucket = "${var.app_name}-${var.env_name}-logs"
tags = {
Name = "${var.app_name}-${var.env_name}-logs"
Environment = "${var.env_name}"
System = "${var.app_name}"
}
}
resource "aws_s3_bucket_acl" "bucket_log" {
provider = aws.osaka
bucket = aws_s3_bucket.bucket_log.id
acl = "private"
}
resource "aws_s3_object" "bucket_log" {
provider = aws.osaka
bucket = aws_s3_bucket.bucket_log.id
key = "s3/"
content_type = "application/x-directory"
}
resource "aws_s3_bucket_policy" "bucket_log" {
provider = aws.osaka
bucket = aws_s3_bucket.bucket_log.id
policy = data.template_file.bucket_log.rendered
depends_on = [
aws_s3_object.bucket_log
]
}
resource "aws_s3_bucket_ownership_controls" "bucket_log" {
provider = aws.osaka
bucket = aws_s3_bucket.bucket_log.id
rule {
object_ownership = "BucketOwnerPreferred"
}
}
# OpenSearch Snapshot bucket
resource "aws_s3_bucket" "osearch_snapshot" {
provider = aws.osaka
bucket = "${var.app_name}-${var.env_name}-snapshot"
tags = {
Name = "${var.app_name}-${var.env_name}-snapshot"
Environment = "${var.env_name}"
System = "${var.app_name}"
}
}
resource "aws_s3_bucket_acl" "osearch_snapshot" {
provider = aws.osaka
bucket = aws_s3_bucket.osearch_snapshot.id
acl = "private"
}
resource "aws_s3_bucket_server_side_encryption_configuration" "osearch_snapshot" {
provider = aws.osaka
bucket = aws_s3_bucket.osearch_snapshot.bucket
rule {
apply_server_side_encryption_by_default {
kms_master_key_id = aws_kms_key.kms_s3_osaka[0].arn
sse_algorithm = "aws:kms"
}
bucket_key_enabled = true
}
}
resource "aws_s3_bucket_ownership_controls" "osearch_snapshot" {
provider = aws.osaka
bucket = aws_s3_bucket.osearch_snapshot.id
rule {
object_ownership = "BucketOwnerPreferred"
}
}
output "s3_osearch_snapshot_bucket_name" {
value = aws_s3_bucket.osearch_snapshot.bucket
}
data "aws_region" "current" {}
data "aws_caller_identity" "current" {}
data "aws_route53_zone" "opensearch" {
count = var.custom_endpoint_enabled ? 1 : 0
name = var.cluster_domain
}
resource "aws_iam_service_linked_role" "osearch" {
count = var.create_opensearch && var.create_iam_service_linked_role ? 1 : 0
aws_service_name = "opensearchservice.amazonaws.com"
}
resource "aws_opensearch_domain" "opensearch" {
count = var.create_opensearch ? 1 : 0
domain_name = "${var.app_name}-${var.env_name}-domain"
engine_version = var.osearch_engine_version
access_policies = <<CONFIG
{
"Version": "2012-10-17",
"Statement": [
{
"Action": "es:*",
"Principal": "*",
"Effect": "Allow",
"Resource": "arn:aws:es:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:domain/*"
}
]
}
CONFIG
cluster_config {
dedicated_master_enabled = var.master_instance_enabled
dedicated_master_count = var.master_instance_enabled ? var.master_instance_count : null
dedicated_master_type = var.master_instance_enabled ? var.master_instance_type : null
instance_count = var.hot_instance_count
instance_type = var.hot_instance_type
warm_enabled = var.warm_instance_enabled
warm_count = var.warm_instance_enabled ? var.warm_instance_count : null
warm_type = var.warm_instance_enabled ? var.warm_instance_type : null
zone_awareness_enabled = (var.availability_zones > 1) ? true : false
dynamic "zone_awareness_config" {
for_each = (var.availability_zones > 1) ? [var.availability_zones] : []
content {
availability_zone_count = zone_awareness_config.value
}
}
cold_storage_options {
enabled = var.enable_cold_storage_options
}
}
### If you need anonymous authentication: on the first `terraform apply`, set enabled in advanced_security_options to `false` and use null_resource.enable_anonymous_auth to turn anonymous authentication on afterwards.
### After the first successful apply (once the null_resource has enabled it out of band), change enabled back to `true` so that Terraform does not recreate the OpenSearch domain!
### The reason for this two-step dance: anonymous authentication cannot be enabled while the domain is being created, only via a later configuration update.
### So keep it false for the first apply to avoid the error and a domain recreation.
advanced_security_options {
enabled = true
internal_user_database_enabled = true
master_user_options {
master_user_name = var.master_user_name
master_user_password = var.master_user_password
}
}
domain_endpoint_options {
enforce_https = var.enforce_https
tls_security_policy = var.tls_security_policy
custom_endpoint_enabled = var.custom_endpoint_enabled
custom_endpoint = var.custom_endpoint_enabled ? "${var.cluster_name}.${data.aws_route53_zone.opensearch[0].name}" : ""
custom_endpoint_certificate_arn = var.custom_endpoint_enabled ? var.custom_endpoint_certificate_arn : ""
}
node_to_node_encryption {
enabled = true
}
encrypt_at_rest {
enabled = true
kms_key_id = var.encrypt_kms_key_id
}
dynamic "vpc_options" {
for_each = var.vpc_enabled ? [true] : []
content {
security_group_ids = ["${aws_security_group.opensearch_sg.id}"]
subnet_ids = ["${aws_subnet.private[0].id}"]
}
}
# Note that the values for these configuration options must be strings (wrapped in quotes)
# or they may be wrong and cause a perpetual diff, causing Terraform to want to recreate your OpenSearch domain on every apply.
advanced_options = {
"rest.action.multi.allow_explicit_index" = "true"
}
dynamic "ebs_options" {
for_each = var.ebs_enabled ? [true] : []
content {
ebs_enabled = true
volume_size = var.ebs_volume_size
volume_type = var.ebs_volume_type
throughput = var.ebs_throughput
iops = var.ebs_iops
}
}
dynamic "log_publishing_options" {
for_each = var.log_publishing_options_enabled ? [true] : []
content {
enabled = true
cloudwatch_log_group_arn = var.osearch_log_cw_log_group_arn
log_type = var.osearch_log_publishing_type
}
}
dynamic "auto_tune_options" {
for_each = var.enable_auto_tune_options ? [true] : []
content {
desired_state = var.oseach_autotune_desired_state
rollback_on_disable = var.oseach_autotune_rollback
maintenance_schedule {
start_at = var.autotune_maintenance_start
duration {
value = var.autotune_maintenance_duration_value
unit = var.autotune_maintenance_duration_unit
}
cron_expression_for_recurrence = var.autotune_maintenance_cron_expression
}
}
}
dynamic "cognito_options" {
for_each = var.enable_cognito_options ? [true] : []
content {
enabled = true
identity_pool_id = var.osearch_identity_pool_id
role_arn = var.osearch_cognito_role_arn
user_pool_id = var.osearch_user_pool_id
}
}
depends_on = [aws_iam_service_linked_role.osearch]
}
resource "null_resource" "enable_aunonymous_auth" {
## To advoid: Error creating OpenSearch domain: ValidationException: Cannot enable anonymous auth during domain creation.
## Command to enable: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html
### and: https://docs.aws.amazon.com/cli/latest/reference/opensearch/update-domain-config.html
count = var.create_opensearch && var.enable_anonymous_auth ? 1 : 0
provisioner "local-exec" {
command = "aws opensearch update-domain-config --domain-name ${var.app_name}-${var.env_name}-domain --advanced-security-options file://config_files/osearch.json"
on_failure = continue
}
depends_on = [
aws_opensearch_domain.opensearch
]
}
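The contents of config_files/osearch.json are not shown here. Judging from the shape of the advanced-security-options parameter of update-domain-config, the file would look something like the following (my reconstruction, not the author's actual file):
{
  "AnonymousAuthEnabled": true
}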
output "cluster_name" {
description = "The name of the OpenSearch cluster."
value = aws_opensearch_domain.opensearch[0].domain_name
}
output "cluster_endpoint" {
description = "The endpoint URL of the OpenSearch cluster."
value = "https://${aws_opensearch_domain.opensearch[0].endpoint}"
}
output "dashboard_endpoint" {
description = "The endpoint URL of the OpenSearch dashboards."
value = "https://${aws_opensearch_domain.opensearch[0].endpoint}/_dashboards/"
}
data "archive_file" "ossnapshot_get" {
type = "zip"
source_file = "lambda/ossnapshot_get/lambda_function.py"
output_path = "lambda/upload/ossnapshot_get.zip"
}
resource "aws_lambda_function" "ossnapshot_get" {
filename = data.archive_file.ossnapshot_get.output_path
function_name = "${var.app_name}-${var.env_name}-get"
role = aws_iam_role.snapshot_role.arn
handler = var.options_ossnapshot_get["handler"]
source_code_hash = data.archive_file.ossnapshot_get.output_base64sha256
runtime = var.options_ossnapshot_get["runtime"]
memory_size = var.options_ossnapshot_get["memory_size"]
timeout = var.options_ossnapshot_get["timeout"]
layers = ["${aws_lambda_layer_version.ossnapshot_get.arn}"]
vpc_config {
subnet_ids = aws_subnet.private[*].id
security_group_ids = ["${aws_security_group.lambda_sg.id}"]
}
dynamic "environment" {
for_each = length(keys(var.get_environment_variables)) == 0 ? [] : [true]
content {
variables = var.get_environment_variables
}
}
depends_on = [
aws_iam_role_policy_attachment.snapshot_role
]
}
resource "aws_lambda_permission" "ossnapshot_get" {
statement_id = var.options_ossnapshot_get["statement_id"]
action = var.options_ossnapshot_get["action"]
function_name = aws_lambda_function.ossnapshot_get.function_name
principal = var.options_ossnapshot_get["principal"]
source_arn = var.options_ossnapshot_get["source_arn"]
}
data "archive_file" "ossnapshot_delete" {
type = "zip"
source_file = "lambda/ossnapshot_delete/lambda_function.py"
output_path = "lambda/upload/ossnapshot_delete.zip"
}
resource "aws_lambda_function" "ossnapshot_delete" {
filename = data.archive_file.ossnapshot_delete.output_path
function_name = "${var.app_name}-${var.env_name}-delete"
role = aws_iam_role.snapshot_role.arn
handler = var.options_ossnapshot_delete["handler"]
source_code_hash = data.archive_file.ossnapshot_delete.output_base64sha256
runtime = var.options_ossnapshot_delete["runtime"]
memory_size = var.options_ossnapshot_delete["memory_size"]
timeout = var.options_ossnapshot_delete["timeout"]
layers = ["${aws_lambda_layer_version.ossnapshot_delete.arn}"]
vpc_config {
subnet_ids = aws_subnet.private[*].id
security_group_ids = ["${aws_security_group.lambda_sg.id}"]
}
dynamic "environment" {
for_each = length(keys(var.delete_environment_variables)) == 0 ? [] : [true]
content {
variables = var.delete_environment_variables
}
}
depends_on = [
aws_iam_role_policy_attachment.snapshot_role
]
}
resource "aws_lambda_permission" "ossnapshot_delete" {
statement_id = var.options_ossnapshot_delete["statement_id"]
action = var.options_ossnapshot_delete["action"]
function_name = aws_lambda_function.ossnapshot_delete.function_name
principal = var.options_ossnapshot_delete["principal"]
source_arn = var.options_ossnapshot_delete["source_arn"]
}
resource "aws_lambda_layer_version" "ossnapshot_delete" {
filename = "lambda/requirements/oss_delete/python.zip"
layer_name = "ossnapshot_delete_packages"
description = "ossnapshot_delete_packages"
compatible_runtimes = ["python3.8", "python3.9"]
compatible_architectures = ["arm64", "x86_64"]
}
resource "aws_lambda_layer_version" "ossnapshot_get" {
filename = "lambda/requirements/oss_get/python.zip"
layer_name = "ossnapshot_get_packages"
description = "ossnapshot_get_packages"
compatible_runtimes = ["python3.8", "python3.9"]
}
output "ossnapshot_get_layer_arn" {
value = aws_lambda_layer_version.ossnapshot_get.arn
}
output "ossnapshot_get_layer_version" {
value = aws_lambda_layer_version.ossnapshot_get.version
}
output "ossnapshot_delete_layer_arn" {
value = aws_lambda_layer_version.ossnapshot_delete.arn
}
output "ossnapshot_delete_layer_version" {
value = aws_lambda_layer_version.ossnapshot_delete.version
}
data "template_file" "kms_s3_policy" {
count = var.create_kms_s3 ? 1 : 0
template = file("kms_policies/kms_s3_policy.json")
vars = {
app_name = var.app_name
env_name = var.env_name
account_id = var.account_id
region = var.region
}
}
resource "aws_kms_key" "kms_s3_osaka" {
provider = aws.osaka
count = var.create_kms_s3 && var.create_in_osaka ? 1 : 0
is_enabled = true
enable_key_rotation = true
  policy              = data.template_file.kms_s3_policy[0].rendered
}
resource "aws_kms_alias" "kms_s3_osaka" {
provider = aws.osaka
count = var.create_kms_s3 ? 1 : 0
name = "alias/${var.app_name}-${var.env_name}-kms-s3"
target_key_id = aws_kms_key.kms_s3_osaka[0].id
}
#####################################
# IAM Policies
#####################################
resource "aws_iam_policy" "osearch_snapshot_operations" {
name = "${var.app_name}-${var.env_name}-os"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"es:ESHttpPost",
"es:ESHttpGet",
"es:ESHttpPut",
"es:ESHttpDelete"
],
"Resource": "${aws_opensearch_domain.opensearch[0].arn}/*"
}
]
}
EOF
}
resource "aws_iam_policy" "passrole" {
name = "${var.app_name}-${var.env_name}-passrole"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"iam:GetRole",
"iam:PassRole"
],
"Resource": "*"
}
]
}
EOF
}
resource "aws_iam_policy" "logs" {
name = "${var.app_name}-${var.env_name}-logs"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"logs:DescribeLogStreams",
"logs:CreateLogGroup",
"logs:DescribeLogStreams",
"logs:CreateLogStream",
"logs:PutLogEvents",
"logs:GetLogEvents"
],
"Resource": "*"
}
]
}
EOF
}
data "aws_iam_policy" "aws_lambda_vpc_access_execution" {
name = "AWSLambdaVPCAccessExecutionRole"
}
resource "aws_iam_policy" "bucket_snapshot" {
name = "${var.app_name}-${var.env_name}-bucket-snapshot"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"s3:Get*",
"s3:List*",
"s3:Put*",
"s3:DeleteObject",
"s3:AbortMultipartUpload"
],
"Resource": [
"arn:aws:s3:::${aws_s3_bucket.osearch_snapshot.bucket}",
"arn:aws:s3:::${aws_s3_bucket.osearch_snapshot.bucket}/*"
]
},
{
"Action": [
"kms:Decrypt",
"kms:GenerateDataKey"
],
"Effect": "Allow",
"Resource": [
"*"
],
"Sid": "VisualEditor0"
}
]
}
EOF
}
#####################################
# IAM Roles
#####################################
locals {
  snapshot_role_policy_arn_list = [
    data.aws_iam_policy.aws_lambda_vpc_access_execution.arn,
    aws_iam_policy.logs.arn,
    aws_iam_policy.bucket_snapshot.arn,
    aws_iam_policy.osearch_snapshot_operations.arn,
    aws_iam_policy.passrole.arn,
  ]
}
data "aws_iam_policy_document" "snapshot_role" {
statement {
effect = "Allow"
actions = ["sts:AssumeRole"]
principals {
type = "Service"
identifiers = ["lambda.amazonaws.com", "ssm.amazonaws.com","es.amazonaws.com"]
}
}
}
resource "aws_iam_role" "snapshot_role" {
name = "${var.app_name}-${var.env_name}-snapshot-role"
assume_role_policy = data.aws_iam_policy_document.snapshot_role.json
}
resource "aws_iam_role_policy_attachment" "snapshot_role" {
count = length(local.snapshot_role_policy_arn_list)
role = aws_iam_role.snapshot_role.name
policy_arn = element(local.snapshot_role_policy_arn_list, count.index)
}
data "aws_iam_role" "osearch_service_role" {
name = "AWSServiceRoleForAmazonOpenSearchService"
}
output "osearch_snapshot_lambda_role_arn" {
value = aws_iam_role.snapshot_role.arn
}
#####################################
# EC2
#####################################
## NAT instance
resource "aws_instance" "nat_instance" {
count = var.enable_nat_instance ? 1 : 0
ami = var.nat_instance_ami
availability_zone = var.az_a
instance_type = var.nat_instance_type
disable_api_termination = "true"
key_name = "${var.app_name}-${var.env_name}-key"
subnet_id = aws_subnet.public[0].id
source_dest_check = "false"
credit_specification {
cpu_credits = "standard"
}
vpc_security_group_ids = [
aws_security_group.ec2_nat_sg[0].id,
]
}
#################################
# CloudWatch Logs & event lambda
#################################
## ossnapshot_get
resource "aws_cloudwatch_event_rule" "ossnapshot_get" {
name = "${var.app_name}-${var.env_name}-get"
description = "${var.app_name}-${var.env_name}-get"
schedule_expression = var.options_ossnapshot_get["schedule_expression"]
}
resource "aws_cloudwatch_event_target" "ossnapshot_get" {
rule = aws_cloudwatch_event_rule.ossnapshot_get.name
target_id = aws_lambda_function.ossnapshot_get.id
arn = aws_lambda_function.ossnapshot_get.arn
}
resource "aws_cloudwatch_log_group" "ossnapshot_get" {
name = "/aws/lambda/${var.app_name}-${var.env_name}-get-snapshot"
retention_in_days = var.options_ossnapshot_get["retention_in_days"]
}
## ossnapshot_delete
resource "aws_cloudwatch_event_rule" "ossnapshot_delete" {
name = "${var.app_name}-${var.env_name}-delete"
description = "${var.app_name}-${var.env_name}-delete"
schedule_expression = var.options_ossnapshot_delete["schedule_expression"]
}
resource "aws_cloudwatch_event_target" "ossnapshot_delete" {
rule = aws_cloudwatch_event_rule.ossnapshot_delete.name
target_id = aws_lambda_function.ossnapshot_delete.id
arn = aws_lambda_function.ossnapshot_delete.arn
}
resource "aws_cloudwatch_log_group" "ossnapshot_delete" {
name = "/aws/lambda/${var.app_name}-${var.env_name}-delete-snapshot"
retention_in_days = var.options_ossnapshot_delete["retention_in_days"]
}
#####################################
# Provider Settings
#####################################
provider "aws" {
region = var.region
}
provider "aws" {
region = var.dr_region
alias = "osaka"
}
#####################################
# Variable Settings
#####################################
#####################################
# AWS Settings
#####################################
variable "region" {
type = string
default = "ap-northeast-1"
}
variable "dr_region" {
type = string
default = "ap-northeast-3"
}
#####################################
# App Name
#####################################
variable "app_name" {
}
#####################################
# Env Name
#####################################
variable "env_name" {
}
#####################################
# Account ID
#####################################
variable "account_id" {
}
#####################################
# Segment Settings
#####################################
variable "root_cidr" {
}
#####################################
# AZ
#####################################
variable "az_a" {
}
variable "az_b" {
}
variable "az_c" {
}
variable "az_d" {
}
#####################################
# Subnet
#####################################
variable "public_subnets" {
type = map(any)
default = {
}
}
variable "private_subnets" {
type = map(any)
default = {
}
}
#####################################
# NAT
#####################################
variable "enable_natgw" {
type = bool
default = false
}
variable "enable_nat_instance" {
type = bool
default = true
}
#####################################
# DHCP Option Set
#####################################
variable "create_custom_dhcp" {
type = bool
default = false
}
variable "dhcp_option_domain" {
}
#####################################
# VPC endpoint
#####################################
variable "create_gateway_vpce" {
type = bool
default = true
}
variable "gateway_endpoint_services" {
}
variable "create_interface_vpce" {
type = bool
default = false
}
variable "interface_endpoint_services" {
}
#####################################
# EC2
#####################################
variable "nat_instance_type" {
}
variable "nat_instance_ami" {
}
#####################################
# SG
#####################################
variable "security_group_common_egress" {
}
variable "ec2_nat_sg_cidr_ingress" {
}
variable "vpce_sg_cidr_ingress" {
}
variable "opensearch_sg_sgid_ingress" {
default = {}
}
variable "lambda_sg_outbound_destination" {
default = {}
}
#####################################
# KMS
#####################################
variable "create_kms_s3" {
type = bool
default = true
}
variable "create_in_osaka" {
type = bool
default = false
}
#####################################
# OpenSearch
#####################################
variable "cluster_name" {
type = string
default = "opensearch"
}
variable "create_opensearch" {
type = bool
default = true
}
variable "custom_endpoint_enabled" {
type = bool
default = false
}
variable "create_iam_service_linked_role" {
type = bool
default = true
}
variable "master_instance_enabled" {
type = bool
default = false
}
variable "warm_instance_enabled" {
type = bool
default = false
}
variable "enforce_https" {
type = bool
default = true
}
variable "vpc_enabled" {
description = "Indicates whether the cluster is running inside a VPC."
type = bool
default = true
}
variable "ebs_enabled" {
description = "Indicates whether attach EBS volumes to the data nodes."
type = bool
default = true
}
variable "osearch_engine_version" {
type = string
default = "OpenSearch_2.3"
}
variable "cluster_domain" {
type = string
default = "opensearch"
}
variable "master_instance_count" {
description = "The number of dedicated master nodes in the cluster."
type = number
default = 3
}
variable "master_instance_type" {
description = "The type of EC2 instances to run for each master node. "
type = string
default = "m6g.large.search"
}
variable "hot_instance_count" {
description = "The number of dedicated hot nodes in the cluster."
type = number
default = 3
}
variable "hot_instance_type" {
description = "The type of EC2 instances to run for each hot node."
type = string
default = "t3.medium.search"
}
variable "warm_instance_count" {
description = "The number of dedicated warm nodes in the cluster."
type = number
default = 3
}
variable "warm_instance_type" {
type = string
default = "c6g.large.search" ## The T3 instance types do not support UltraWarm storage, cold storage, or Auto-Tune.
}
variable "availability_zones" {
description = "The number of availability zones for the OpenSearch cluster. Valid values: 1, 2 or 3."
type = number
default = 3
}
variable "master_user_name" {
type = string
default = ""
}
variable "master_user_password" {
type = string
default = ""
}
variable "tls_security_policy" {
type = string
default = "Policy-Min-TLS-1-2-2019-07"
}
variable "custom_endpoint_certificate_arn" {
type = string
default = ""
}
variable "encrypt_kms_key_id" {
description = "The KMS key ID to encrypt the OpenSearch cluster with. If not specified, then it defaults to using the AWS OpenSearch Service KMS key."
type = string
default = ""
}
variable "ebs_volume_size" {
description = "The size of EBS volumes attached to data nodes (in GiB)."
type = number
default = 10
}
variable "ebs_volume_type" {
description = "The type of EBS volumes attached to data nodes."
type = string
default = "gp3"
}
variable "ebs_throughput" {
description = "The throughput (in MiB/s) of the EBS volumes attached to data nodes. Valid values are between 125 and 1000."
type = number
default = 125
}
variable "ebs_iops" {
description = "The baseline input/output (I/O) performance of EBS volumes attached to data nodes."
type = number
default = 3000
}
variable "enable_anonymous_auth" {
type = bool
default = false
}
variable "log_publishing_options_enabled" {
type = bool
default = false
}
variable "osearch_log_cw_log_group_arn" {
type = string
default = ""
}
variable "osearch_log_publishing_type" {
description = " Type of OpenSearch log. Valid values: INDEX_SLOW_LOGS, SEARCH_SLOW_LOGS, ES_APPLICATION_LOGS, AUDIT_LOGS."
type = string
default = "AUDIT_LOGS"
}
variable "enable_auto_tune_options" {
type = bool
default = false
}
variable "oseach_autotune_desired_state" {
description = " Auto-Tune desired state for the domain. Valid values: ENABLED or DISABLED."
default = "DISABLED"
}
variable "autotune_maintenance_start" {
description = "Date and time at which to start the Auto-Tune maintenance schedule in RFC3339 format. (e.g. 2021-12-19)"
type = string
default = ""
}
variable "autotune_maintenance_duration_value" {
description = "An integer specifying the value of the duration of an Auto-Tune maintenance window."
type = number
default = 1
}
variable "autotune_maintenance_duration_unit" {
description = "Unit of time specifying the duration of an Auto-Tune maintenance window. Valid values: HOURS"
type = string
default = "HOURS"
}
variable "autotune_maintenance_cron_expression" {
description = "A cron expression specifying the recurrence pattern for an Auto-Tune maintenance schedule. E.g. cron(0 12 * * ? *)"
type = string
default = ""
}
variable "oseach_autotune_rollback" {
description = "Whether to roll back to default Auto-Tune settings when disabling Auto-Tune. Valid values: DEFAULT_ROLLBACK or NO_ROLLBACK"
type = string
default = "NO_ROLLBACK"
}
variable "enable_cognito_options" {
type = bool
default = false
}
variable "osearch_identity_pool_id" {
description = "ID of the Cognito Identity Pool to use"
type = string
default = ""
}
variable "search_cognito_role_arn" {
description = "ARN of the IAM role that has the AmazonOpenSearchServiceCognitoAccess policy attached."
type = string
default = ""
}
variable "osearch_user_pool_id" {
description = "ID of the Cognito User Pool to use."
type = string
default = ""
}
variable "enable_cold_storage_options" {
description = "Boolean to enable cold storage for an OpenSearch domain. Defaults to false. Master and ultrawarm nodes must be enabled for cold storage."
type = bool
default = false
}
#####################################
# Lambda
#####################################
variable "options_ossnapshot_get" {
default = {}
}
variable "options_ossnapshot_delete" {
default = {}
}
variable "get_environment_variables" {
default = {}
}
variable "delete_environment_variables" {
default = {}
}
s3_policies/bucketlog.json
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "S3ServerAccessLogsPolicy",
"Effect": "Allow",
"Principal": {
"Service": "logging.s3.amazonaws.com"
},
"Action": [
"s3:PutObject"
],
"Resource": "arn:aws:s3:::${bucket}/s3/*",
"Condition": {
"ArnLike": {
"aws:SourceArn": "arn:aws:s3:::${osearch_snapshot_bucket}"
},
"StringEquals": {
"aws:SourceAccount": "${account_id}"
}
}
}
]
}
kms_policies/kms_s3_policy.json
{
"Version": "2012-10-17",
"Id": "${app_name}-${env_name}-kms-s3",
"Statement": [
{
"Sid": "Allow access through S3 for all principals in the account that are authorized to use S3",
"Effect": "Allow",
"Principal": {
"AWS": "*"
},
"Action": [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
],
"Resource": "*",
"Condition": {
"StringEquals": {
"kms:ViaService": "s3.${region}.amazonaws.com",
"kms:CallerAccount": "${account_id}"
}
}
},
{
"Sid": "Allow direct access to key metadata to the account",
"Effect": "Allow",
"Principal": {
"AWS": "arn:aws:iam::${account_id}:root"
},
"Action": [
"kms:*"
],
"Resource": "*"
}
]
}
sample terraform.tfvars
#####################################
# AWS Settings
#####################################
region = "ap-northeast-1"
dr_region = "ap-northeast-3"
#####################################
# App Name
#####################################
app_name = "xxxx"
#####################################
# Env Name
#####################################
env_name = "xxxx"
#####################################
# Account ID
#####################################
account_id = "xxxxxxxxxxx"
#####################################
# Segment Settings
#####################################
root_cidr = "xx.xx.xx.xx/xx"
public_subnets = {
ap-northeast-1a = "xx.xx.xx.xx/xx"
ap-northeast-1c = "xx.xx.xx.xx/xx"
}
private_subnets = {
ap-northeast-1a = "xx.xx.xx.xx/xx"
ap-northeast-1c = "xx.xx.xx.xx/xx"
}
#####################################
# AZ
#####################################
az_a = "ap-northeast-1a"
az_b = "ap-northeast-1b"
az_c = "ap-northeast-1c"
az_d = "ap-northeast-1d"
#####################################
# NAT
#####################################
enable_natgw = false
enable_nat_instance = true
#####################################
# DHCP Option Set
#####################################
create_custom_dhcp = false
dhcp_option_domain = ""
#####################################
# VPC endpoint
#####################################
create_gateway_vpce = true
gateway_endpoint_services = ["com.amazonaws.ap-northeast-1.s3"]
create_interface_vpce = true
interface_endpoint_services = ["com.amazonaws.ap-northeast-1.logs", "com.amazonaws.ap-northeast-1.monitoring", "com.amazonaws.ap-northeast-1.kms"]
#####################################
# EC2
#####################################
nat_instance_type = "t3.nano"
nat_instance_ami = "ami-04f90b458b992deda"
#####################################
# SG
#####################################
## common egress
security_group_common_egress = {
security_group_common_egress_001 = {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
description = "all"
}
}
## nat instance
ec2_nat_sg_cidr_ingress = {
ec2_nat_sg_cidr_ingress_001 = {
from_port = 22
to_port = 22
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "Private subnet CIDR"
}
ec2_nat_sg_cidr_ingress_002 = {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
description = "Private subnet CIDR"
}
ec2_nat_sg_cidr_ingress_003 = {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
description = "Private subnet CIDR"
}
ec2_nat_sg_cidr_ingress_004 = {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
description = "Private subnet CIDR"
}
}
## vpce
vpce_sg_cidr_ingress = {
vpce_sg_cidr_ingress_001 = {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
description = "Private subnet CIDR"
}
vpce_sg_cidr_ingress_002 = {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["xx.xx.xx.xx/xx", "xx.xx.xx.xx/xx"]
description = "Private subnet CIDR"
}
}
## OpenSearch
opensearch_sg_sgid_ingress = {
opensearch_sg_sgid_ingress_01 = {
from_port = 443
to_port = 443
protocol = "tcp"
security_group = "security_group_lambda"
description = "security group lambda"
}
}
# Lambda
lambda_sg_outbound_destination = {
lambda_sg_outbound_destination_001 = {
from_port = 443
to_port = 443
protocol = "tcp"
security_group = "security_group_opensearch"
description = "domain sg"
}
}
#####################################
# KMS
#####################################
create_kms_s3 = true
create_in_osaka = true
#####################################
# OpenSearch
#####################################
create_iam_service_linked_role = false
create_opensearch = true
hot_instance_count = 1
hot_instance_type = "t3.medium.search"
availability_zones = 1
master_user_name = "xxxxx"
master_user_password = "xxxxxxxxxx"
enforce_https = true
tls_security_policy = "Policy-Min-TLS-1-2-2019-07"
ebs_enabled = true
ebs_volume_size = 10
enable_anonymous_auth = false
#####################################
# Lambda
#####################################
options_ossnapshot_get = {
handler = "lambda_function.lambda_handler"
runtime = "python3.9"
schedule_expression = "cron(00 01 ? * MON-FRI *)"
memory_size = 512
timeout = 900
retention_in_days = 1827
statement_id = "get-permission"
action = "lambda:InvokeFunction"
principal = "events.amazonaws.com"
source_arn = "arn:aws:events:ap-northeast-1:830427153490:rule/*"
}
options_ossnapshot_delete = {
handler = "lambda_function.lambda_handler"
runtime = "python3.9"
schedule_expression = "cron(00 01 ? * MON-FRI *)"
memory_size = 512
timeout = 900
retention_in_days = 1827
statement_id = "delete-permission"
action = "lambda:InvokeFunction"
principal = "events.amazonaws.com"
source_arn = "arn:aws:events:ap-northeast-1:830427153490:rule/*"
}
get_environment_variables = {
DOMAIN_HOST = "https://xxxxxxxx-ogx3cplo22kfe6gdffgdhfghdfghpq.ap-northeast-1.es.amazonaws.com/"
DOMAIN_REGION = "ap-northeast-1"
REPOSITORY_PATH = "_snapshot/manual"
SNAPSHOT_PREFIX = "manual-snapshot-test"
BUCKET_NAME = "snapshot"
DOMAIN_SNAPSHOT_ROLE_ARN = "arn:aws:iam::xxxxxxx:role/xxxxxxxxxxxx"
}
delete_environment_variables = {
DOMAIN_HOST = "xxxxxx-ogx3cplo22kfe662aa62w4afpq.ap-northeast-1.es.amazonaws.com"
DOMAIN_REGION = "ap-northeast-1"
REPOSITORY_PATH = "_snapshot/manual/"
SNAPSHOT_PREFIX = "manual-snapshot-test"
REPOSITORY_NAME = "manual"
UNIT = "minutes"
UNIT_COUNT = 1
}
How to resolve the error “groupSize must be greater than 0 but was -2” when deleting OpenSearch snapshots
When I first created the create-snapshot Lambda function, I set REPOSITORY_PATH = “_snapshot/manual/” in the function's environment variables. With that value, the payload:
payload = {
"type": "s3",
"settings": {
"bucket": bucket_name,
"base_path": repository_path,
"endpoint": "s3.amazonaws.com",
"role_arn": role_arn
}
}
was actually rendered as:
payload = {
"type": "s3",
"settings": {
"bucket": bucket_name,
"base_path": "_snapshot/manual/",
"endpoint": "s3.amazonaws.com",
"role_arn": role_arn
}
}
You can see that there is a trailing slash after the repository name “manual”. This does not cause any error in the get-snapshot function.
However, when I tried to delete snapshots, I got the error “groupSize must be greater than 0 but was -2”, even though the snapshot was in fact deleted. The error does not appear when there is only one snapshot in the repository, but it does when there is more than one.
After several rounds of debugging, it turned out that the trailing slash inside the payload of the get-snapshot function was the culprit. I removed it (“_snapshot/manual/” -> “_snapshot/manual”) and the function worked fine.
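To make the functions immune to this mistake, you can normalize the environment variable before using it; a one-line guard (my addition, not in the original code):
import os

# Strip any trailing slash so "_snapshot/manual/" and "_snapshot/manual"
# yield the same S3 base_path and the same snapshot URLs.
repository_path = os.environ.get("REPOSITORY_PATH", "_snapshot/manual").rstrip("/")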