From d40f226888ab9bef05ad0d4d5a0416b561a5ce0c Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 3 Apr 2026 13:51:30 -0600 Subject: [PATCH 01/16] Build log destination and let fluent-bit forward to CWLogs in dev --- .../filesystem/etc/fluent-bit/fluent-bit.yaml | 28 +++++++++++++++++++ pulumi/__main__.py | 11 ++++++++ pulumi/config.dev.yaml | 15 +++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml index aa4b010..569bdaf 100644 --- a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml +++ b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml @@ -18,6 +18,11 @@ pipeline: tag: untagged port: 1337 threaded: true + + # Log shippers should override the tag with the name of the application + - name: forward + tag: cloudwatch.untagged + port: 24224 filters: # Run a Lua script that breaks all events in a single Stalwart telemetry payload into multiple @@ -45,3 +50,26 @@ pipeline: tls: on uri: /batch format: json_lines + + # Send logs onward to CloudWatch. Log groups by the derived name must pre-exist, and this + # service must have sufficient IAM permissions to create log streams and post events to them. + - name: cloudwatch_logs + match: cloudwatch.mailstrom.mail + log_group_name: /tb/${ENV}/mailstrom + log_stream_name: mail + region: eu-central-1 + # log_key: systemd message field? + + - name: cloudwatch_logs + match: cloudwatch.mailstrom.api + log_group_name: /tb/${ENV}/mailstrom + log_stream_name: api + region: eu-central-1 + # log_key: systemd message field? + + - name: cloudwatch_logs + match: cloudwatch.untagged + log_group_name: /tb/${ENV}/observability + log_stream_name: untagged + region: eu-central-1 + # log_key: systemd message field? \ No newline at end of file diff --git a/pulumi/__main__.py b/pulumi/__main__.py index f84bcb1..2f75194 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -10,6 +10,7 @@ """ import tb_pulumi +import tb_pulumi.cloudwatch import tb_pulumi.fargate import tb_pulumi.network import tb_pulumi.secrets @@ -36,6 +37,16 @@ **psm_opts, ) + logdest_opts = resources.get('tb:cloudwatch:LogDestination', {}) + logdests = { + logdest_name: tb_pulumi.cloudwatch.LogDestination( + f'{project.name_prefix}-logdest-{logdest_name}', + project=project, + **logdest_config, + ) + for logdest_name, logdest_config in logdest_opts.items() + } + vpc_config = resources.get('tb:network:MultiCidrVpc', {}).get('fluentbit', {}) vpc_fluentbit = tb_pulumi.network.MultiCidrVpc( f'{project.name_prefix}-vpc-fluentbit', diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index e29a67a..b845aff 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -10,6 +10,14 @@ resources: secret_names: - posthog_api_key + tb:cloudwatch:LogDestination: + observability: + log_group: + retention_in_days: 7 + log_streams: + untagged: untagged + org_name: tb + tb:network:MultiCidrVpc: fluentbit: # The observability project has all of 10.202.0.0/16 assigned to it, but let's not soak all @@ -78,7 +86,7 @@ resources: secrets: - name: POSTHOG_API_KEY valueFrom: arn:aws:secretsmanager:eu-central-1:768512802988:secret:observability/dev/posthog_api_key-e3UEK4 - image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:fdd1b4748cfaee29553ee2c83fcaa428b68ba8e88c2791e1626e282b48127b9d + image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:d7d656619957e4cc5df44d9b9004f63e81d3732c4bb16e10ba0297f1b58755fa logConfiguration: logDriver: awslogs options: @@ -160,6 +168,11 @@ resources: service: desired_count: 2 target: stalwart-metrics + + extra_policies: + fluentbit: + - arn:aws:iam::768512802988:policy/mailstrom-dev-stalwart-logs-write-access + - arn:aws:iam::768512802988:policy/observability-dev-observability-logs-write-access autoscalers: fluentbit: From 21bdf962c529bbf6875e518a7839c96260ddf28b Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Mon, 6 Apr 2026 11:46:25 -0600 Subject: [PATCH 02/16] Fix fluent-bit CWLogs match/group names --- fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml index 569bdaf..9628157 100644 --- a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml +++ b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml @@ -54,15 +54,15 @@ pipeline: # Send logs onward to CloudWatch. Log groups by the derived name must pre-exist, and this # service must have sufficient IAM permissions to create log streams and post events to them. - name: cloudwatch_logs - match: cloudwatch.mailstrom.mail - log_group_name: /tb/${ENV}/mailstrom + match: cloudwatch.stalwart.mail + log_group_name: /tb/${ENV}/stalwart log_stream_name: mail region: eu-central-1 # log_key: systemd message field? - name: cloudwatch_logs - match: cloudwatch.mailstrom.api - log_group_name: /tb/${ENV}/mailstrom + match: cloudwatch.stalwart.api + log_group_name: /tb/${ENV}/stalwart log_stream_name: api region: eu-central-1 # log_key: systemd message field? From 15016231530d5b8c963414b0d35ff45c798f4b8a Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Mon, 6 Apr 2026 14:36:15 -0600 Subject: [PATCH 03/16] Reconfigure fluent-bit service architecture --- pulumi/Pulumi.dev.yaml | 4 +++ pulumi/__main__.py | 11 +++++++ pulumi/config.dev.yaml | 73 +++++++++++++++++++++++++---------------- pulumi/requirements.txt | 1 + 4 files changed, 61 insertions(+), 28 deletions(-) diff --git a/pulumi/Pulumi.dev.yaml b/pulumi/Pulumi.dev.yaml index 6a30ae2..caf11ab 100644 --- a/pulumi/Pulumi.dev.yaml +++ b/pulumi/Pulumi.dev.yaml @@ -1,3 +1,7 @@ config: observability:posthog_api_key: secure: AAABAACLeD5lasJAmY66NyJXtacSmTSMj/PiXtmBNIHeBfLx2HA3mhTzyWkPZnD9j8MCYPbtnjJiWeZBzOROWVKEcKpuysV/FV5CDoHCJg== + observability:cloudflare_zone_id: + secure: AAABAKragv0vFq2i/lBhwJRTkD/wjW8jefzGy6Mq5A4eZubZLEeh4cSFESB+M3Fv34TvYNxJpFlT208EMqUQLw== + cloudflare:apiToken: + secure: AAABAJfLM7HTgF++SR/ps+pkQQFMNxc0XyRidPcCJKD2nzpc9mRnqdEguDnJlKKwtStygHkRT95D/6n568y+TUf/hkGut6P5 diff --git a/pulumi/__main__.py b/pulumi/__main__.py index 2f75194..07056fb 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -9,6 +9,7 @@ of any of those larger infrastructure patterns. """ +import pulumi_cloudflare as cloudflare import tb_pulumi import tb_pulumi.cloudwatch import tb_pulumi.fargate @@ -65,3 +66,13 @@ 'tb:fargate:AutoscalingFargateCluster' ).items() } + + # cloudflare_zone_id = project.pulumi_config.require_secret('cloudflare_zone_id') + # fluent_bit_dns = cloudflare.DnsRecord( + # f'{project.name_prefix}-dns-fluentbit', + # name='fluent-bit' if project.stack == 'prod' else f'fluent-bit-{project.stack}', + # content=ecs_clusters['fluentbit'].resources['load_balancers'], + # ttl=60, + # type='CNAME', + # zone_id=cloudflare_zone_id, + # ) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index b845aff..bdf1a26 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -45,15 +45,21 @@ resources: cluster: {} container_security_groups: - fluentbit: - fluentbit-http: + fluentbit: # Service + fluentbit: # Load Balancer rules: ingress: - - description: Allow traffic from the load balancer to the container + # Maddeningly, apostrophes aren't allowed in descriptions + - description: Allow traffic from the load balancer to the containers HTTP service # Sources are set in code protocol: tcp from_port: 1337 to_port: 1337 + - description: Allow traffic from the load balancer to the containers log forwarding service + # Sources are set in code + protocol: tcp + from_port: 24224 + to_port: 24224 egress: - description: Allow traffic from the container out to the Internet protocol: tcp @@ -69,7 +75,7 @@ resources: ssm_params: {} task_definitions: - fluentbit: + fluentbit: # Service container_definitions: - name: fluentbit environment: @@ -86,7 +92,7 @@ resources: secrets: - name: POSTHOG_API_KEY valueFrom: arn:aws:secretsmanager:eu-central-1:768512802988:secret:observability/dev/posthog_api_key-e3UEK4 - image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:d7d656619957e4cc5df44d9b9004f63e81d3732c4bb16e10ba0297f1b58755fa + image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:0bc8e6cdee4226f9c090ca22b08b3f6c61a5456a2090669414a43c52dd2580a8 logConfiguration: logDriver: awslogs options: @@ -107,8 +113,8 @@ resources: - FARGATE load_balancer_security_groups: - fluentbit-http: - description: Governs access to the fluent-bit-http load balancer in dev + fluentbit: # Load Balancer + description: Governs access to the fluentbit-http load balancer in dev rules: ingress: - from_port: 443 @@ -116,7 +122,13 @@ resources: protocol: tcp cidr_blocks: - 10.2.0.0/16 # stalwart-dev - description: Allow access from stalwart-dev + description: Allow access to telemetry forwarding service from stalwart-dev + - from_port: 24224 + to_port: 24224 + protocol: tcp + cidr_blocks: + - 10.2.0.0/16 # stalwart-dev + description: Allow access to log forwarding service from stalwart-dev egress: - from_port: 0 to_port: 65535 @@ -125,46 +137,51 @@ resources: - 0.0.0.0/0 load_balancers: - fluentbit-http: + fluentbit: enable_cross_zone_load_balancing: yes internal: yes ip_address_type: ipv4 - load_balancer_type: application - name: fluentbit-http-dev + load_balancer_type: network + name: obsv-dev-fluentbit preserve_host_header: yes targets: + cwlogs: + name: obsv-dev-fluent-logs + health_check: + port: 24224 + protocol: TCP + port: 24224 + protocol: TCP + target_type: ip + ip_address_type: ipv4 stalwart-metrics: - name: dev-telemetry-http + name: obsv-dev-fluent-telemetry health_check: - protocol: HTTP - # fluentbit is set up to throw away records submitted to this endpoint - path: /health/check port: 1337 - # I wish it wasn't like this. fluentbit will not accept non-POST methods, but these health checks cannot be - # made with custom methods. Therefore, we expect a 400 Bad Request as a sign of health. *shrug* - matcher: "400" + protocol: TCP port: 1337 - protocol: HTTP - # Next two options are required for ECS services; ref: - # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/alb.html + protocol: TCP target_type: ip ip_address_type: ipv4 listeners: - fluentbit-http: - stalwart-metrics: + fluentbit: # Load Balancer + cwlogs: # Target + port: 24224 + protocol: TCP + stalwart-metrics: # Target # This cert is for fluentbit-dev.tb.pro - certificate_arn: arn:aws:acm:eu-central-1:768512802988:certificate/04dd0573-a3cc-4c19-b483-a868876c63b0 + # certificate_arn: arn:aws:acm:eu-central-1:768512802988:certificate/04dd0573-a3cc-4c19-b483-a868876c63b0 port: 443 - protocol: HTTPS + protocol: TCP services: - fluentbit: + fluentbit: # Service assign_public_ip: yes - container_name: fluentbit + container_name: fluentbit # Name from container definition container_port: 1337 - load_balancer: fluentbit-http + load_balancer: fluentbit service: desired_count: 2 target: stalwart-metrics diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt index 3573c82..9715d89 100644 --- a/pulumi/requirements.txt +++ b/pulumi/requirements.txt @@ -1,3 +1,4 @@ requests>=2.32.5 +pulumi_cloudflare>=6.14.0,<7 tb_pulumi @ git+https://github.com/thunderbird/pulumi.git@main sdks/site24x7 From 01cdbac84bc0740e3edc647b77ad0421b1116cef Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Tue, 7 Apr 2026 11:42:41 -0600 Subject: [PATCH 04/16] Lint --- pulumi/__main__.py | 1 - pulumi/config.dev.yaml | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/pulumi/__main__.py b/pulumi/__main__.py index 07056fb..e9ad261 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -9,7 +9,6 @@ of any of those larger infrastructure patterns. """ -import pulumi_cloudflare as cloudflare import tb_pulumi import tb_pulumi.cloudwatch import tb_pulumi.fargate diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index bdf1a26..19da8f5 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -39,7 +39,7 @@ resources: additional_routes: - destination_cidr_block: 10.2.0.0/16 # mailstrom-dev vpc_peering_connection_id: pcx-0d2027442f0e54ca4 - + tb:fargate:AutoscalingFargateCluster: fluentbit: cluster: {} @@ -96,12 +96,14 @@ resources: logConfiguration: logDriver: awslogs options: - awslogs-group: observability-dev-fargate-fluentbit-loggroup-fluentbit + awslogs-group: /tb/dev/observability awslogs-region: eu-central-1 - awslogs-stream-prefix: observability/dev/fluentbit/ + awslogs-stream-prefix: 'ecs' portMappings: - containerPort: 1337 protocol: tcp + - containerPort: 24224 + protocol: tcp restartPolicy: enabled: yes restartAttemptPeriod: 300 @@ -171,10 +173,10 @@ resources: port: 24224 protocol: TCP stalwart-metrics: # Target - # This cert is for fluentbit-dev.tb.pro - # certificate_arn: arn:aws:acm:eu-central-1:768512802988:certificate/04dd0573-a3cc-4c19-b483-a868876c63b0 port: 443 - protocol: TCP + protocol: TLS + # This cert is for fluentbit-dev.tb.pro + certificate_arn: arn:aws:acm:eu-central-1:768512802988:certificate/04dd0573-a3cc-4c19-b483-a868876c63b0 services: fluentbit: # Service @@ -184,7 +186,13 @@ resources: load_balancer: fluentbit service: desired_count: 2 - target: stalwart-metrics + targets: + - container_name: fluentbit + container_port: 24224 + target_name: cwlogs + - container_name: fluentbit + container_port: 1337 + target_name: stalwart-metrics extra_policies: fluentbit: From 43366a9144d06bb4803d3ff083bcc0fc16e89ecd Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Tue, 7 Apr 2026 14:24:47 -0600 Subject: [PATCH 05/16] Add user, domain name --- pulumi/__main__.py | 39 ++++++++++++++++++++++++++++++--------- pulumi/config.dev.yaml | 3 +-- pulumi/requirements.txt | 3 ++- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/pulumi/__main__.py b/pulumi/__main__.py index e9ad261..2d102e7 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -9,8 +9,11 @@ of any of those larger infrastructure patterns. """ +import pulumi_aws as aws +import pulumi_cloudflare as cloudflare import tb_pulumi import tb_pulumi.cloudwatch +import tb_pulumi.iam import tb_pulumi.fargate import tb_pulumi.network import tb_pulumi.secrets @@ -47,6 +50,24 @@ for logdest_name, logdest_config in logdest_opts.items() } + fluentbit_user = aws.iam.User( + f'{project.name_prefix}-user-fluentbit', + name=f'observability-{project.stack}-fluentbit', + tags=project.common_tags, + ) + + aws.iam.UserPolicyAttachment( + f'{project.name_prefix}-upa-obsv-logwrite', + policy_arn=logdests['observability'].resources['iam_policies']['write'], + user=fluentbit_user.id, + ) + + aws.iam.UserPolicyAttachment( + f'{project.name_prefix}-upa-mailstrom-logwrite', + policy_arn=f'arn:aws:iam::768512802988:policy/mailstrom-{project.stack}-stalwart-logs-write-access', + user=fluentbit_user.id, + ) + vpc_config = resources.get('tb:network:MultiCidrVpc', {}).get('fluentbit', {}) vpc_fluentbit = tb_pulumi.network.MultiCidrVpc( f'{project.name_prefix}-vpc-fluentbit', @@ -66,12 +87,12 @@ ).items() } - # cloudflare_zone_id = project.pulumi_config.require_secret('cloudflare_zone_id') - # fluent_bit_dns = cloudflare.DnsRecord( - # f'{project.name_prefix}-dns-fluentbit', - # name='fluent-bit' if project.stack == 'prod' else f'fluent-bit-{project.stack}', - # content=ecs_clusters['fluentbit'].resources['load_balancers'], - # ttl=60, - # type='CNAME', - # zone_id=cloudflare_zone_id, - # ) + cloudflare_zone_id = project.pulumi_config.require_secret('cloudflare_zone_id') + fluent_bit_dns = cloudflare.DnsRecord( + f'{project.name_prefix}-dns-fluentbit', + name='fluentbit' if project.stack == 'prod' else f'fluentbit-{project.stack}', + content=ecs_clusters['fluentbit'].resources['load_balancers']['fluentbit'].dns_name, + ttl=60, + type='CNAME', + zone_id=cloudflare_zone_id, + ) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index 19da8f5..fe6dc89 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -38,7 +38,7 @@ resources: - secretsmanager additional_routes: - destination_cidr_block: 10.2.0.0/16 # mailstrom-dev - vpc_peering_connection_id: pcx-0d2027442f0e54ca4 + vpc_peering_connection_id: pcx-04d7e54008cd9326c tb:fargate:AutoscalingFargateCluster: fluentbit: @@ -196,7 +196,6 @@ resources: extra_policies: fluentbit: - - arn:aws:iam::768512802988:policy/mailstrom-dev-stalwart-logs-write-access - arn:aws:iam::768512802988:policy/observability-dev-observability-logs-write-access autoscalers: diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt index 9715d89..ec75fb2 100644 --- a/pulumi/requirements.txt +++ b/pulumi/requirements.txt @@ -1,4 +1,5 @@ requests>=2.32.5 pulumi_cloudflare>=6.14.0,<7 -tb_pulumi @ git+https://github.com/thunderbird/pulumi.git@main +# tb_pulumi @ git+https://github.com/thunderbird/pulumi.git@main +-e /home/rjung/workspace/thunderbird/pulumi sdks/site24x7 From e1fcd8fe0f83a6a492949d616562068517aaf628 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Thu, 9 Apr 2026 12:21:49 -0600 Subject: [PATCH 06/16] Fix dependencies; remove stalwart log destination --- pulumi/__main__.py | 23 ++++------------------- pulumi/config.prod.yaml | 8 ++++++++ pulumi/config.stage.yaml | 8 ++++++++ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/pulumi/__main__.py b/pulumi/__main__.py index 2d102e7..74b0a73 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -9,7 +9,7 @@ of any of those larger infrastructure patterns. """ -import pulumi_aws as aws +import pulumi import pulumi_cloudflare as cloudflare import tb_pulumi import tb_pulumi.cloudwatch @@ -45,29 +45,12 @@ logdest_name: tb_pulumi.cloudwatch.LogDestination( f'{project.name_prefix}-logdest-{logdest_name}', project=project, + app_name=logdest_name, **logdest_config, ) for logdest_name, logdest_config in logdest_opts.items() } - fluentbit_user = aws.iam.User( - f'{project.name_prefix}-user-fluentbit', - name=f'observability-{project.stack}-fluentbit', - tags=project.common_tags, - ) - - aws.iam.UserPolicyAttachment( - f'{project.name_prefix}-upa-obsv-logwrite', - policy_arn=logdests['observability'].resources['iam_policies']['write'], - user=fluentbit_user.id, - ) - - aws.iam.UserPolicyAttachment( - f'{project.name_prefix}-upa-mailstrom-logwrite', - policy_arn=f'arn:aws:iam::768512802988:policy/mailstrom-{project.stack}-stalwart-logs-write-access', - user=fluentbit_user.id, - ) - vpc_config = resources.get('tb:network:MultiCidrVpc', {}).get('fluentbit', {}) vpc_fluentbit = tb_pulumi.network.MultiCidrVpc( f'{project.name_prefix}-vpc-fluentbit', @@ -81,6 +64,7 @@ project=project, subnets=vpc_fluentbit.resources.get('subnets', []), **cluster_config, + opts=pulumi.ResourceOptions(depends_on=[vpc_fluentbit]), ) for cluster_name, cluster_config in resources.get( 'tb:fargate:AutoscalingFargateCluster' @@ -95,4 +79,5 @@ ttl=60, type='CNAME', zone_id=cloudflare_zone_id, + opts=pulumi.ResourceOptions(depends_on=[*ecs_clusters.values()]), ) diff --git a/pulumi/config.prod.yaml b/pulumi/config.prod.yaml index 333a236..fcf9d35 100644 --- a/pulumi/config.prod.yaml +++ b/pulumi/config.prod.yaml @@ -11,6 +11,14 @@ resources: secret_names: - posthog_api_key + tb:cloudwatch:LogDestination: + observability: + log_group: + retention_in_days: 7 + log_streams: + untagged: untagged + org_name: tb + tb:network:MultiCidrVpc: fluentbit: # The observability project has all of 10.200.0.0/16 assigned to it, but let's not soak all diff --git a/pulumi/config.stage.yaml b/pulumi/config.stage.yaml index 9fe4881..3650a45 100644 --- a/pulumi/config.stage.yaml +++ b/pulumi/config.stage.yaml @@ -10,6 +10,14 @@ resources: secret_names: - posthog_api_key + tb:cloudwatch:LogDestination: + observability: + log_group: + retention_in_days: 7 + log_streams: + untagged: untagged + org_name: tb + tb:network:MultiCidrVpc: fluentbit: # The observability project has all of 10.201.0.0/16 assigned to it, but let's not soak all From ec1cbd5fd658d9934a8a7241df1db25ef87eeef4 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Thu, 9 Apr 2026 13:56:32 -0600 Subject: [PATCH 07/16] Remove Cloudwatch configs --- .../filesystem/etc/fluent-bit/fluent-bit.yaml | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml index 9628157..4b77fcb 100644 --- a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml +++ b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml @@ -19,11 +19,6 @@ pipeline: port: 1337 threaded: true - # Log shippers should override the tag with the name of the application - - name: forward - tag: cloudwatch.untagged - port: 24224 - filters: # Run a Lua script that breaks all events in a single Stalwart telemetry payload into multiple # records. The tag "telemetry_stalwart" here is the result of POSTing to the http input at the @@ -50,26 +45,3 @@ pipeline: tls: on uri: /batch format: json_lines - - # Send logs onward to CloudWatch. Log groups by the derived name must pre-exist, and this - # service must have sufficient IAM permissions to create log streams and post events to them. - - name: cloudwatch_logs - match: cloudwatch.stalwart.mail - log_group_name: /tb/${ENV}/stalwart - log_stream_name: mail - region: eu-central-1 - # log_key: systemd message field? - - - name: cloudwatch_logs - match: cloudwatch.stalwart.api - log_group_name: /tb/${ENV}/stalwart - log_stream_name: api - region: eu-central-1 - # log_key: systemd message field? - - - name: cloudwatch_logs - match: cloudwatch.untagged - log_group_name: /tb/${ENV}/observability - log_stream_name: untagged - region: eu-central-1 - # log_key: systemd message field? \ No newline at end of file From dc0b0a0e973b55b987fd2f1f423760db833f27af Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:00:08 -0600 Subject: [PATCH 08/16] Bump tb_pulumi dependency to next future version --- fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml | 2 +- pulumi/requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml index 4b77fcb..aa4b010 100644 --- a/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml +++ b/fluent-bit/filesystem/etc/fluent-bit/fluent-bit.yaml @@ -18,7 +18,7 @@ pipeline: tag: untagged port: 1337 threaded: true - + filters: # Run a Lua script that breaks all events in a single Stalwart telemetry payload into multiple # records. The tag "telemetry_stalwart" here is the result of POSTing to the http input at the diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt index ec75fb2..e964f86 100644 --- a/pulumi/requirements.txt +++ b/pulumi/requirements.txt @@ -1,5 +1,4 @@ requests>=2.32.5 pulumi_cloudflare>=6.14.0,<7 -# tb_pulumi @ git+https://github.com/thunderbird/pulumi.git@main --e /home/rjung/workspace/thunderbird/pulumi +tb_pulumi @ git+https://github.com/thunderbird/pulumi.git@v0.0.18 sdks/site24x7 From 03766f1a12989d88ecffeaf7be2f125bf2c2eb31 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:05:40 -0600 Subject: [PATCH 09/16] Undo some things we don't need in the dev config --- pulumi/config.dev.yaml | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index fe6dc89..3621ba9 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -55,11 +55,6 @@ resources: protocol: tcp from_port: 1337 to_port: 1337 - - description: Allow traffic from the load balancer to the containers log forwarding service - # Sources are set in code - protocol: tcp - from_port: 24224 - to_port: 24224 egress: - description: Allow traffic from the container out to the Internet protocol: tcp @@ -102,8 +97,6 @@ resources: portMappings: - containerPort: 1337 protocol: tcp - - containerPort: 24224 - protocol: tcp restartPolicy: enabled: yes restartAttemptPeriod: 300 @@ -125,12 +118,6 @@ resources: cidr_blocks: - 10.2.0.0/16 # stalwart-dev description: Allow access to telemetry forwarding service from stalwart-dev - - from_port: 24224 - to_port: 24224 - protocol: tcp - cidr_blocks: - - 10.2.0.0/16 # stalwart-dev - description: Allow access to log forwarding service from stalwart-dev egress: - from_port: 0 to_port: 65535 @@ -143,20 +130,11 @@ resources: enable_cross_zone_load_balancing: yes internal: yes ip_address_type: ipv4 - load_balancer_type: network - name: obsv-dev-fluentbit + load_balancer_type: application + name: fluentbit-http-dev preserve_host_header: yes targets: - cwlogs: - name: obsv-dev-fluent-logs - health_check: - port: 24224 - protocol: TCP - port: 24224 - protocol: TCP - target_type: ip - ip_address_type: ipv4 stalwart-metrics: name: obsv-dev-fluent-telemetry health_check: @@ -169,9 +147,6 @@ resources: listeners: fluentbit: # Load Balancer - cwlogs: # Target - port: 24224 - protocol: TCP stalwart-metrics: # Target port: 443 protocol: TLS @@ -187,9 +162,6 @@ resources: service: desired_count: 2 targets: - - container_name: fluentbit - container_port: 24224 - target_name: cwlogs - container_name: fluentbit container_port: 1337 target_name: stalwart-metrics @@ -200,5 +172,5 @@ resources: autoscalers: fluentbit: - min_capacity: 2 - max_capacity: 4 \ No newline at end of file + min_capacity: 1 + max_capacity: 1 \ No newline at end of file From c65a57e2e1bde7aa2248759320d59835190e894e Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:09:02 -0600 Subject: [PATCH 10/16] Undo some things we don't need in the dev config --- pulumi/config.dev.yaml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index 3621ba9..3dd08a2 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -46,11 +46,10 @@ resources: container_security_groups: fluentbit: # Service - fluentbit: # Load Balancer + fluentbit-http: # Load Balancer rules: ingress: - # Maddeningly, apostrophes aren't allowed in descriptions - - description: Allow traffic from the load balancer to the containers HTTP service + - description: Allow traffic from the load balancer to the container # Sources are set in code protocol: tcp from_port: 1337 @@ -108,7 +107,7 @@ resources: - FARGATE load_balancer_security_groups: - fluentbit: # Load Balancer + fluentbit-http: # Load Balancer description: Governs access to the fluentbit-http load balancer in dev rules: ingress: @@ -126,7 +125,7 @@ resources: - 0.0.0.0/0 load_balancers: - fluentbit: + fluentbit-http: enable_cross_zone_load_balancing: yes internal: yes ip_address_type: ipv4 @@ -136,7 +135,7 @@ resources: targets: stalwart-metrics: - name: obsv-dev-fluent-telemetry + name: dev-telemetry-http health_check: port: 1337 protocol: TCP @@ -146,19 +145,19 @@ resources: ip_address_type: ipv4 listeners: - fluentbit: # Load Balancer + fluentbit-http: # Load Balancer stalwart-metrics: # Target - port: 443 - protocol: TLS # This cert is for fluentbit-dev.tb.pro certificate_arn: arn:aws:acm:eu-central-1:768512802988:certificate/04dd0573-a3cc-4c19-b483-a868876c63b0 + port: 443 + protocol: HTTPS services: fluentbit: # Service assign_public_ip: yes container_name: fluentbit # Name from container definition container_port: 1337 - load_balancer: fluentbit + load_balancer: fluentbit-http service: desired_count: 2 targets: From e8676570508e77c461d381fbb147d22eda616750 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:12:12 -0600 Subject: [PATCH 11/16] Undo some things we don't need in the dev config --- pulumi/config.dev.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index 3dd08a2..b4cdce9 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -108,7 +108,7 @@ resources: load_balancer_security_groups: fluentbit-http: # Load Balancer - description: Governs access to the fluentbit-http load balancer in dev + description: Governs access to the fluent-bit-http load balancer in dev rules: ingress: - from_port: 443 @@ -116,7 +116,7 @@ resources: protocol: tcp cidr_blocks: - 10.2.0.0/16 # stalwart-dev - description: Allow access to telemetry forwarding service from stalwart-dev + description: Allow access from stalwart-dev egress: - from_port: 0 to_port: 65535 @@ -137,10 +137,12 @@ resources: stalwart-metrics: name: dev-telemetry-http health_check: + protocol: HTTP + path: /health/check port: 1337 - protocol: TCP + matcher: "400" port: 1337 - protocol: TCP + protocol: HTTP target_type: ip ip_address_type: ipv4 From 0e81c9c87b13ccb0648836db7ceb2e4dd93a9710 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:20:44 -0600 Subject: [PATCH 12/16] Clean up dev config --- pulumi/config.dev.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pulumi/config.dev.yaml b/pulumi/config.dev.yaml index b4cdce9..da13d53 100644 --- a/pulumi/config.dev.yaml +++ b/pulumi/config.dev.yaml @@ -1,5 +1,7 @@ --- +.fluentbit_image: &FLUENTBIT_IMAGE 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:426154b20a1b0d005f9f6029836a5902e5b5b228edd9688686f10d373e72a5b2 + config: build_site24x7: False build_tbpulumi: True @@ -86,7 +88,7 @@ resources: secrets: - name: POSTHOG_API_KEY valueFrom: arn:aws:secretsmanager:eu-central-1:768512802988:secret:observability/dev/posthog_api_key-e3UEK4 - image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:0bc8e6cdee4226f9c090ca22b08b3f6c61a5456a2090669414a43c52dd2580a8 + image: *FLUENTBIT_IMAGE logConfiguration: logDriver: awslogs options: @@ -138,11 +140,16 @@ resources: name: dev-telemetry-http health_check: protocol: HTTP + # fluentbit is set up to throw away records submitted to this endpoint path: /health/check port: 1337 + # I wish it wasn't like this. fluentbit will not accept non-POST methods, but these health checks cannot be + # made with custom methods. Therefore, we expect a 400 Bad Request as a sign of health. *shrug* matcher: "400" port: 1337 protocol: HTTP + # Next two options are required for ECS services; ref: + # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/alb.html target_type: ip ip_address_type: ipv4 From 7a4d4181f39ce3aa1a9c55cd709b946d0f2588fc Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:20:57 -0600 Subject: [PATCH 13/16] Update fluent-bit image across envs --- pulumi/config.prod.yaml | 4 +++- pulumi/config.stage.yaml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pulumi/config.prod.yaml b/pulumi/config.prod.yaml index fcf9d35..d97b2f1 100644 --- a/pulumi/config.prod.yaml +++ b/pulumi/config.prod.yaml @@ -1,5 +1,7 @@ --- +.fluentbit_image: &FLUENTBIT_IMAGE 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:426154b20a1b0d005f9f6029836a5902e5b5b228edd9688686f10d373e72a5b2 + config: build_site24x7: True build_tbpulumi: True @@ -87,7 +89,7 @@ resources: secrets: - name: POSTHOG_API_KEY valueFrom: arn:aws:secretsmanager:eu-central-1:768512802988:secret:observability/prod/posthog_api_key-pVtqmp - image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:aa968a499d7e + image: *FLUENTBIT_IMAGE logConfiguration: logDriver: awslogs options: diff --git a/pulumi/config.stage.yaml b/pulumi/config.stage.yaml index 3650a45..850f03d 100644 --- a/pulumi/config.stage.yaml +++ b/pulumi/config.stage.yaml @@ -1,5 +1,7 @@ --- +.fluentbit_image: &FLUENTBIT_IMAGE 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:426154b20a1b0d005f9f6029836a5902e5b5b228edd9688686f10d373e72a5b2 + config: build_site24x7: False build_tbpulumi: True @@ -86,7 +88,7 @@ resources: secrets: - name: POSTHOG_API_KEY valueFrom: arn:aws:secretsmanager:eu-central-1:768512802988:secret:observability/stage/posthog_api_key-3xsHYd - image: 768512802988.dkr.ecr.eu-central-1.amazonaws.com/thunderbird/fluent-bit:fdd1b4748cfaee29553ee2c83fcaa428b68ba8e88c2791e1626e282b48127b9d + image: *FLUENTBIT_IMAGE logConfiguration: logDriver: awslogs options: From 989b14e2651c7727090f54ea668fa6b422ded196 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:34:29 -0600 Subject: [PATCH 14/16] Update stage/prod configs, fix lb name reference --- pulumi/__main__.py | 2 +- pulumi/config.prod.yaml | 9 ++++++--- pulumi/config.stage.yaml | 9 ++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pulumi/__main__.py b/pulumi/__main__.py index 74b0a73..74ddf9b 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -75,7 +75,7 @@ fluent_bit_dns = cloudflare.DnsRecord( f'{project.name_prefix}-dns-fluentbit', name='fluentbit' if project.stack == 'prod' else f'fluentbit-{project.stack}', - content=ecs_clusters['fluentbit'].resources['load_balancers']['fluentbit'].dns_name, + content=ecs_clusters['fluentbit'].resources['load_balancers']['fluentbit-http'].dns_name, ttl=60, type='CNAME', zone_id=cloudflare_zone_id, diff --git a/pulumi/config.prod.yaml b/pulumi/config.prod.yaml index d97b2f1..c5549bc 100644 --- a/pulumi/config.prod.yaml +++ b/pulumi/config.prod.yaml @@ -93,9 +93,9 @@ resources: logConfiguration: logDriver: awslogs options: - awslogs-group: observability-prod-fargate-fluentbit-loggroup-fluentbit + awslogs-group: /tb/prod/observability awslogs-region: eu-central-1 - awslogs-stream-prefix: observability/prod/fluentbit/ + awslogs-stream-prefix: 'ecs' portMappings: - containerPort: 1337 protocol: tcp @@ -170,7 +170,10 @@ resources: load_balancer: fluentbit-http service: desired_count: 2 - target: stalwart-metrics + targets: + - container_name: fluentbit + container_port: 1337 + target_name: stalwart-metrics autoscalers: fluentbit: diff --git a/pulumi/config.stage.yaml b/pulumi/config.stage.yaml index 850f03d..0b06952 100644 --- a/pulumi/config.stage.yaml +++ b/pulumi/config.stage.yaml @@ -92,9 +92,9 @@ resources: logConfiguration: logDriver: awslogs options: - awslogs-group: observability-stage-fargate-fluentbit-loggroup-fluentbit + awslogs-group: /tb/stage/observability awslogs-region: eu-central-1 - awslogs-stream-prefix: observability/stage/fluentbit/ + awslogs-stream-prefix: 'ecs' portMappings: - containerPort: 1337 hostPort: 1337 @@ -170,7 +170,10 @@ resources: load_balancer: fluentbit-http service: desired_count: 2 - target: stalwart-metrics + targets: + - container_name: fluentbit + container_port: 1337 + target_name: stalwart-metrics autoscalers: fluentbit: From 6e3a1aefc9edd71b2cae17b51d5a7511222ffb92 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:37:15 -0600 Subject: [PATCH 15/16] Fix prod log retention period --- pulumi/config.prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pulumi/config.prod.yaml b/pulumi/config.prod.yaml index c5549bc..e3024be 100644 --- a/pulumi/config.prod.yaml +++ b/pulumi/config.prod.yaml @@ -16,7 +16,7 @@ resources: tb:cloudwatch:LogDestination: observability: log_group: - retention_in_days: 7 + retention_in_days: 3 log_streams: untagged: untagged org_name: tb From 7b8ba8cd60d549b20c7df13fe9f505737ea4b930 Mon Sep 17 00:00:00 2001 From: Ryan Jung Date: Fri, 10 Apr 2026 08:57:36 -0600 Subject: [PATCH 16/16] Set Cloudflare provider variables in prod --- pulumi/Pulumi.prod.yaml | 4 ++++ pulumi/Pulumi.stage.yaml | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pulumi/Pulumi.prod.yaml b/pulumi/Pulumi.prod.yaml index 682a5ab..c49f704 100644 --- a/pulumi/Pulumi.prod.yaml +++ b/pulumi/Pulumi.prod.yaml @@ -201,6 +201,10 @@ config: selection_type: all_monitors statusiq_role: super_admin user_role: super_admin + observability:cloudflare_zone_id: + secure: AAABAFkTz7RxaV86Kw6RQ+XJ9O1orS7QTUgkRDIwvoE4kXYdQGWJ2i6zj9XqLoevXb3PgOGnNmv550aMA/H+zA== + cloudflare:apiToken: + secure: AAABAJ/XXYzFsIlhvWdl0FwFnUWHUTKIyJMAFNicpTtxUlWcOurQC9Y1O5qu+hjL/DmMRoV2c0+KdBy00YUzvIWf0HWuyIZk # The "api" kind seems to have some bugs in the Terraform provider, but this is what such a # configuration should look like: diff --git a/pulumi/Pulumi.stage.yaml b/pulumi/Pulumi.stage.yaml index a3b9132..5c0d806 100644 --- a/pulumi/Pulumi.stage.yaml +++ b/pulumi/Pulumi.stage.yaml @@ -1,3 +1,7 @@ -config: +config: observability:posthog_api_key: secure: AAABADNVbsoTmx0hogPjFb+Egd5TX7Wheactt3JgEv21j1G+OJSjHF+CUpY/w9qTS3KEw4IdoYUuufBKX2sJXudMUYFAEc8m8o2rg56eGw== + observability:cloudflare_zone_id: + secure: AAABALYXXhMfqqRW1FbwgqDZIsD7VwJ+AGDcqv4RiMDfi7cadVqba5L1esVND3ieXJdpE/qwcnN1AIUfBe6Zpw== + cloudflare:apiToken: + secure: AAABAB5n/gkdIZz7ZKyeu00kWDwDrpPr0Wl/FD449u+/WrX88r8nk+FHj/GTjyQL/GURB9TrMRzJ2as9ceKoWfOO6Z4+24Yk