Parseable + PostgreSQL + Fluent Bit Helm values
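# Helm values wiring three components together:
#   * fluent-bit - DaemonSet that tails container logs; a dedicated tail input,
#     multiline parser, and Lua/parser filters turn multi-line PostgreSQL
#     statement logs into structured events before shipping them to Parseable.
#   * parseable  - the Parseable server itself (HA ingestors, S3-backed storage).
#   * vector     - an optional Vector agent (disabled here) that can forward
#     kubernetes_logs to Parseable over HTTP as an alternative shipper.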
fluent-bit:
  config:
    customParsers: |
      [PARSER]
          Name docker_no_time
          Format json
          Time_Keep Off
          Time_Key time
          Time_Format %Y-%m-%dT%H:%M:%S.%L
      [MULTILINE_PARSER]
          name multiline-postgres
          type regex
          flush_timeout 5000
          # Start of a SQL statement
          rule "start_state" "/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z (?:stderr|stdout) [A-Z] \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3} GMT \[(?<pid>\d+)\] (?<user>[^@]+)@(?<database>[^\s]+) LOG:\s+statement: (?<statement>.*)$/" "collect_statement"
          # Any continuation line that starts with a tab
          rule "collect_statement" "/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z (?:stderr|stdout) [A-Z]\s+\t(?<continuation>.*)$/" "collect_statement"
          # Duration line that ends the statement
          rule "collect_statement" "/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z (?:stderr|stdout) [A-Z] \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3} GMT \[(?<pid>\d+)\] (?<user>[^@]+)@(?<database>[^\s]+) LOG:\s+duration: (?<duration>\d+\.\d+) ms$/" "start_state"
      [PARSER]
          Name postgres_structured
          Format regex
          Regex ^(?<postgres_timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{3})\s+GMT\s+\[(?<pid>\d+)\]\s+(?<user>[^@]+)@(?<database>[^\s]+)\s+LOG:\s+(?:statement:\s+(?<statement>(?:.|\r?\n)*(?:duration:.*ms)?)|(?<message>.*))$
          Time_Key postgres_timestamp
          Time_Format %Y-%m-%d %H:%M:%S.%L
          Time_Keep On
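      # Illustrative only: after clean_postgres_logs.lua strips the CRI prefixes,
      # a line like "2025-02-16 10:00:00.000 GMT [123] postgres@mydb LOG:  statement: SELECT 1"
      # yields postgres_timestamp, pid, user, database and statement fields;
      # non-statement LOG lines land in the generic message field instead.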
    filters: |
      [FILTER]
          Name kubernetes
          Match kube.*
          Merge_Log On
          Keep_Log Off
          K8S-Logging.Parser On
          K8S-Logging.Exclude On
      [FILTER]
          Name lua
          Match raw.postgres
          Script /fluent-bit/scripts/clean_postgres_logs.lua
          Call clean_postgres_logs
      [FILTER]
          Name parser
          Match raw.postgres
          Parser postgres_structured
          Key_Name log
          Reserve_Data true
      [FILTER]
          Name lua
          Match raw.postgres
          Script /fluent-bit/scripts/extract_duration.lua
          Call extract_duration
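      # raw.postgres records therefore run through three stages in order:
      # strip CRI prefixes (clean_postgres_logs), parse into structured fields
      # (postgres_structured), then pull the trailing duration line out into
      # its own field (extract_duration).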
    inputs: |
      [INPUT]
          Name tail
          Path /var/log/containers/*.log
          multiline.parser docker, cri
          Tag kube.*
          Mem_Buf_Limit 5MB
          Skip_Long_Lines On
      [INPUT]
          Name tail
          Path /var/log/containers/postgres-postgresql-*.log
          multiline.parser multiline-postgres
          Tag raw.postgres
          Mem_Buf_Limit 5MB
          Read_From_Head true
          Skip_Long_Lines Off
      [INPUT]
          Name systemd
          Tag host.*
          Systemd_Filter _SYSTEMD_UNIT=kubelet.service
          Read_From_Tail On
    outputs: |
      [OUTPUT]
          Name parseable
          Match kube.*
          Server_Host {{ .Values.serverHost }}
          Username {{ .Values.serverUsername }}
          Password {{ .Values.serverPassword }}
          Server_Port 80
          Stream {{ .Values.serverStream }}
          Exclude_Namespaces {{ .Values.excludeNamespaces }}
      [OUTPUT]
          Name parseable
          Match raw.postgres
          Server_Host {{ .Values.serverHost }}
          Server_Port 80
          Username {{ .Values.serverUsername }}
          Password {{ .Values.serverPassword }}
          Stream postgres-logs
    service: |
      [SERVICE]
          Daemon Off
          Flush {{ .Values.flush }}
          Log_Level {{ .Values.logLevel }}
          Parsers_File parsers.conf
          Parsers_File custom_parsers.conf
          HTTP_Server On
          HTTP_Listen 0.0.0.0
          HTTP_Port {{ .Values.metricsPort }}
          Health_Check On
    upstream: {}
  daemonSetVolumeMounts:
    - mountPath: /var/log
      name: varlog
    - mountPath: /var/lib/docker/containers
      name: varlibdockercontainers
      readOnly: true
    - mountPath: /etc/machine-id
      name: etcmachineid
      readOnly: true
  daemonSetVolumes:
    - hostPath:
        path: /var/log
      name: varlog
    - hostPath:
        path: /var/lib/docker/containers
      name: varlibdockercontainers
    - hostPath:
        path: /etc/machine-id
        type: File
      name: etcmachineid
  dnsPolicy: ClusterFirst
  enabled: true
  excludeNamespaces: kube-system, default
  flush: 1
  image:
    pullPolicy: Always
    repository: parseable/fluent-bit
    tag: v3
  kind: DaemonSet
  livenessProbe:
    httpGet:
      path: /
      port: http
  logLevel: info
  luaScripts:
    clean_postgres_logs.lua: |
      function clean_postgres_logs(tag, timestamp, record)
        if record.log then
          -- Remove container runtime prefixes and collect lines
          local lines = {}
          for line in record.log:gmatch("[^\n]+") do
            -- Remove the container runtime prefix
            local cleaned = line:gsub("^%d%d%d%d%-%d%d%-%d%dT%d%d:%d%d:%d%d%.%d+Z%s+stderr%s+F%s+", "")
            lines[#lines + 1] = cleaned
          end
          -- Reconstruct the log without the runtime prefixes
          record.log = table.concat(lines, "\n")
        end
        return 2, timestamp, record
      end
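      -- Illustrative (assumed input): a CRI-prefixed line such as
      --   "2025-02-16T10:00:00.000000000Z stderr F 2025-02-16 10:00:00.000 GMT [123] postgres@mydb LOG:  statement: SELECT 1"
      -- becomes
      --   "2025-02-16 10:00:00.000 GMT [123] postgres@mydb LOG:  statement: SELECT 1"
      -- Note that the pattern only strips the prefix from stderr lines.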
    extract_duration.lua: |
      function extract_duration(tag, timestamp, record)
        if record.statement then
          -- Try to extract duration from the statement
          local duration = record.statement:match("duration:%s*(%d+%.%d+)%s*ms")
          if duration then
            -- Add duration as a separate column
            record.duration = duration
            -- Remove the duration line from statement
            record.statement = record.statement:gsub("\n%d%d%d%d%-%d%d%-%d%d%s+%d%d:%d%d:%d%d%.%d%d%d%s+GMT%s+%[%d+%]%s+[^@]+@[^%s]+%s+LOG:%s+duration:%s+%d+%.%d+%s+ms$", "")
          end
        end
        return 2, timestamp, record
      end
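      -- Illustrative (assumed record shape): given
      --   statement = "SELECT 1\n2025-02-16 10:00:00.000 GMT [123] postgres@mydb LOG:  duration: 0.335 ms"
      -- the filter sets record.duration = "0.335" and strips the trailing
      -- duration line from record.statement.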
  metricsPort: 2020
  rbac:
    create: true
    eventsAccess: true
    nodeAccess: false
  readinessProbe:
    httpGet:
      path: /api/v1/health
      port: http
  replicaCount: 1
  serverHost: parseable-ingestor-service.parseable.svc.cluster.local
  serverPassword: admin
  serverStream: $NAMESPACE
  serverUsername: admin
  service:
    labels: {}
    loadBalancerClass: null
    loadBalancerSourceRanges: []
    port: 2020
    type: ClusterIP
  serviceAccount:
    annotations: {}
    create: true
    name: null
  testFramework:
    enabled: true
    image:
      pullPolicy: Always
      repository: busybox
      tag: latest
  volumeMounts:
    - mountPath: /fluent-bit/etc/fluent-bit.conf
      name: config
      subPath: fluent-bit.conf
    - mountPath: /fluent-bit/etc/custom_parsers.conf
      name: config
      subPath: custom_parsers.conf
parseable:
  affinity: {}
  auditLogging:
    enabled: false
    p_password: admin
    p_server: http://parseable-ingestor-service.parseable.svc.cluster.local
    p_username: admin
  blobModeSecret:
    enabled: false
    secrets:
      - keys:
          - addr
          - username
          - password
          - azr.access_key
          - azr.account
          - azr.container
          - azr.url
        name: parseable-env-secret
        prefix: P_
  env:
    RUST_LOG: warn
  fullnameOverride: ""
  gcsModeSecret:
    enabled: false
    secrets:
      - keys:
          - addr
          - username
          - password
          - staging.dir
          - fs.dir
          - gcs.url
          - gcs.access.key
          - gcs.secret.key
          - gcs.bucket
          - gcs.region
        name: parseable-env-secret
        prefix: P_
  highAvailability:
    enabled: true
    ingestor:
      affinity: {}
      count: 3
      env:
        RUST_LOG: warn
      extraLabels:
        app: parseable
      labels:
        app: parseable
        component: ingestor
      nodeSelector: {}
      podAnnotations: {}
      port: 8000
      readinessProbe:
        httpGet:
          path: /api/v1/readiness
          port: 8000
      resources:
        limits:
          cpu: 500m
          memory: 4Gi
        requests:
          cpu: 250m
          memory: 1Gi
      service:
        port: 80
        type: ClusterIP
      tolerations: []
  image:
    pullPolicy: Always
    repository: containers.parseable.com/parseable/parseable
    tag: v1.7.3
  localModeSecret:
    enabled: false
    secrets:
      - keys:
          - addr
          - username
          - password
          - staging.dir
          - fs.dir
        name: parseable-env-secret
        prefix: P_
  metrics:
    serviceMonitor:
      enabled: false
      labels: {}
      namespace: ""
      spec:
        attachMetadata: {}
        bodySizeLimit: {}
        endpoints: []
        jobLabel: ""
        keepDroppedTargets: 0
        labelLimit: 0
        labelNameLengthLimit: 0
        labelValueLengthLimit: 0
        namespaceSelector: {}
        podTargetLabels: []
        sampleLimit: 0
        scrapeClass: ""
        scrapeProtocols: []
        selector: {}
        targetLabels: []
        targetLimit: 0
  nameOverride: ""
  nodeSelector: {}
  persistence:
    data:
      accessMode: ReadWriteOnce
      enabled: false
      size: 5Gi
      storageClass: ""
    ingestor:
      accessMode: ReadWriteOnce
      enabled: true
      size: 5Gi
      storageClass: do-block-storage
    querier:
      accessMode: ReadWriteOnce
      enabled: true
      size: 100Gi
      storageClass: do-block-storage
    staging:
      accessMode: ReadWriteOnce
      enabled: true
      size: 5Gi
      storageClass: do-block-storage
  podAnnotations:
    prometheus.io/path: /api/v1/metrics
    prometheus.io/port: "80"
    prometheus.io/scrape: "true"
  podLabels:
    app: parseable
    component: query
  podSecurityContext:
    fsGroup: 1000
    fsGroupChangePolicy: Always
    runAsGroup: 1000
    runAsUser: 1000
  readinessProbe:
    httpGet:
      path: /api/v1/readiness
      port: 8000
  resources:
    limits:
      cpu: 500m
      memory: 4Gi
    requests:
      cpu: 250m
      memory: 1Gi
  s3ModeSecret:
    enabled: true
    secrets:
      - keys:
          - addr
          - username
          - password
          - staging.dir
          - fs.dir
          - s3.url
          - s3.access.key
          - s3.secret.key
          - s3.bucket
          - s3.region
        name: parseable-env-secret
        prefix: P_
  securityContext:
    allowPrivilegeEscalation: false
  service:
    port: 80
    type: ClusterIP
  serviceAccount:
    annotations: {}
    create: true
    name: parseable
  sidecar:
    args: []
    command: []
    enabled: false
    env:
      RUST_LOG: warn
    image:
      pullPolicy: IfNotPresent
      repository: busybox
      tag: latest
    ports: 8000
    resources:
      limits:
        cpu: 500m
        memory: 4Gi
      requests:
        cpu: 250m
        memory: 1Gi
    volumeClaimTemplates:
      - metadata:
          name: test-volume
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 1Gi
    volumeMounts:
      - mountPath: /parseable/test
        name: test-volume
  store: s3-store
  toleration: []
  tolerations: []
vector:
  args:
    - --config-dir
    - /etc/vector/
  customConfig:
    api:
      address: 127.0.0.1:8686
      enabled: true
      playground: false
    data_dir: /vector-data-dir
    sinks:
      parseable:
        auth:
          password: admin
          strategy: basic
          user: admin
        batch:
          max_bytes: 10485760
          max_events: 1000
          timeout_secs: 10
        compression: gzip
        encoding:
          codec: json
        healthcheck:
          enabled: true
          path: http://parseable.parseable.svc.cluster.local/api/v1/liveness
          port: 80
        inputs:
          - kubernetes_logs
        method: post
        request:
          headers:
            X-P-Stream: vectordemo
        type: http
        uri: http://parseable.parseable.svc.cluster.local/api/v1/ingest
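        # For reference, a roughly equivalent manual test of this sink's request,
        # using the endpoint, stream header and credentials above (the JSON
        # payload and Content-Type header are assumptions for the example):
        #   curl -u admin:admin \
        #     -H 'X-P-Stream: vectordemo' -H 'Content-Type: application/json' \
        #     -d '[{"message":"hello from vector demo"}]' \
        #     http://parseable.parseable.svc.cluster.local/api/v1/ingest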
    sources:
      kubernetes_logs:
        type: kubernetes_logs
  dnsPolicy: ClusterFirst
  enabled: false
  image:
    pullPolicy: IfNotPresent
    pullSecrets: []
    repository: timberio/vector
    sha: ""
    tag: ""
  podDisruptionBudget:
    enabled: false
    maxUnavailable: null
    minAvailable: 1
  podLabels:
    vector.dev/exclude: "true"
  podManagementPolicy: OrderedReady
  rbac:
    create: true
  replicas: 1
  role: Agent
  rollWorkload: true
  service:
    annotations: {}
    enabled: true
    externalTrafficPolicy: ""
    ipFamilies: []
    ipFamilyPolicy: ""
    loadBalancerIP: ""
    ports: []
    topologyKeys: []
    type: ClusterIP
  serviceAccount:
    annotations: {}
    automountToken: true
    create: true
    name: null
  serviceHeadless:
    enabled: true
  terminationGracePeriodSeconds: 60