作者:京东科技 刘恩浩
一、背景
基于K8s集群的私有化交付方案中,日志收集采用了ilogtail+logstash+kafka+es方案,其中ilogtail负责日志收集,logstash负责对数据进行转换,kafka负责对日志传递进行削峰,进而减少es的写入压力,es用来保存日志数据。在私有化交付中,本方案涉及的中间件一般需要单独部署;但是在京东内网环境部署时,考虑到kafka和es的高可用,则不推荐采用单独部署的方案。
二、新方案实践
1.新方案简介
在京东内网环境部署K8s并收集日志时,kafka+es的替代方案考虑使用JMQ+JES。由于JMQ的底层基于kafka、JES的底层基于ES,所以该替换方案理论上是可行的。
2.主要架构
数据流向大致如下
应用日志 -> ilogtail -> JMQ -> logstash -> JES
3.如何使用
核心改造点汇总
- ilogtail nameservers配置
增加解析JMQ域名的nameserver(京东云主机上无法直接解析.local域名)
# Fragment of the iLogtail DaemonSet. The DNS settings are pod-level fields,
# so they sit under the pod template (spec.template.spec).
spec:
  template:
    spec:
      # "None" makes the pod use only the resolvers listed in dnsConfig.
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x  # nameserver that can resolve the JMQ domain
- ilogtail flushers配置
调整发送到JMQ的配置
# Excerpt of the iLogtail user ConfigMap: only the flushers section of
# app_stdout.yaml is shown. Events are written to stdout and to JMQ through
# the Kafka-protocol flusher.
# The text after "#" inside the block scalar is part of the embedded config
# file and is kept as authored.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80 # jmq元数据地址
        Topic: ai-middle-k8s-log-prod # jmq topic
        ClientID: ai4middle4log # Kafka的用户ID(识别客户端并设置其唯一性),对应jmq的Group名称,重要‼️ (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)
- logstash kafka&es配置
# Excerpt of the Logstash pipeline ConfigMap: consume from the JMQ topic via
# the kafka input and write to JES via the elasticsearch output.
# NOTE(review): user/password below are placeholders; real credentials are
# better supplied from a Secret than stored in a ConfigMap.
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: elastic-system
  labels:
    elastic-app: logstash
data:
  logstash.conf: |-
    input {
      kafka {
        bootstrap_servers => ["nameserver.jmq.jd.local:80"] #jmq的元数据地址
        group_id => "ai4middle4log" # jmq的Group的名称
        client_id => "ai4middle4log" # jmq的Group的名称,即jmq的省略了kafka中的client_id概念,用Group名称代替
        consumer_threads => 2
        decorate_events => true
        topics => ["ai-middle-k8s-log-prod"] # jmq的topic
        auto_offset_reset => "latest"
        codec => json { charset => "UTF-8" }
      }
    }
    output {
      elasticsearch {
        hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es地址
        index => "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # 索引规则
        user => "XXXXXX" #jes的用户名
        password => "xxxxx" #jes的密码
        ssl => "false"
        ssl_certificate_verification => "false"
      }
    }
ilogtail 的配置如下
# ilogtail-daemonset.yaml
# Runs one iLogtail collector pod per node. The pod uses the host network,
# mounts the host root filesystem read-only for log access, and takes its
# pipeline definitions from the ilogtail-user-cm ConfigMap.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ilogtail-ds
  namespace: elastic-system
  labels:
    k8s-app: logtail-ds
spec:
  selector:
    matchLabels:
      k8s-app: logtail-ds
  template:
    metadata:
      labels:
        k8s-app: logtail-ds
    spec:
      # dnsPolicy is declared exactly once. A second "dnsPolicy: ClusterFirst"
      # entry in the same mapping would be a duplicate key: it either
      # overrides "None" silently or is rejected by strict validation.
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
          - x.x.x.x  # nameserver able to resolve the JMQ domain (needed on JD Cloud hosts)
      hostNetwork: true
      tolerations:
        - operator: Exists  # deploy on all nodes
      containers:
        - name: logtail
          image: dockerhub.ai.jd.local/ai-middleware/ilogtail-community-edition/ilogtail:1.3.1
          imagePullPolicy: IfNotPresent
          env:
            - name: ALIYUN_LOG_ENV_TAGS  # add log tags from env
              value: _node_name_|_node_ip_
            - name: _node_name_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: _node_ip_
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.hostIP
            - name: cpu_usage_limit  # iLogtail's self monitor cpu limit
              value: "1"
            - name: mem_usage_limit  # iLogtail's self monitor mem limit
              value: "512"
          resources:
            limits:
              cpu: 1000m
              memory: 1Gi
            requests:
              cpu: 400m
              memory: 384Mi
          volumeMounts:
            - mountPath: /var/run  # for container runtime socket
              name: run
            - mountPath: /logtail_host  # for log access on the node
              mountPropagation: HostToContainer
              name: root
              readOnly: true
            - mountPath: /usr/local/ilogtail/checkpoint  # for checkpoint between container restart
              name: checkpoint
            - mountPath: /usr/local/ilogtail/user_yaml_config.d  # mount config dir
              name: user-config
              readOnly: true
            - mountPath: /usr/local/ilogtail/apsara_log_conf.json
              name: apsara-log-config
              readOnly: true
              subPath: apsara_log_conf.json
      volumes:
        - hostPath:
            path: /var/run
            type: Directory
          name: run
        - hostPath:
            path: /
            type: Directory
          name: root
        - hostPath:
            path: /etc/ilogtail-ilogtail-ds/checkpoint
            type: DirectoryOrCreate
          name: checkpoint
        - configMap:
            defaultMode: 420  # decimal 420 == octal 0644
            name: ilogtail-user-cm
          name: user-config
        - configMap:
            defaultMode: 420  # decimal 420 == octal 0644
            name: ilogtail-apsara-log-config-cm
          name: apsara-log-config
# ilogtail-user-configmap.yaml
# iLogtail pipeline definitions mounted into the collector pods:
#   app_stdout.yaml   - container stdout/stderr from namespace ai-train
#   app_file_log.yaml - *.log files under /export/Logs/ai-dt-algorithm-tools
# Both pipelines rename the collected metadata keys and flush to stdout and
# to the JMQ topic through flusher_kafka_v2.
# In each processor_rename, SourceKeys and DestKeys are parallel lists: the
# n-th source key is renamed to the n-th destination key, so keep them the
# same length and order.
# The text after "#" inside the block scalars is part of the embedded config
# files and is kept as authored.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ilogtail-user-cm
  namespace: elastic-system
data:
  app_stdout.yaml: |
    enable: true
    inputs:
      - Type: service_docker_stdout
        Stderr: true
        Stdout: true
        K8sNamespaceRegex: ai-train
        ExternalK8sLabelTag:
          platform/resource-name: k8s_label_resource-name
          platform/task-identify: k8s_label_task-identify
          task-id: k8s_label_task-id
          run-id: k8s_label_run-id
          request-id: k8s_label_request-id
    processors:
      - Type: processor_rename
        SourceKeys:
          - k8s_label_resource-name
          - k8s_label_task-identify
          - k8s_label_task-id
          - k8s_label_run-id
          - k8s_label_request-id
          - _namespace_
          - _image_name_
          - _pod_uid_
          - _pod_name_
          - _container_name_
          - _container_ip_
          - __path__
          - _source_
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path
          - source
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80 # jmq元数据地址
        Topic: ai-middle-k8s-log-prod # jmq topic
        ClientID: ai4middle4log # Kafka的用户ID(识别客户端并设置其唯一性),对应jmq的Group名称,重要‼️ (https://ilogtail.gitbook.io/ilogtail-docs/plugins/input/service-kafka#cai-ji-pei-zhi-v2)
  app_file_log.yaml: |
    enable: true
    inputs:
      - Type: file_log
        LogPath: /export/Logs/ai-dt-algorithm-tools
        FilePattern: "*.log"
        ContainerInfo:
          K8sNamespaceRegex: ai-train
          ExternalK8sLabelTag:
            platform/resource-name: k8s_label_resource-name
            platform/task-identify: k8s_label_task-identify
            task-id: k8s_label_task-id
            run-id: k8s_label_run-id
            request-id: k8s_label_request-id
    processors:
      - Type: processor_add_fields
        Fields:
          source: file
      - Type: processor_rename
        SourceKeys:
          - __tag__:k8s_label_resource-name
          - __tag__:k8s_label_task-identify
          - __tag__:k8s_label_task-id
          - __tag__:k8s_label_run-id
          - __tag__:k8s_label_request-id
          - __tag__:_namespace_
          - __tag__:_image_name_
          - __tag__:_pod_uid_
          - __tag__:_pod_name_
          - __tag__:_container_name_
          - __tag__:_container_ip_
          - __tag__:__path__
        DestKeys:
          - resource_name
          - task_identify
          - task_id
          - run_id
          - request_id
          - namespace
          - image_name
          - pod_uid
          - pod_name
          - container_name
          - container_ip
          - path
    flushers:
      - Type: flusher_stdout
        OnlyStdout: true
      - Type: flusher_kafka_v2
        Brokers:
          - nameserver.jmq.jd.local:80
        Topic: ai-middle-k8s-log-prod
        ClientID: ai4middle4log
logstash 的配置如下
# logstash-configmap.yaml
# Logstash pipeline: consume JSON events from the JMQ topic (Kafka protocol),
# copy "contents" into "message", add an "index_date" field derived from
# @timestamp shifted by 8 hours, then write to JES (Elasticsearch) and echo
# each event to stdout.
# NOTE(review): the output index pattern uses %{+YYYY-MM-dd} (taken from
# @timestamp), not the computed index_date field - confirm this is intended.
# NOTE(review): user/password below are placeholders; real credentials are
# better supplied from a Secret than stored in a ConfigMap.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-config
  namespace: elastic-system
  labels:
    elastic-app: logstash
data:
  logstash.conf: |-
    input {
      kafka {
        bootstrap_servers => ["nameserver.jmq.jd.local:80"] #jmq的元数据地址
        #group_id => "services"
        group_id => "ai4middle4log" # jmq的Group的名称
        client_id => "ai4middle4log" # jmq的Group的名称,即jmq的省略了kafka中的client_id概念,用Group名称代替
        consumer_threads => 2
        decorate_events => true
        #topics_pattern => ".*"
        topics => ["ai-middle-k8s-log-prod"] # jmq的topic
        auto_offset_reset => "latest"
        codec => json { charset => "UTF-8" }
      }
    }
    filter {
      ruby {
        code => "event.set('index_date', event.get('@timestamp').time.localtime + 8*60*60)"
      }
      ruby {
        code => "event.set('message',event.get('contents'))"
      }
      #ruby {
      #  code => "event.set('@timestamp',event.get('time').time.localtime)"
      #}
      mutate {
        remove_field => ["contents"]
        convert => ["index_date", "string"]
        #convert => ["@timestamp", "string"]
        gsub => ["index_date", "T.*Z",""]
        #gsub => ["@timestamp", "T.*Z",""]
      }
    }
    output {
      elasticsearch {
        #hosts => ["https://ai-middle-cluster-es-http:9200"]
        hosts => ["http://x.x.x.x:40000","http://x.x.x.x:40000","http://x.x.x.x:40000"] # es地址
        index => "%{[@metadata][kafka][topic]}-%{+YYYY-MM-dd}" # 索引规则
        user => "XXXXXX" #jes的用户名
        password => "xxxxx" #jes的密码
        ssl => "false"
        ssl_certificate_verification => "false"
        #cacert => "/usr/share/logstash/cert/ca_logstash.cer"
      }
      stdout {
        codec => rubydebug
      }
    }
4.核心价值
在私有化部署的基础上通过简单改造实现了与京东内部中间件的完美融合,使得系统在高可用性上适应性更强、可用范围更广。