Initial commit - Fully working architecture

commit e0b11ef1a2
Author: ECAILLE Fabrice (externe)
Date:   2017-03-17 12:34:00 +01:00

10 changed files with 344 additions and 0 deletions
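The commit wires up a complete buffered logging pipeline: two generator containers write log files, Filebeat tails them and forwards events to a Logstash "shipper", the shipper publishes them to Kafka, and a Logstash "indexer" consumes the topics, parses the events, and stores them in Elasticsearch for Kibana to visualize:

apache/random log generators -> Filebeat -> Logstash shipper -> Kafka -> Logstash indexer -> Elasticsearch -> Kibana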

1
.env Normal file

@@ -0,0 +1 @@
COMPOSE_CONVERT_WINDOWS_PATHS=1
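COMPOSE_CONVERT_WINDOWS_PATHS=1 makes Docker Compose translate Windows-style host paths in volume definitions into the Unix-style paths the Docker daemon expects. It is only relevant when Compose runs from Windows, e.g. under Docker Toolbox, which the docker-machine default address 192.168.99.100 used in docker-compose.yml also points to.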

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
logs/

1
config/timezone Normal file

@@ -0,0 +1 @@
Europe/Paris

100
docker-compose.yml Normal file

@@ -0,0 +1,100 @@
version: '2'
services:
  #################
  # Log producers #
  #################
  apache_log_generator:
    image: febbweiss/apache-log-generator
    volumes:
      - ./logs/apache:/var/log/apache
      - ./config/timezone:/etc/timezone
      - ./config/timezone:/etc/localtime
  random_log_generator: # Star Wars quote generator
    image: davidmccormick/random_log_generator
    command: python log_generator.py --logFile /var/log/random/random.log
    volumes:
      - ./logs/random:/var/log/random
  #############
  # Log agent #
  #############
  filebeat:
    image: nguoianphu/docker-filebeat
    volumes:
      - ./filebeat/filebeat.yml:/filebeat.yml
      - ./logs/random:/var/log/random
      - ./logs/apache:/var/log/apache
    links:
      - shipper
  ####################
  # Logstash shipper #
  ####################
  shipper:
    image: docker.elastic.co/logstash/logstash:5.2.2
    ports:
      - "5400:5400"
    links:
      - kafka
    volumes:
      - ./logstash/logstash.yml:/usr/share/logstash/config/logstash.yml
      - ./logstash/shipper/pipeline/:/usr/share/logstash/pipeline/
  ########################
  # Kafka infrastructure #
  ########################
  zookeeper:
    image: wurstmeister/zookeeper
    ports:
      - "2181:2181"
  kafka:
    image: wurstmeister/kafka
    ports:
      - "9092:9092"
    links:
      - zookeeper:zk
    environment:
      KAFKA_ADVERTISED_HOST_NAME: 192.168.99.100
      KAFKA_ADVERTISED_PORT: 9092
      KAFKA_ZOOKEEPER_CONNECT: zk:2181
  #################
  # Elasticsearch #
  #################
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:5.2.2
    volumes:
      - ./elasticsearch/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
    ports:
      - "9200:9200"
      - "9300:9300"
  ####################
  # Logstash indexer #
  ####################
  indexer:
    image: docker.elastic.co/logstash/logstash:5.2.2
    ports:
      - "5401:5400"
    volumes:
      - ./logstash/logstash.yml:/usr/share/logstash/config/logstash.yml
      - ./logstash/indexer/pipeline/:/usr/share/logstash/pipeline/
    links:
      - kafka
      - elasticsearch
  ###########
  # Tooling #
  ###########
  kibana:
    image: docker.elastic.co/kibana/kibana:5.2.2
    ports:
      - "5601:5601"
    volumes:
      - ./kibana/kibana.yml:/etc/kibana/kibana.yml
    links:
      - elasticsearch
  kafka-manager:
    image: sheepkiller/kafka-manager
    ports:
      - "9000:9000"
    links:
      - zookeeper
      - kafka
    environment:
      ZK_HOSTS: "zookeeper:2181"
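KAFKA_ADVERTISED_HOST_NAME is hard-coded to 192.168.99.100, docker-machine's default VM address; Kafka hands this address to connecting clients, so on any other host it must be replaced with that machine's Docker host IP. A minimal sketch of how the advertised address could be parameterized with Compose variable substitution instead of hard-coded (the DOCKER_HOST_IP variable is an assumption, not part of this commit; it could be set in the .env file above):

  kafka:
    image: wurstmeister/kafka
    ports:
      - "9092:9092"
    links:
      - zookeeper:zk
    environment:
      # Hypothetical: take the advertised address from the caller's
      # environment (e.g. DOCKER_HOST_IP=192.168.99.100 in .env)
      # instead of baking it into the file.
      KAFKA_ADVERTISED_HOST_NAME: ${DOCKER_HOST_IP}
      KAFKA_ADVERTISED_PORT: 9092
      KAFKA_ZOOKEEPER_CONNECT: zk:2181

With the addresses right, the whole stack starts with the usual docker-compose up -d.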

5
elasticsearch/elasticsearch.yml Normal file

@@ -0,0 +1,5 @@
cluster.name: "docker-FEC"
network.host: 0.0.0.0
discovery.zen.minimum_master_nodes: 1
#action.auto_create_index: .security,.monitoring*,.watches,.triggered_watches,.watcher-history*
xpack.security.enabled: false
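These five lines configure a single-node cluster: network.host 0.0.0.0 makes the node reachable from the other containers, discovery.zen.minimum_master_nodes stays at 1 because there is only one node, and xpack.security.enabled: false turns off the authentication that the 5.x docker.elastic.co images otherwise enable by default, which is why Logstash and Kibana can connect without credentials.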

24
filebeat/filebeat.yml Normal file

@@ -0,0 +1,24 @@
output:
  logstash:
    enabled: true
    hosts:
      - shipper:5044
    timeout: 15
    # ssl:
    #   certificate_authorities:
    #     - /etc/pki/tls/certs/logstash-beats.crt
filebeat:
  prospectors:
    -
      paths:
        - "/var/log/nginx/*.log"
      document_type: nginx-access
    -
      paths:
        - "/var/log/random/*.log"
      document_type: random
    -
      paths:
        - "/var/log/apache/*.log"
      document_type: apache
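Each prospector's document_type sets the type field on the events it ships, and that field steers the rest of the pipeline: the shipper uses it as the Kafka topic name (topic_id => "%{type}"), the indexer subscribes to exactly those three topics, and its filter and output blocks branch on it. Note that a nginx-access prospector is declared although docker-compose.yml starts no nginx container, so that path simply stays empty.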

101
kibana/kibana.yml Normal file

@@ -0,0 +1,101 @@
# Kibana is served by a back end server. This setting specifies the port to use.
#server.port: 5601
xpack.monitoring.enabled: false
xpack.security.enabled: false
# Specifies the address to which the Kibana server will bind. IP addresses and host names are both valid values.
# The default is 'localhost', which usually means remote machines will not be able to connect.
# To allow connections from remote users, set this parameter to a non-loopback address.
server.host: '0.0.0.0'
# Enables you to specify a path to mount Kibana at if you are running behind a proxy. This only affects
# the URLs generated by Kibana, your proxy is expected to remove the basePath value before forwarding requests
# to Kibana. This setting cannot end in a slash.
#server.basePath: ""
# The maximum payload size in bytes for incoming server requests.
#server.maxPayloadBytes: 1048576
# The Kibana server's name. This is used for display purposes.
#server.name: "your-hostname"
# The URL of the Elasticsearch instance to use for all your queries.
elasticsearch.url: 'http://elasticsearch:9200'
# When this setting's value is true Kibana uses the hostname specified in the server.host
# setting. When the value of this setting is false, Kibana uses the hostname of the host
# that connects to this Kibana instance.
#elasticsearch.preserveHost: true
# Kibana uses an index in Elasticsearch to store saved searches, visualizations and
# dashboards. Kibana creates a new index if the index doesn't already exist.
#kibana.index: ".kibana"
# The default application to load.
#kibana.defaultAppId: "discover"
# If your Elasticsearch is protected with basic authentication, these settings provide
# the username and password that the Kibana server uses to perform maintenance on the Kibana
# index at startup. Your Kibana users still need to authenticate with Elasticsearch, which
# is proxied through the Kibana server.
#elasticsearch.username: "user"
#elasticsearch.password: "pass"
# Paths to the PEM-format SSL certificate and SSL key files, respectively. These
# files enable SSL for outgoing requests from the Kibana server to the browser.
#server.ssl.cert: /path/to/your/server.crt
#server.ssl.key: /path/to/your/server.key
# Optional settings that provide the paths to the PEM-format SSL certificate and key files.
# These files validate that your Elasticsearch backend uses the same key files.
#elasticsearch.ssl.cert: /path/to/your/client.crt
#elasticsearch.ssl.key: /path/to/your/client.key
# Optional setting that enables you to specify a path to the PEM file for the certificate
# authority for your Elasticsearch instance.
#elasticsearch.ssl.ca: /path/to/your/CA.pem
# To disregard the validity of SSL certificates, change this setting's value to false.
#elasticsearch.ssl.verify: true
# Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of
# the elasticsearch.requestTimeout setting.
#elasticsearch.pingTimeout: 1500
# Time in milliseconds to wait for responses from the back end or Elasticsearch. This value
# must be a positive integer.
#elasticsearch.requestTimeout: 30000
# List of Kibana client-side headers to send to Elasticsearch. To send *no* client-side
# headers, set this value to [] (an empty list).
#elasticsearch.requestHeadersWhitelist: [ authorization ]
# Header names and values that are sent to Elasticsearch. Any custom headers cannot be overwritten
# by client-side headers, regardless of the elasticsearch.requestHeadersWhitelist configuration.
#elasticsearch.customHeaders: {}
# Time in milliseconds for Elasticsearch to wait for responses from shards. Set to 0 to disable.
#elasticsearch.shardTimeout: 0
# Time in milliseconds to wait for Elasticsearch at Kibana startup before retrying.
#elasticsearch.startupTimeout: 5000
# Specifies the path where Kibana creates the process ID file.
#pid.file: /var/run/kibana.pid
# Enables you to specify a file where Kibana stores log output.
#logging.dest: stdout
# Set the value of this setting to true to suppress all logging output.
#logging.silent: false
# Set the value of this setting to true to suppress all logging output other than error messages.
#logging.quiet: false
# Set the value of this setting to true to log all events, including system usage information
# and all requests.
#logging.verbose: false
# Set the interval in milliseconds to sample system and process performance
# metrics. Minimum is 100ms. Defaults to 5000.
#ops.interval: 5000
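Everything below the first few lines is the stock commented-out template; the active settings just disable X-Pack monitoring and security to match Elasticsearch, bind the server to 0.0.0.0 so the published port 5601 is reachable from outside the container, and point elasticsearch.url at the linked elasticsearch container.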

94
logstash/indexer/pipeline/… Normal file

@@ -0,0 +1,94 @@
input {
  kafka {
    codec => json
    bootstrap_servers => "kafka:9092"
    topics => ["nginx-access", "random", "apache"]
    client_id => "logstash_indexer_1"
  }
}
filter {
  if [type] == "nginx-access" {
    grok {
      match => [ "message", "%{COMBINEDAPACHELOG}+%{GREEDYDATA:extra_fields}" ]
      overwrite => [ "message" ]
    }
    mutate {
      convert => [ "response", "integer" ]
      convert => [ "bytes", "integer" ]
      convert => [ "responsetime", "float" ]
    }
    geoip {
      source => "clientip"
      target => "geoip"
      add_tag => [ "nginx-geoip" ]
    }
    date {
      match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z" ]
      remove_field => [ "timestamp" ]
    }
    useragent {
      source => "agent"
    }
  }
  if [type] == "random" {
    grok {
      match => [ "message", "(?<timestamp>%{YEAR}[./-]%{MONTHNUM}[./-]%{MONTHDAY}[- ]%{TIME}) %{NUMBER:pid} %{GREEDYDATA:filename} %{NUMBER:line} %{GREEDYDATA:logger} %{LOGLEVEL:severity} %{GREEDYDATA:quote}" ]
      overwrite => [ "message" ]
    }
    date {
      match => [ "timestamp", "YYYY-MM-dd HH:mm:ss,SSS" ]
      remove_field => [ "timestamp" ]
    }
  }
  if [type] == "apache" {
    grok {
      match => [ "message", "%{COMBINEDAPACHELOG}" ]
      overwrite => [ "message" ]
    }
    mutate {
      convert => [ "response", "integer" ]
      convert => [ "bytes", "integer" ]
      convert => [ "responsetime", "float" ]
    }
    geoip {
      source => "clientip"
      target => "geoip"
      add_tag => [ "apache-geoip" ]
    }
    date {
      match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
      remove_field => [ "timestamp" ]
    }
  }
}
output {
  if [type] == "nginx-access" {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "nginx-%{+YYYYMM}"
    }
    stdout {
      codec => rubydebug
    }
  }
  if [type] == "random" {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "random-%{+YYYYMM}"
    }
    stdout {
      codec => rubydebug
    }
  }
  if [type] == "apache" {
    elasticsearch {
      hosts => ["elasticsearch:9200"]
      index => "apache-%{+YYYYMM}"
    }
    stdout {
      codec => rubydebug
    }
  }
}
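The %{+YYYYMM} sprintf reference formats each event's @timestamp, so documents land in monthly indices such as nginx-201703 or random-201703. Because the date filters first overwrite @timestamp with the timestamp parsed out of the log line, events are bucketed by when they were logged rather than when they were indexed.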

2
logstash/logstash.yml Normal file

@@ -0,0 +1,2 @@
http.host: "0.0.0.0"
xpack.monitoring.enabled: false

15
logstash/shipper/pipeline/… Normal file

@@ -0,0 +1,15 @@
input {
  beats {
    port => 5044
  }
}
output {
  kafka {
    codec => json
    bootstrap_servers => "kafka:9092"
    topic_id => "%{type}"
  }
  stdout {
    codec => rubydebug
  }
}
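The shipper is intentionally minimal: it accepts Beats traffic on 5044 (the port filebeat.yml targets through the shipper link; the 5400 published in docker-compose.yml is unrelated to this input) and republishes every event as JSON to a Kafka topic named after its type field, leaving all parsing to the indexer so that Kafka can buffer bursts.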