最近在学习大数据,学习了Flume 和kafka,做了一个小实验,分享一下。
实验环境:一个虚机 ,主机名:master
软件安装,不再重复。下载:apache-flume-1.7.0-bin.tar.gz,kafka_2.10-0.10.1.1.tar
解压、安装。
1. 日志生成脚本:
[root@master software]# more createlog.sh
#!/bin/bash
# Log generator for the Flume -> Kafka experiment.
# Appends one numbered line to /application/software/test.log roughly every
# 0.3 seconds and never exits on its own; stop it with Ctrl-C.
i=1
while true
do
  # Plain ASCII double quotes: typographic quotes are not shell syntax and
  # would be written literally into every log line.
  echo "Flume get log to hdfs bigdata02$i" >> /application/software/test.log
  sleep 0.3
  # Shell arithmetic instead of forking `expr` on every iteration.
  i=$((i + 1))
done
[root@master software]#
[root@master software]# touch /application/software/test.log
2. 配置Flume agent(flume的conf目录)
[root@master conf]# more agentkafka.properties
# Flume agent "a11": declares one source (r11), one channel (c11) and one sink (k11).
a11.sources = r11
a11.channels = c11
a11.sinks = k11
# Source r11: exec source that runs `tail -F` on the generated test log
# and writes its events into channel c11.
a11.sources.r11.type = exec
a11.sources.r11.command = tail -F /application/software/test.log
a11.sources.r11.channels = c11
# Kafka sink k11: reads from channel c11 and publishes to topic "flume-data"
# through the broker at master:9092.
# NOTE(review): brokerList/topic/batchSize/requiredAcks appear to be the older
# KafkaSink property names - confirm against the Flume 1.7.0 user guide whether
# the kafka.* equivalents should be used instead.
a11.sinks.k11.channel = c11
a11.sinks.k11.type = org.apache.flume.sink.kafka.KafkaSink
a11.sinks.k11.brokerList = master:9092
a11.sinks.k11.topic = flume-data
a11.sinks.k11.batchSize = 20
a11.sinks.k11.requiredAcks = 1
# Channel c11: memory-type channel linking source r11 to sink k11.
a11.channels.c11.type = memory
a11.channels.c11.capacity = 10000
a11.channels.c11.transactionCapacity = 100
[root@master conf]#
启动Flume agent
[root@master conf]# bin/flume-ng agent --conf conf --conf-file conf/agentkafka.properties --name a11 -Dflume.root.logger=DEBUG,console
3. 启动kafka
启动zookeeper
bin/zookeeper-server-start.sh config/zookeeper.properties &
启动Kafka Broker
bin/kafka-server-start.sh -daemon config/server.properties
创建topic,名字为flume-data,包含5个分区,副本数为1,数据保留时长为2天(默认是1天)
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 5 --topic flume-data --config delete.retention.ms=172800000
启动消费终端:
[root@master config]# bin/kafka-console-consumer.sh --bootstrap-server master:9092 --topic flume-data --from-beginning
4. 执行日志生成脚本(另一终端)
[root@master software]# ./createlog.sh
5. 查看结果(在步骤3启动的消费终端窗口)
Flume get log to hdfs bigdata02440
Flume get log to hdfs bigdata02441
Flume get log to hdfs bigdata02442
Flume get log to hdfs bigdata02443
Flume get log to hdfs bigdata02444
Flume get log to hdfs bigdata02445
Flume get log to hdfs bigdata02446
Flume get log to hdfs bigdata02447
Flume get log to hdfs bigdata02448
Flume get log to hdfs bigdata02449
Flume get log to hdfs bigdata02450
Flume get log to hdfs bigdata02451
Flume get log to hdfs bigdata02452
Flume get log to hdfs bigdata02453
Flume get log to hdfs bigdata02454
Flume get log to hdfs bigdata02455
Flume get log to hdfs bigdata02456
Flume get log to hdfs bigdata02457
Flume get log to hdfs bigdata02458
Flume get log to hdfs bigdata02459
Flume get log to hdfs bigdata02460
Flume get log to hdfs bigdata02461
Flume get log to hdfs bigdata02462
Flume get log to hdfs bigdata02463
Flume get log to hdfs bigdata02464
Flume get log to hdfs bigdata02465
Flume get log to hdfs bigdata02466
Flume get log to hdfs bigdata02467
Flume get log to hdfs bigdata02468
Flume get log to hdfs bigdata02469
Flume get log to hdfs bigdata02470
Flume get log to hdfs bigdata02471
Flume get log to hdfs bigdata02472
Flume get log to hdfs bigdata02473
Flume get log to hdfs bigdata02474
Flume get log to hdfs bigdata02475
Flume get log to hdfs bigdata02476
Flume get log to hdfs bigdata02477
Flume get log to hdfs bigdata02478
Flume get log to hdfs bigdata02479
Flume get log to hdfs bigdata02480
Flume get log to hdfs bigdata02481
Flume get log to hdfs bigdata02482
Flume get log to hdfs bigdata02483
Flume get log to hdfs bigdata02484