Spark Streaming 重启 + 删除任务日志

  • Post author:
  • Post category:其他


#!/bin/bash
# Restart helper for the clickstream Spark Streaming job: preamble.
# Switches to the working directory, prints an operator hint, and captures the
# id of the currently running YARN application before it gets stopped.
set -x  # trace every command so the restart can be followed in the console

# The rest of the script uses run.log relative to this directory, so abort
# instead of carrying on in the wrong place when the directory is missing.
cd /mnt/trafficflow || {
    echo "cannot cd to /mnt/trafficflow" >&2
    exit 1
}

echo "Please waiting streaming log to output \"Empty stream, no data to process, job can be killed safty ........\" , and then kill the flowtraffic pid"
# First tab-separated column of every `yarn application -list` row that
# mentions clickstream_flow. May be empty (no match) or hold several
# newline-separated ids (several matches).
job_id=$(yarn application -list | awk -F '\t' '/clickstream_flow/ {print $1}')
# Force-kill every process whose command line matches the StreamMain class.
# Globals:   none
# Arguments: none
# Outputs:   "Stream closed!" on stdout after a successful kill;
#            diagnostics on stderr otherwise.
# Returns:   0 if the processes were killed or none were running,
#            1 if kill itself failed.
closeStream(){
    local pid
    local -a pids=()

    # pgrep may print zero, one or several PIDs (one per line); collect them
    # into an array so each is passed to kill as a separate, quoted argument.
    while IFS= read -r pid; do
        [[ -n "$pid" ]] && pids+=("$pid")
    done < <(pgrep -f "com.litb.flowtraffic.clickstream.StreamMain")

    # Nothing matched: calling kill without a PID would only print a usage
    # error, so report the situation and treat it as already stopped.
    if (( ${#pids[@]} == 0 )); then
        echo "No running StreamMain process found; nothing to kill." >&2
        return 0
    fi

    # Only claim success when kill actually succeeded.
    if ! kill -9 "${pids[@]}"; then
        echo "Failed to kill StreamMain process(es): ${pids[*]}" >&2
        return 1
    fi
    echo "Stream closed!"
}


# Follow only new lines of run.log and stop as soon as one contains the marker
# text; sed quits on the first match, which ends the wait.
# NOTE(review): the spelling "safty" presumably mirrors the application's own
# log message, so it must not be corrected here without changing the app too.
tail -n 0 -f run.log | sed '/job can be killed safty/q'
closeStream
sleep 5  # short pause between killing the old process and starting a new one

# Report success only when the start script really exited with status 0.
if sh /mnt/trafficflow/start.sh; then
    echo "start ok"
else
    echo "start.sh failed with exit status $?" >&2
fi

# Remove the .inprogress log of each previously captured application id.
# job_id may be empty or contain several newline-separated ids; handle one id
# per iteration and skip blanks so no malformed path is ever passed to hadoop.
while IFS= read -r app_id; do
    [[ -n "${app_id}" ]] || continue
    # stdin is redirected so hadoop cannot consume the loop's input.
    hadoop fs -rm "/var/log/spark/apps/${app_id}.inprogress" </dev/null
done <<< "${job_id}"
# Launch the StreamMain job in the background, detached from the terminal via
# nohup; stdout and stderr both go to run.log, which is truncated on start.
# NOTE(review): if this line lives in the same script that already runs
# start.sh, the job would be submitted twice — confirm this is not simply the
# content of start.sh shown alongside the restart script.
nohup spark-submit \
    --class com.litb.flowtraffic.clickstream.StreamMain \
    --executor-memory 6g \
    --executor-cores 4 \
    --num-executors 8 \
    --driver-memory 2g \
    --driver-cores 4 \
    --conf spark.default.parallelism=200 \
    --conf spark.storage.memoryFraction=0.5 \
    --conf spark.shuffle.memoryFraction=0.3 \
    s3://litb.dev.bi/jars/flowtraffic-1.1.2.jar \
    >run.log 2>&1 &