Azkaban 提交任务编写案例(Flow 2.0)

发布时间:2023-11-23 16:10:25 作者:Yr-Zhang
config:
  # Flow-level properties for the data-export task; the node command in this
  # file reads several of them through ${...} substitution.

  # Account the job is proxied to when it runs.
  user.to.proxy: 'foo'
  # Azkaban $(...) expression: yesterday's date rendered as yyyyMMdd.
  # Single-quoted so the embedded double quotes stay literal.
  day: '$(new("org.joda.time.DateTime").minusDays(1).toString("yyyyMMdd"))'
  # Application jar, located under the job's working directory.
  jarPATH: '${working.dir}/jars/algorithm-framework-1.0-SNAPSHOT.jar'
  # Entry-point class handed to spark-submit via --class.
  mainClassName: 'com.iov.flow.task.StarterTask'
  # NOTE(review): not referenced by the node visible in this file;
  # presumably consumed elsewhere (e.g. by the job itself) - confirm.
  systype: 'idc'

nodes:
  # Single command-type node that submits the Spark job with spark-submit
  # (YARN master, client deploy mode).
  - name: nsure
    type: command
    # No upstream nodes. Written as an explicit empty list rather than a bare
    # key, which YAML would parse as null.
    dependsOn: []
    config:
      # Folded block scalar (>-): the lines below are joined with single spaces
      # into ONE command line, with no trailing newline. Do not add shell-style
      # "\" continuations - YAML keeps them as literal characters, so they end
      # up as stray arguments to spark-submit. Keep every line at the same
      # indentation; a more-indented line would preserve its line break.
      #
      # ${mainClassName}, ${jarPATH} and ${day} are substituted from the
      # flow-level config section of this file.
      command: >-
        spark-submit --class ${mainClassName}
        --master yarn
        --deploy-mode client
        --driver-memory 4g
        --executor-memory 16g
        --executor-cores 1
        --num-executors 100
        --queue default
        --conf spark.default.parallelism=1000
        --conf spark.task.maxFailures=10
        --conf spark.storage.memoryFraction=0.4
        --conf spark.shuffle.memoryFraction=0.4
        --conf spark.shuffle.io.maxRetries=120
        --conf spark.sql.shuffle.partitions=1000
        --conf spark.sql.files.maxPartitionBytes=1073741824
        --conf spark.network.timeout=36000
        --conf dfs.client.socket-timeout=3600000
        --conf spark.shuffle.spill.numElementsForceSpillThreshold=2000000
        ${jarPATH}
        -DtaskType=spark
        -DtaskName=PARKINGPOINTCALCTASK
        -DtaskKind=PARKINGPOINTCALCTASK
        -DorgDataType=1
        -DcalcDate=${day}
        -DtaskOutputPath=/data/tmp/
        -DparamsPgEnv=""
        -Dtime=1800