Airflow

    安装 Airflow

    Based on Ubuntu 20.04

    pip3 install apache-airflow==2.0.0

    初始化后端 MySQL 数据库

    CREATE DATABASE airflow CHARACTER SET utf8 COLLATE utf8_unicode_ci;
    CREATE USER 'airflow' IDENTIFIED BY 'airflow';
    GRANT ALL PRIVILEGES ON airflow.* TO 'airflow';
    

    配置 MySQL 数据库连接

    vim ~/airflow/airflow.cfg

    sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow
    

    安装 MySQL 驱动

    apt install libmysqlclient-dev python3-dev
    pip3 install 'apache-airflow[mysql]'
    

    初始化数据库

    过程可能有些慢, 耐心等待

    airflow db init
    

    配置 Web 认证

    pip3 install 'apache-airflow[password]'

    airflow users create \
    --username airflow \
    --firstname airflow \
    --lastname airflow \
    --role Admin \
    --email admin@example.org
    

    配置到 Systemd

    编辑 Webserver 启动文件

    vim /etc/systemd/system/airflow-webserver.service

    [Unit]
    Description=Airflow
    Documentation=https://airflow.apache.org/
    After=network-online.target
    Wants=network-online.target
    
    [Service]
    Environment="DAEMON_ARGS=  "
    EnvironmentFile=-/etc/default/%p
    ExecStart=/usr/local/bin/airflow webserver $DAEMON_ARGS
    ExecReload=/bin/kill -HUP $MAINPID
    Restart=on-failure
    
    KillSignal=SIGINT
    
    [Install]
    WantedBy=multi-user.target
    
    

    编辑 Scheduler 启动文件

    vim /etc/systemd/system/airflow-scheduler.service

    [Unit]
    Description=Airflow
    Documentation=https://airflow.apache.org/
    After=network-online.target
    Wants=network-online.target
    
    [Service]
    Environment="DAEMON_ARGS=  "
    EnvironmentFile=-/etc/default/%p
    ExecStart=/usr/local/bin/airflow scheduler $DAEMON_ARGS
    ExecReload=/bin/kill -HUP $MAINPID
    Restart=on-failure
    
    KillSignal=SIGINT
    
    [Install]
    WantedBy=multi-user.target
    
    

    使配置生效

    systemctl daemon-reload

    配置开机启动

    systemctl enable airflow-scheduler.service 
    systemctl enable airflow-webserver.service 
    

    启动服务

    systemctl start airflow-scheduler.service
    systemctl start airflow-webserver.service 
    
    

    查看服务状态

    systemctl status airflow-scheduler.service 
    systemctl status airflow-webserver.service 
    

    资源

    https://airflow.apachecn.org/#/zh/scheduler

    评论栏