Ansible Role 结构化编排
大约 11 分钟 · 约 3437 字
Ansible Role 结构化编排
简介
当 Playbook 数量开始增多、环境差异变复杂、重复任务越来越多时,Ansible Role 就成为组织自动化配置的核心方式。Role 的价值不只是"把任务拆文件",而是让变量、模板、处理器、依赖关系和可复用逻辑形成统一结构,方便团队协作、版本管理和跨环境交付。
Ansible Role 的设计理念借鉴了软件工程中的模块化思想:将复杂的自动化配置拆分为职责单一、可独立测试、可版本管理的模块。每个 Role 负责一个明确的配置领域(如 Nginx 安装、Docker 部署、监控代理安装),通过变量注入实现环境差异化,通过依赖声明实现组合编排。
特点
Role 目录结构详解
标准目录结构
# Scaffold the roles
ansible-galaxy init roles/nginx
ansible-galaxy init roles/docker
ansible-galaxy init roles/app-deploy

# Typical directory layout
roles/
└── nginx/
    ├── defaults/
    │   └── main.yml          # default variables (lowest precedence, meant to be overridden)
    ├── files/
    │   └── ssl/              # static files (copied verbatim, never rendered as templates)
    │       ├── cert.pem
    │       └── key.pem
    ├── handlers/
    │   └── main.yml          # handlers (actions triggered when configuration changes)
    ├── meta/
    │   └── main.yml          # metadata (author, dependencies, supported platforms)
    ├── tasks/
    │   ├── main.yml          # main task entry point
    │   ├── install.yml       # installation tasks
    │   ├── configure.yml     # configuration tasks
    │   └── selinux.yml       # SELinux-related tasks
    ├── templates/
    │   ├── nginx.conf.j2     # main Jinja2 template
    │   ├── vhost.conf.j2     # virtual host template
    │   └── upstream.conf.j2  # upstream template
    ├── tests/
    │   ├── inventory         # inventory used for testing
    │   └── test.yml          # playbook used for testing
    └── vars/
        └── main.yml          # fixed variables (high precedence, should not be overridden)
各目录职责
# Directory responsibilities
# defaults/ — default variables
#   Lowest precedence; holds default values that are meant to be overridden
#   Good for: port numbers, paths, version numbers, feature switches
# vars/ — fixed variables
#   High precedence; normally should not be overridden
#   Good for: OS-specific fixed paths, internal state variables
# files/ — static files
#   Copied to the target host as-is, without any processing
#   Good for: certificates, configuration files, binaries
# templates/ — Jinja2 templates
#   Rendered with variables, then copied to the target host
#   Good for: configuration files that vary per environment
# handlers/ — handlers
#   Triggered only when configuration actually changed
#   Good for: service restarts, configuration reloads
# meta/ — metadata
#   Role description, author, dependency relationships
#   Good for: Galaxy info, role dependency declarations
# tasks/ — task lists
#   The core logic of the role
#   Can be split further across multiple files
Role 基础任务
# roles/nginx/defaults/main.yml
# Default variables: lowest precedence, so inventory, group_vars or
# extra-vars can override any of them.
---
nginx_worker_processes: auto
nginx_worker_connections: 2048
nginx_worker_rlimit_nofile: 65535
nginx_listen_port: 80
nginx_ssl_listen_port: 443
# Quoted so the lone underscore is unmistakably a string.
nginx_server_name: "_"
nginx_root: /usr/share/nginx/html
nginx_enable_gzip: true
nginx_enable_https: false
nginx_ssl_cert_path: /etc/nginx/ssl/cert.pem
nginx_ssl_key_path: /etc/nginx/ssl/key.pem
nginx_client_max_body_size: 10m
nginx_proxy_timeout: 30s
nginx_upstream_servers: []
nginx_extra_configs: []
# Empty by default so tasks and templates that iterate over the virtual
# hosts never hit an undefined variable.
nginx_vhosts: []
# Inserted verbatim after `log_format main` in nginx.conf.j2; the body must
# stay indented under the key to remain part of the block scalar.
nginx_log_format: |
  '$remote_addr - $remote_user [$time_local] "$request" '
  '$status $body_bytes_sent "$http_referer" '
  '"$http_user_agent" $request_time'
# roles/nginx/vars/main.yml
# Fixed variables (normally not overridden)
---
nginx_package_name: nginx
nginx_service_name: nginx
nginx_conf_dir: /etc/nginx
nginx_conf_path: /etc/nginx/nginx.conf
nginx_confd_dir: /etc/nginx/conf.d
nginx_log_dir: /var/log/nginx
# NOTE(review): Debian/Ubuntu distribution packages usually run nginx as
# `www-data` rather than `nginx` — confirm before applying there.
nginx_user: nginx
# roles/nginx/tasks/main.yml
# Main task entry point
---
- name: Include OS-specific tasks
  include_tasks: "{{ ansible_os_family | lower }}.yml"

- name: Create nginx configuration directory
  file:
    path: "{{ item }}"
    state: directory
    owner: root
    group: root
    mode: '0755'
  loop:
    - "{{ nginx_conf_dir }}"
    - "{{ nginx_confd_dir }}"
    - "{{ nginx_log_dir }}"

- name: Deploy main nginx configuration
  template:
    src: nginx.conf.j2
    dest: "{{ nginx_conf_path }}"
    owner: root
    group: root
    mode: '0644'
    # The rendered file is a complete configuration, so it can be checked
    # on its own before it replaces the live one.
    validate: 'nginx -t -c %s'
  notify: Reload nginx

# No `validate` here: a vhost file is only a fragment (bare `server` blocks)
# and `nginx -t -c <fragment>` always fails. The assembled configuration is
# checked by the dedicated validation task below instead.
- name: Deploy virtual host configurations
  template:
    src: vhost.conf.j2
    dest: "{{ nginx_confd_dir }}/{{ item.server_name | default('default') }}.conf"
    owner: root
    group: root
    mode: '0644'
  # `loop` is evaluated before `when`, so an undefined nginx_vhosts must be
  # defaulted here rather than guarded with `when: nginx_vhosts is defined`.
  loop: "{{ nginx_vhosts | default([]) }}"
  loop_control:
    label: "{{ item.server_name | default('default') }}"
  notify: Reload nginx

- name: Remove old virtual host configurations
  file:
    path: "{{ nginx_confd_dir }}/{{ item }}.conf"
    state: absent
  loop: "{{ nginx_removed_vhosts | default([]) }}"
  notify: Reload nginx

# Fails the play before any handler reloads nginx with a broken config.
- name: Validate the assembled nginx configuration
  command: nginx -t
  changed_when: false

- name: Ensure nginx service is enabled and started
  service:
    name: "{{ nginx_service_name }}"
    state: started
    enabled: true
# roles/nginx/tasks/redhat.yml
---
# Tasks for the RedHat OS family; included from tasks/main.yml through
# "{{ ansible_os_family | lower }}.yml".
- name: Install EPEL repository
  yum:
    name: epel-release
    state: present
  # Only applied on major versions below 8.
  when: ansible_distribution_major_version | int < 8

- name: Install nginx (RHEL/CentOS)
  yum:
    name: "{{ nginx_package_name }}"
    state: present
  notify: Reload nginx

- name: Configure SELinux for nginx
  seboolean:
    name: "{{ item }}"
    state: true
    persistent: true
  loop:
    - httpd_can_network_connect
    - httpd_can_network_connect_db
  when: ansible_selinux.status == 'enabled'
# roles/nginx/tasks/debian.yml
---
# Tasks for the Debian OS family; included from tasks/main.yml through
# "{{ ansible_os_family | lower }}.yml".
- name: Install nginx prerequisites
  apt:
    name:
      - apt-transport-https
      - ca-certificates
    state: present
    update_cache: true

- name: Install nginx (Debian/Ubuntu)
  apt:
    name: "{{ nginx_package_name }}"
    state: present
    update_cache: true
  notify: Reload nginx
# roles/nginx/handlers/main.yml
---
# Handlers run only when a task that notifies them reports a change.
- name: Reload nginx
  service:
    name: "{{ nginx_service_name }}"
    state: reloaded

- name: Restart nginx
  service:
    name: "{{ nginx_service_name }}"
    state: restarted

# Read-only syntax check, so it never reports a change. None of the tasks
# shown in this article notify it.
- name: Validate nginx configuration
  command: nginx -t
  changed_when: false
模板与变量覆盖
{# roles/nginx/templates/nginx.conf.j2 #}
{# Main nginx configuration; every value comes from the role's defaults/vars. #}
user {{ nginx_user }};
worker_processes {{ nginx_worker_processes }};
worker_rlimit_nofile {{ nginx_worker_rlimit_nofile }};
error_log {{ nginx_log_dir }}/error.log warn;
pid /run/nginx.pid;

events {
    worker_connections {{ nginx_worker_connections }};
    use epoll;
}

http {
    {# Use the role variable instead of a second hard-coded /etc/nginx path. #}
    include {{ nginx_conf_dir }}/mime.types;
    default_type application/octet-stream;

    log_format main {{ nginx_log_format }};
    access_log {{ nginx_log_dir }}/access.log main;

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;

{% if nginx_enable_gzip %}
    gzip on;
    gzip_vary on;
    gzip_proxied any;
    gzip_comp_level 5;
    gzip_min_length 1024;
    gzip_types text/plain text/css application/json application/javascript application/xml;
{% endif %}

    client_max_body_size {{ nginx_client_max_body_size }};

{# The upstream group is always named `backend`; proxy_pass targets must use that name. #}
{% if nginx_upstream_servers | length > 0 %}
    upstream backend {
{% for server in nginx_upstream_servers %}
        server {{ server.host }}:{{ server.port | default(80) }}
            weight={{ server.weight | default(1) }}
            max_fails={{ server.max_fails | default(3) }}
            fail_timeout={{ server.fail_timeout | default('10s') }};
{% endfor %}
        keepalive 64;
    }
{% endif %}

    include {{ nginx_confd_dir }}/*.conf;
}
{# roles/nginx/templates/vhost.conf.j2 #}
{# Rendered once per nginx_vhosts entry: the deploying task loops over the
   list and writes one file per entry, exposing the current entry as `item`.
   Iterating over the whole list here would copy every server block into
   every file. #}
{% set vhost = item %}
{% set use_ssl = vhost.ssl | default(false) | bool %}
server {
    {# TLS vhosts fall back to the HTTPS port and need the `ssl` listen flag. #}
    listen {{ vhost.listen | default(nginx_ssl_listen_port if use_ssl else nginx_listen_port) }}{{ ' ssl' if use_ssl else '' }};
    {# Entries without a server_name become the catch-all server. #}
    server_name {{ vhost.server_name | default('_') }};
{% if vhost.root is defined %}
    root {{ vhost.root }};
{% endif %}
{% if use_ssl %}
    ssl_certificate {{ vhost.ssl_cert | default(nginx_ssl_cert_path) }};
    ssl_certificate_key {{ vhost.ssl_key | default(nginx_ssl_key_path) }};
    ssl_protocols TLSv1.2 TLSv1.3;
{% endif %}
{# A vhost-level proxy_pass emits `location /`; do not also list `/` under
   `locations`, or nginx rejects the duplicate location. #}
{% if vhost.proxy_pass is defined %}
    location / {
        proxy_pass {{ vhost.proxy_pass }};
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_connect_timeout {{ nginx_proxy_timeout }};
        proxy_read_timeout {{ nginx_proxy_timeout }};
        proxy_send_timeout {{ nginx_proxy_timeout }};
    }
{% endif %}
{% if vhost.locations is defined %}
{% for location in vhost.locations %}
    location {{ location.path }} {
{% if location.proxy_pass is defined %}
        proxy_pass {{ location.proxy_pass }};
{% elif location.root is defined %}
        root {{ location.root }};
        try_files $uri $uri/ =404;
{% elif location.return_code is defined %}
        return {{ location.return_code }};
{% endif %}
    }
{% endfor %}
{% endif %}
}
多环境配置
# inventory/prod.ini
[web]
web01 ansible_host=10.0.0.11 ansible_user=deploy
web02 ansible_host=10.0.0.12 ansible_user=deploy
web03 ansible_host=10.0.0.13 ansible_user=deploy

[api]
api01 ansible_host=10.0.1.11 ansible_user=deploy
api02 ansible_host=10.0.1.12 ansible_user=deploy

# Values in a `:vars` section are always read as strings, so
# `nginx_enable_https=true` is the string "true", not a boolean. Apply
# `| bool` where it is used, or define booleans in YAML group_vars instead.
[web:vars]
nginx_listen_port=8080
nginx_server_name=prod.example.com
nginx_root=/opt/web/current
nginx_enable_https=true

[api:vars]
nginx_listen_port=80
nginx_server_name=api.example.com
nginx_enable_https=true

[all:vars]
ansible_python_interpreter=/usr/bin/python3
# `accept-new` trusts a host key on first contact but still rejects a changed
# key; `no` would silently accept any key, which is unsafe for production.
ansible_ssh_common_args='-o StrictHostKeyChecking=accept-new'
# group_vars/web.yml
# Shared variables for the web group
---
nginx_worker_processes: auto
nginx_worker_connections: 4096
nginx_enable_gzip: true
nginx_client_max_body_size: 50m
nginx_vhosts:
  - server_name: www.example.com
    # This vhost sets `ssl: true`, so it listens on the HTTPS port.
    listen: 443
    root: /opt/web/current
    ssl: true
    ssl_cert: /etc/nginx/ssl/www.crt
    ssl_key: /etc/nginx/ssl/www.key
    locations:
      - path: /api/
        # nginx.conf.j2 only ever defines an upstream named `backend`.
        proxy_pass: http://backend
      - path: /
        root: /opt/web/current
# Members of the `backend` upstream: the API hosts from inventory/prod.ini,
# on the port set in [api:vars].
nginx_upstream_servers:
  - host: 10.0.1.11
    port: 80
  - host: 10.0.1.12
    port: 80
# group_vars/api.yml
# Shared variables for the api group
---
nginx_worker_connections: 2048
nginx_proxy_timeout: 60s
nginx_vhosts:
  - server_name: api.example.com
    listen: 80
    # The vhost-level proxy_pass already renders `location /`; listing `/`
    # again under `locations` would produce a duplicate location block.
    # The target is `backend`, the only upstream name nginx.conf.j2 defines.
    proxy_pass: http://backend
nginx_upstream_servers:
  - host: 10.0.1.20
    port: 5000
    weight: 3
  - host: 10.0.1.21
    port: 5000
    weight: 1
# site.yml — main playbook
---
# One play per host group; tags allow running a single layer with --tags.
- name: Configure common settings for all nodes
  hosts: all
  become: true
  roles:
    - role: common
      tags: ['common']

- name: Configure web servers
  hosts: web
  become: true
  roles:
    - role: nginx
      tags: ['nginx', 'web']
    - role: app-deploy
      tags: ['app', 'web']
      # Role-scoped variables: they apply to this app-deploy invocation only.
      vars:
        app_name: web-frontend
        app_version: "{{ web_app_version }}"

- name: Configure API servers
  hosts: api
  become: true
  roles:
    - role: nginx
      tags: ['nginx', 'api']
    - role: app-deploy
      tags: ['app', 'api']
      vars:
        app_name: api-server
        app_version: "{{ api_app_version }}"
依赖关系与 Galaxy
# roles/app-deploy/meta/main.yml
---
galaxy_info:
  author: team-devops
  description: Deploy application with systemd service
  company: example
  license: MIT
  min_ansible_version: '2.14'
  platforms:
    - name: EL
      versions:
        - '7'
        - '8'
        - '9'
    - name: Ubuntu
      versions:
        - '20.04'
        - '22.04'

# Roles listed here run before this role's own tasks.
dependencies:
  - role: common
    version: '1.0.0'
    tags: ['always']
  - role: nginx
    version: '2.0.0'
    # Pulled in only when the caller opts in; off by default.
    when: app_deploy_with_nginx | default(false)
# roles/app-deploy/tasks/main.yml
---
# The `user` module cannot assign a primary group that does not exist yet,
# so the group is created first.
- name: Create application group
  group:
    name: "{{ app_group }}"
    system: true

- name: Create application user
  user:
    name: "{{ app_user }}"
    group: "{{ app_group }}"
    create_home: false
    system: true

- name: Create application directories
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0755'
  loop:
    - "{{ app_dir }}"
    - "{{ app_log_dir }}"
    - "{{ app_config_dir }}"

- name: Deploy application package
  unarchive:
    src: "{{ app_package_path }}"
    dest: "{{ app_dir }}"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    # false (the default here): the archive is taken from the control node.
    remote_src: "{{ app_remote_src | default(false) }}"
  notify: Restart application

- name: Deploy application configuration
  template:
    src: app.config.j2
    dest: "{{ app_config_dir }}/application.yml"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0640'
  notify: Restart application

- name: Deploy systemd service file
  template:
    src: app.service.j2
    dest: "/etc/systemd/system/{{ app_service_name }}.service"
    owner: root
    group: root
    mode: '0644'
  notify: Restart application

- name: Ensure application service is running
  systemd:
    name: "{{ app_service_name }}"
    state: started
    enabled: true
    # Makes systemd pick up a newly written or changed unit file.
    daemon_reload: true
# requirements.yml
---
roles:
  - name: geerlingguy.docker
    version: '7.0.2'
    src: geerlingguy.docker
  - name: geerlingguy.repo-epel
    version: '3.1.0'
    src: geerlingguy.repo-epel
  # Git-hosted role: `scm: git` already selects the transport, so `src` is
  # the plain repository URL; `version` is the tag or branch to check out.
  - name: nginx
    version: '2.0.0'
    src: https://github.com/example/ansible-role-nginx.git
    scm: git
# Install the Galaxy dependencies
ansible-galaxy install -r requirements.yml -p roles
# List the installed roles
ansible-galaxy list -p roles
# Roles have no `build` / `publish` subcommands (those exist only for
# collections). A role is published by importing its source repository:
ansible-galaxy role import --server https://galaxy.internal example ansible-role-nginx
执行与调试
# Run the playbook
ansible-playbook -i inventory/prod.ini site.yml
# Run only the given tags
ansible-playbook -i inventory/prod.ini site.yml --tags nginx
# Skip the given tags
ansible-playbook -i inventory/prod.ini site.yml --skip-tags docker
# Limit execution to specific hosts
ansible-playbook -i inventory/prod.ini site.yml --limit web01
# Dry run (nothing is changed, only reports what would change)
ansible-playbook -i inventory/prod.ini site.yml --check
# Dry run plus a diff of the file changes
ansible-playbook -i inventory/prod.ini site.yml --check --diff
# Verbosity levels
ansible-playbook -i inventory/prod.ini site.yml -v     # normal
ansible-playbook -i inventory/prod.ini site.yml -vv    # detailed
ansible-playbook -i inventory/prod.ini site.yml -vvv   # more detailed
ansible-playbook -i inventory/prod.ini site.yml -vvvv  # most detailed
# Start at a specific task
ansible-playbook -i inventory/prod.ini site.yml --start-at-task "Deploy application package"
# Override a variable. JSON keeps the boolean type; `-e "key=true"` would
# pass the string "true" instead.
ansible-playbook -i inventory/prod.ini site.yml -e '{"nginx_enable_https": true}'
# Override variables from a file
ansible-playbook -i inventory/prod.ini site.yml -e @vars/override.yml
# List all tasks
ansible-playbook -i inventory/prod.ini site.yml --list-tasks
# List all tags
ansible-playbook -i inventory/prod.ini site.yml --list-tags
# Check the syntax
ansible-playbook -i inventory/prod.ini site.yml --syntax-check
Ansible Vault 加密敏感数据
# NOTE(review): group_vars/prod/ is only loaded for hosts in a group named
# `prod`; the inventory shown in this article defines only `web` and `api` —
# confirm the group name (or use group_vars/all/).
# Create an encrypted file
ansible-vault create group_vars/prod/vault.yml
# Edit an encrypted file
ansible-vault edit group_vars/prod/vault.yml
# Encrypt an existing file
ansible-vault encrypt group_vars/prod/db_passwords.yml
# Decrypt a file
ansible-vault decrypt group_vars/prod/db_passwords.yml
# View the contents of an encrypted file
ansible-vault view group_vars/prod/vault.yml
# Run with a password file
ansible-playbook -i inventory/prod.ini site.yml --vault-password-file ~/.ansible_vault_pass
# Run with a password script
ansible-playbook -i inventory/prod.ini site.yml --vault-password-file ~/.ansible_vault_pass.sh
# group_vars/prod/vault.yml (encrypted file)
# Decrypted view of the file; on disk it holds only the vault ciphertext.
# Never commit these values in plain text.
db_password: "prod_db_password_123"
api_secret_key: "prod_api_secret_key_456"
redis_password: "prod_redis_password_789"
registry_password: "prod_registry_password_abc"
Molecule 测试
# molecule/default/molecule.yml — Molecule scenario configuration
---
dependency:
  name: galaxy
driver:
  name: docker
platforms:
  # NOTE(review): the stock centos:7 and ubuntu:22.04 images may not ship
  # /usr/bin/python3 — confirm, or use images that have Python preinstalled.
  - name: instance-centos7
    image: "centos:7"
  - name: instance-ubuntu22
    image: "ubuntu:22.04"
provisioner:
  name: ansible
  inventory:
    hosts:
      all:
        vars:
          ansible_python_interpreter: /usr/bin/python3
# The ansible verifier runs the scenario's verify.yml playbook.
verifier:
  name: ansible
# molecule/default/verify.yml
---
- name: Verify nginx role
  hosts: all
  become: true
  tasks:
    # Check mode changes nothing; a reported "changed" means the package
    # was not actually installed.
    - name: Check nginx is installed
      package:
        name: nginx
        state: present
      check_mode: true
      register: nginx_package_check
      failed_when: nginx_package_check.changed

    # Same technique: "changed" here means the service was not running.
    - name: Check nginx service is running
      service:
        name: nginx
        state: started
      check_mode: true
      register: nginx_service_check
      failed_when: nginx_service_check.changed

    - name: Check nginx config exists
      stat:
        path: /etc/nginx/nginx.conf
      register: config
      failed_when: not config.stat.exists

    - name: Check nginx is listening on port 80
      wait_for:
        port: 80
        timeout: 10
# Run the Molecule tests
molecule test      # create -> test -> destroy
molecule create    # create the test instances
molecule converge  # apply the role
molecule verify    # run the verification
molecule destroy   # destroy the test instances
# NOTE(review): recent Molecule releases appear to have dropped the `lint`
# subcommand — confirm, and run ansible-lint / yamllint directly if so.
molecule lint      # lint checks
优点
缺点
总结
Ansible Role 的真正价值在于把重复运维工作沉淀成稳定、可复用、可审计的模块。对于团队项目,建议优先把通用主机初始化、Web 服务部署、应用发布、日志采集、监控代理安装等能力拆成 Role,再通过 site.yml 组合出不同环境的交付流程。
关键知识点
- Role 结构不是强制语法糖,而是大型自动化项目的组织方式
- `defaults` 适合可覆盖默认值,`vars` 更适合强约束变量
- Handler 只在有变更时触发,是保证幂等与减少无效重启的关键
- Role 应以"一个明确职责"为边界,不要做成超级大杂烩
- 变量优先级(由高到低):extra-vars > role vars > host_vars > group_vars > role defaults。注意 Role 的 `vars/main.yml` 优先级高于 inventory 中的 host_vars / group_vars,这正是它适合放"强约束变量"的原因
- `include_tasks` vs `import_tasks`:前者动态加载(支持循环),后者静态加载(解析 Playbook 时展开)
- `validate` 参数可以在部署前验证配置文件语法
项目落地视角
- 基础 Role:时区、用户、日志、SSH、安全基线、监控 Agent
- 中间件 Role:Nginx、Redis、MySQL、Docker、JDK
- 应用 Role:发布包解压、配置模板渲染、systemd 服务管理
- 环境差异通过 inventory/group_vars 管理,而不是复制整份 Playbook
- 敏感数据通过 Ansible Vault 加密管理
- 每个 Role 都应有对应的 Molecule 测试
常见误区
- 把所有任务都塞进一个 Role,导致职责不清
- 在 tasks 里硬编码 IP、路径、端口,不用变量抽象
- 忽略变量优先级,排障时不知道值到底从哪里来的
- 不做 `--check` 和测试验证,直接把 Role 用到生产
- Handler 不使用 `listen`,导致多个配置变更触发多次重启
- 模板中缺少默认值,导致变量未定义时报错
- Role 之间通过隐式变量传递数据,导致耦合
进阶路线
- 学习 `group_vars`、`host_vars` 与 Vault 的组合使用
- 使用 Molecule 为 Role 做自动化测试
- 把 Role 发布到私有 Galaxy 仓库统一复用
- 将 Role 纳入 CI/CD,合并前自动跑 lint 与幂等检查
- 学习 Ansible 的异步执行和策略插件提升大规模执行效率
- 研究 ansible-runner 和 AWX/Tower 的企业级管理方案
适用场景
- 多台服务器统一初始化配置
- Web / API / Worker 应用标准化部署
- 中间件安装与版本升级自动化
- 企业内部基础设施配置规范沉淀
- 多环境(dev/staging/prod)统一自动化管理
- 合规审计与安全基线自动化
落地建议
- 按职责拆分 Role:common、nginx、docker、app 等
- 明确变量命名规范,例如统一 `role_name_*` 前缀
- 把生产敏感数据放进 Ansible Vault,不要写死在 vars 中
- 每个 Role 至少支持 `--check`、`--diff` 和最小测试用例
- 为每个 Role 配置 CI 流水线:lint -> syntax-check -> molecule test
- 定期更新 Galaxy 依赖,关注安全补丁
排错清单
- 检查变量实际来源:defaults、vars、inventory 还是 extra-vars
- 用 `--list-tasks`、`-vvv` 查看执行顺序和条件分支
- 检查 template 渲染结果与 handler 是否按预期触发
- 确认 Role 依赖是否已通过 Galaxy 或本地 roles 目录安装完成
- 检查 Ansible 版本是否支持所用的模块和特性
- 检查目标主机的 Python 版本和依赖
复盘问题
- 这个 Role 的职责边界是否足够清晰?
- 环境差异是否通过变量解决,而不是复制脚本?
- 失败后如何快速定位是变量问题、模板问题还是任务顺序问题?
- 当前 Role 是否已经沉淀出可复用的团队资产?
- 是否有自动化测试保证 Role 的幂等性和正确性?
