多云与混合云架构
大约 13 分钟,约 3994 字
多云与混合云架构
简介
多云(Multi-Cloud)和混合云(Hybrid Cloud)架构是现代企业 IT 基础设施的重要战略选择。多云指同时使用两个或多个公有云服务提供商;混合云则将公有云与私有云/本地数据中心结合使用。这两种模式都旨在避免供应商锁定、优化成本、提高可用性和满足合规要求。
然而,多云和混合云架构也带来了显著的复杂性:跨云网络互联、身份联邦、数据一致性、运维工具统一、成本管理等。本文将系统地探讨这些挑战的解决方案,提供可操作的架构设计指导。
特点
- 供应商独立性:减少对单一云服务商的依赖
- 业务连续性:跨云灾备提高可用性
- 合规遵从:满足数据驻留和主权要求
- 成本优化:利用不同云服务商的价格优势
- 灵活部署:根据工作负载特征选择最佳平台
多云策略
多云架构模式
多云架构模式:
1. 分层模式(Layered Pattern)
┌─────────────────────────────┐
│ CDN / Edge │ CloudFlare / Akamai
├─────────────────────────────┤
│ API Gateway / LB │ Azure Front Door / AWS ALB
├──────────┬──────────────────┤
│ Azure │ AWS │
│ App Svc │ ECS/Fargate │ 应用层
├──────────┼──────────────────┤
│ Azure DB │ RDS/Aurora │ 数据库层
└──────────┴──────────────────┘
2. 活跃-活跃模式(Active-Active)
┌──────────────┐ ┌──────────────┐
│ Azure │ │ AWS │
│ Region A │ │ Region B │
│ (Active) │ │ (Active) │
└──────┬───────┘ └──────┬───────┘
│ 全局负载均衡 │
└────────┬─────────┘
│
┌────┴────┐
│ Users │
└─────────┘
3. 主备模式(Active-Passive)
┌──────────────┐ ┌──────────────┐
│ Azure │ │ AWS │
│ (Primary) │ │ (Standby) │
│ Active │──│ Warm │
└──────────────┘ └──────────────┘
4. 分区模式(Partitioned Pattern)
┌──────────────┐ ┌──────────────┐
│ Azure │ │ AWS │
│ 亚太业务 │ │ 北美业务 │
│ Region: EA │ │ Region: US │
└──────────────┘ └──────────────┘
多云决策框架
/// <summary>
/// Decision engine that recommends which cloud provider a workload should be placed on.
/// </summary>
/// <remarks>
/// A provider can earn up to 100 points: latency (30, only when the workload declares a
/// latency limit), cost (25), compliance (25) and service availability (20).
/// </remarks>
public class WorkloadPlacementEngine
{
    private const double LatencyWeight = 30;
    private const double CostWeight = 25;
    private const double ComplianceWeight = 25;
    private const double ServiceWeight = 20;

    private readonly List<CloudProviderProfile> _providers;

    /// <summary>Creates an engine that ranks the given providers.</summary>
    /// <param name="providers">Candidate providers; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="providers"/> is null.</exception>
    public WorkloadPlacementEngine(List<CloudProviderProfile> providers)
    {
        _providers = providers ?? throw new ArgumentNullException(nameof(providers));
    }

    /// <summary>
    /// Scores every configured provider for <paramref name="workload"/> and returns the
    /// highest-scoring one together with the remaining providers ordered by score.
    /// </summary>
    /// <param name="workload">The workload to place.</param>
    /// <returns>The best provider, its score and reasons, and the ranked alternatives.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="workload"/> is null.</exception>
    /// <exception cref="InvalidOperationException">No providers are configured.</exception>
    public PlacementRecommendation Recommend(WorkloadProfile workload)
    {
        if (workload is null)
        {
            throw new ArgumentNullException(nameof(workload));
        }

        if (_providers.Count == 0)
        {
            throw new InvalidOperationException(
                "No cloud providers are configured; cannot recommend a placement.");
        }

        // Estimate each provider's cost once. The cheapest estimate is the baseline that
        // every provider is compared against, so it must not be recomputed per provider.
        var monthlyCosts = _providers.Select(p => p.EstimateMonthlyCost(workload)).ToList();
        double baselineCost = monthlyCosts.Min();

        // OrderByDescending is stable, so providers with equal scores keep their configured order.
        var ranked = _providers
            .Select((provider, index) => new ProviderScore
            {
                Provider = provider.Name,
                Score = CalculateScore(workload, provider, monthlyCosts[index], baselineCost),
                Reasons = GetReasons(workload, provider)
            })
            .OrderByDescending(s => s.Score)
            .ToList();

        var best = ranked[0];
        return new PlacementRecommendation
        {
            RecommendedProvider = best.Provider,
            Score = best.Score,
            Reasons = best.Reasons,
            // Everything after the winner, by position rather than by name, so that two
            // providers sharing a name are not both dropped.
            Alternatives = ranked.Skip(1).ToList()
        };
    }

    /// <summary>Computes the weighted score of one provider for the workload.</summary>
    private static double CalculateScore(
        WorkloadProfile workload,
        CloudProviderProfile provider,
        double monthlyCost,
        double baselineCost)
    {
        double score = 0;

        // Latency (weight 30): full marks within the limit, proportional credit beyond it.
        if (workload.MaxLatencyMs.HasValue)
        {
            double limit = workload.MaxLatencyMs.Value;
            double latency = provider.GetEstimatedLatency(workload.PrimaryRegion);
            if (latency <= limit)
            {
                score += LatencyWeight;
            }
            else if (latency > 0)
            {
                // Clamp so a non-positive limit cannot produce a negative contribution.
                score += LatencyWeight * Math.Clamp(limit / latency, 0, 1);
            }
        }

        // Cost (weight 25): ratio of the cheapest estimate to this provider's estimate.
        // A non-positive estimate gets full marks instead of dividing by zero.
        score += monthlyCost > 0
            ? CostWeight * Math.Clamp(baselineCost / monthlyCost, 0, 1)
            : CostWeight;

        // Compliance (weight 25): all-or-nothing; an empty requirement list is satisfied.
        if (workload.ComplianceRequirements.All(provider.SupportedCompliance.Contains))
        {
            score += ComplianceWeight;
        }

        // Service availability (weight 20): fraction of required services the provider offers.
        // No required services counts as a full match (avoids 0/0 = NaN).
        int requiredCount = workload.RequiredServices.Count;
        if (requiredCount == 0)
        {
            score += ServiceWeight;
        }
        else
        {
            int matched = workload.RequiredServices.Count(provider.AvailableServices.Contains);
            score += ServiceWeight * matched / requiredCount;
        }

        return score;
    }

    /// <summary>Builds the human-readable reasons behind a provider's score.</summary>
    private List<string> GetReasons(
        WorkloadProfile workload, CloudProviderProfile provider)
    {
        var reasons = new List<string>();
        // ... reason-generation logic (omitted in this example)
        return reasons;
    }
}
/// <summary>
/// Describes a workload that WorkloadPlacementEngine scores against each provider.
/// </summary>
/// <param name="Name">Workload name.</param>
/// <param name="PrimaryRegion">Region handed to the provider's latency estimate.</param>
/// <param name="MaxLatencyMs">
/// Latency limit (presumably milliseconds, per the name); <c>null</c> means the latency
/// criterion is skipped during scoring.
/// </param>
/// <param name="ComplianceRequirements">Compliance standards the provider must all support.</param>
/// <param name="RequiredServices">Services the workload needs from the provider.</param>
/// <param name="ComputeCores">Not read by any code in this document; presumably the requested CPU cores.</param>
/// <param name="StorageTB">Not read by any code in this document; presumably the requested storage in terabytes.</param>
public record WorkloadProfile(
    string Name,
    string PrimaryRegion,
    double? MaxLatencyMs,
    List<string> ComplianceRequirements,
    List<string> RequiredServices,
    double ComputeCores,
    double StorageTB);
/// <summary>
/// Capabilities of one cloud provider as seen by WorkloadPlacementEngine.
/// </summary>
/// <remarks>
/// NOTE(review): WorkloadPlacementEngine calls <c>GetEstimatedLatency(region)</c> and
/// <c>EstimateMonthlyCost(workload)</c> on this type, but neither is declared here.
/// Presumably they are supplied elsewhere (e.g. as extension methods) — confirm.
/// </remarks>
/// <param name="Name">Provider name reported in recommendations.</param>
/// <param name="AvailableRegions">Regions offered by the provider (not read by the scoring code shown).</param>
/// <param name="AvailableServices">Services matched against the workload's required services.</param>
/// <param name="SupportedCompliance">Compliance standards matched against the workload's requirements.</param>
public record CloudProviderProfile(
    string Name,
    List<string> AvailableRegions,
    List<string> AvailableServices,
    List<string> SupportedCompliance);
/// <summary>
/// Score of a single provider together with the reasons behind it.
/// </summary>
public class ProviderScore
{
    /// <summary>Provider name; empty by default.</summary>
    public string Provider { get; set; } = string.Empty;

    /// <summary>Total weighted score.</summary>
    public double Score { get; set; }

    /// <summary>Human-readable reasons; never null by default.</summary>
    public List<string> Reasons { get; set; } = new List<string>();
}
/// <summary>
/// Result of a placement decision: the winning provider plus the ranked alternatives.
/// </summary>
public class PlacementRecommendation
{
    /// <summary>Name of the highest-scoring provider; empty by default.</summary>
    public string RecommendedProvider { get; set; } = string.Empty;

    /// <summary>Score of the recommended provider.</summary>
    public double Score { get; set; }

    /// <summary>Reasons attached to the recommended provider.</summary>
    public List<string> Reasons { get; set; } = new List<string>();

    /// <summary>The other providers with their scores.</summary>
    public List<ProviderScore> Alternatives { get; set; } = new List<ProviderScore>();
}
Terraform 多云抽象
Terraform 多云项目结构
terraform/
├── modules/
│ ├── networking/
│ │ ├── azure/
│ │ │ ├── main.tf
│ │ │ ├── variables.tf
│ │ │ └── outputs.tf
│ │ ├── aws/
│ │ │ ├── main.tf
│ │ │ ├── variables.tf
│ │ │ └── outputs.tf
│ │ └── gcp/
│ │ ├── main.tf
│ │ ├── variables.tf
│ │ └── outputs.tf
│ ├── compute/
│ │ ├── azure/
│ │ │ ├── main.tf
│ │ │ └── variables.tf
│ │ └── aws/
│ │ ├── main.tf
│ │ └── variables.tf
│ ├── database/
│ │ ├── azure/
│ │ └── aws/
│ └── monitoring/
│ ├── azure/
│ └── aws/
├── environments/
│ ├── production/
│ │ ├── azure/
│ │ │ ├── main.tf
│ │ │ ├── backend.tf
│ │ │ └── terraform.tfvars
│ │ └── aws/
│ │ ├── main.tf
│ │ ├── backend.tf
│ │ └── terraform.tfvars
│ └── staging/
│ ├── azure/
│ └── aws/
└── shared/
├── remote-state/
└── providers.tf
Terraform 统一网络模块
# modules/networking/azure/main.tf
# Networking module for Azure: one virtual network plus one subnet per entry in var.subnets.

# Resource group that receives the VNet and its subnets.
variable "resource_group_name" { type = string }
# Azure location of the virtual network.
variable "location" { type = string }
# Address space of the virtual network in CIDR notation.
variable "vnet_cidr" { type = string }
# Environment name; used in resource names and the Environment tag.
variable "environment" { type = string }
# Subnets keyed by logical name; each entry supplies its CIDR and service endpoints.
variable "subnets" {
  type = map(object({
    cidr = string
    service_endpoints = list(string)
  }))
}

resource "azurerm_virtual_network" "main" {
  name = "vnet-${var.environment}"
  location = var.location
  resource_group_name = var.resource_group_name
  address_space = [var.vnet_cidr]
  tags = { Environment = var.environment }
}

# One subnet per map entry; the map key becomes part of the subnet name.
resource "azurerm_subnet" "subnets" {
  for_each = var.subnets
  name = "snet-${each.key}-${var.environment}"
  resource_group_name = var.resource_group_name
  virtual_network_name = azurerm_virtual_network.main.name
  address_prefixes = [each.value.cidr]
  service_endpoints = each.value.service_endpoints
}

# ID of the created virtual network.
output "vnet_id" {
  value = azurerm_virtual_network.main.id
}

# Map of subnet key => subnet ID.
output "subnet_ids" {
  value = { for k, v in azurerm_subnet.subnets : k => v.id }
}
# modules/networking/aws/main.tf
# Networking module for AWS: one VPC plus one subnet per entry in var.subnets.

# CIDR block of the VPC.
variable "vpc_cidr" { type = string }
# Environment name; used in Name tags and the Environment tag.
variable "environment" { type = string }
# Availability zones the subnets are spread across.
variable "availability_zones" {
  type = list(string)

  # The subnet placement below takes an index modulo the list length,
  # so an empty list must be rejected up front.
  validation {
    condition     = length(var.availability_zones) > 0
    error_message = "At least one availability zone must be provided."
  }
}
# Subnets keyed by logical name.
# NOTE(review): `type` is declared but not read anywhere in this module.
variable "subnets" {
  type = map(object({
    cidr = string
    type = string # public, private, isolated
  }))
}

resource "aws_vpc" "main" {
  cidr_block = var.vpc_cidr
  tags = {
    Name        = "vpc-${var.environment}"
    Environment = var.environment
  }
}

resource "aws_subnet" "subnets" {
  for_each   = var.subnets
  vpc_id     = aws_vpc.main.id
  cidr_block = each.value.cidr

  # Round-robin over the availability zones using the subnet key's position in the
  # (lexically sorted) key list. The original expression called index() with a single
  # argument and used an unparenthesised multi-line conditional, both invalid HCL.
  availability_zone = var.availability_zones[
    index(keys(var.subnets), each.key) % length(var.availability_zones)
  ]

  tags = {
    Name = "subnet-${each.key}-${var.environment}"
  }
}

# ID of the created VPC.
output "vpc_id" {
  value = aws_vpc.main.id
}

# Map of subnet key => subnet ID.
output "subnet_ids" {
  value = { for k, v in aws_subnet.subnets : k => v.id }
}
环境配置
# environments/production/azure/main.tf
# Production environment on Azure: remote state in Azure Storage plus the networking module.

terraform {
  backend "azurerm" {
    resource_group_name = "rg-terraform-state"
    storage_account_name = "sttfstateprod"
    container_name = "tfstate"
    key = "azure-production.tfstate"
  }
}

provider "azurerm" {
  features {}
  subscription_id = var.azure_subscription_id
}

# NOTE(review): azurerm_resource_group.main is referenced below but not declared in this
# snippet; presumably it lives in another file of this directory — confirm.
module "networking" {
  source = "../../../modules/networking/azure"
  resource_group_name = azurerm_resource_group.main.name
  location = var.location
  environment = "production"
  vnet_cidr = "10.0.0.0/16"
  subnets = {
    app = { cidr = "10.0.1.0/24", service_endpoints = ["Microsoft.Sql", "Microsoft.Storage"] }
    data = { cidr = "10.0.2.0/24", service_endpoints = ["Microsoft.Sql"] }
    mgmt = { cidr = "10.0.3.0/24", service_endpoints = [] }
  }
}

# Subscription the provider deploys into; marked sensitive so it is redacted in plan output.
variable "azure_subscription_id" {
  type = string
  sensitive = true
}

# Azure location for this environment.
variable "location" {
  default = "East Asia"
}
# environments/production/aws/main.tf
# Production environment on AWS: remote state in S3 plus the networking module.
# NOTE(review): the S3 backend sets neither `encrypt` nor state locking — confirm whether
# that is handled at the bucket level.
terraform {
  backend "s3" {
    bucket = "terraform-state-prod"
    key = "aws-production.tfstate"
    region = "ap-northeast-1"
  }
}

provider "aws" {
  region = var.aws_region
}

# VPC 10.1.0.0/16 does not overlap the Azure VNet range (10.0.0.0/16) used by the
# Azure production environment in this document.
module "networking" {
  source = "../../../modules/networking/aws"
  environment = "production"
  vpc_cidr = "10.1.0.0/16"
  availability_zones = ["ap-northeast-1a", "ap-northeast-1c"]
  subnets = {
    app-public-1 = { cidr = "10.1.1.0/24", type = "public" }
    app-public-2 = { cidr = "10.1.2.0/24", type = "public" }
    app-private-1 = { cidr = "10.1.3.0/24", type = "private" }
    app-private-2 = { cidr = "10.1.4.0/24", type = "private" }
    data-1 = { cidr = "10.1.5.0/24", type = "isolated" }
    data-2 = { cidr = "10.1.6.0/24", type = "isolated" }
  }
}

# Region the AWS provider operates in.
variable "aws_region" {
  default = "ap-northeast-1"
}
数据驻留与合规
数据分类与放置策略
数据分类与云放置策略:
数据敏感级别:
┌────────────────────────────────────────────────┐
│ Level 4: 绝密(Top Secret) │
│ → 私有云 / 本地数据中心 │
│ → 加密存储 + 严格访问控制 │
│ → 例如:金融核心数据、医疗记录 │
├────────────────────────────────────────────────┤
│ Level 3: 机密(Confidential) │
│ → 满足合规的云区域 │
│ → 客户管理的加密密钥(CMK) │
│ → 例如:个人身份信息(PII)、财务数据 │
├────────────────────────────────────────────────┤
│ Level 2: 内部(Internal) │
│ → 任意合规的公有云区域 │
│ → 平台默认加密 │
│ → 例如:内部文档、运营数据 │
├────────────────────────────────────────────────┤
│ Level 1: 公开(Public) │
│ → CDN / 边缘节点 │
│ → 无特殊加密要求 │
│ → 例如:产品文档、公开 API 响应 │
└────────────────────────────────────────────────┘
/// <summary>
/// Data residency policy engine: checks requested storage regions against the
/// built-in GDPR, CCPA and PIPL policies.
/// </summary>
public class DataResidencyEngine
{
    private readonly Dictionary<string, DataResidencyPolicy> _policies;

    /// <summary>Creates the engine with its built-in regulation policies.</summary>
    public DataResidencyEngine()
    {
        // DataResidencyPolicy is a positional record and has no parameterless constructor,
        // so each policy is created through the primary constructor with named arguments
        // (object-initializer syntax does not compile for it).
        _policies = new Dictionary<string, DataResidencyPolicy>
        {
            ["GDPR"] = new DataResidencyPolicy(
                AllowedRegions: new[] { "EU West", "EU North", "EU Central" },
                BlockedRegions: new[] { "China East", "China North" },
                RequiresEncryption: true,
                RequiresCustomerKey: true,
                RetentionDays: 365),
            ["CCPA"] = new DataResidencyPolicy(
                AllowedRegions: new[] { "US West", "US East", "US Central" },
                BlockedRegions: Array.Empty<string>(),
                RequiresEncryption: true,
                RequiresCustomerKey: false,
                RetentionDays: 730),
            ["PIPL"] = new DataResidencyPolicy(
                AllowedRegions: new[] { "China East", "China North" },
                BlockedRegions: new[] { "US West", "EU West" },
                RequiresEncryption: true,
                RequiresCustomerKey: true,
                RetentionDays: 1825) // 5 years
        };
    }

    /// <summary>
    /// Evaluates the target regions of <paramref name="request"/> against every applicable
    /// regulation that has a built-in policy. Regulations without a policy are ignored.
    /// </summary>
    /// <param name="request">Regulations that apply and the regions being considered.</param>
    /// <returns>
    /// Violations found, the regions that are allowed by some regulation and blocked by none,
    /// and the combined encryption requirements.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public PlacementDecision Evaluate(DataPlacementRequest request)
    {
        if (request is null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var violations = new List<string>();
        var allowedCandidates = new List<CloudRegion>();
        var blockedRegions = new HashSet<CloudRegion>();
        var applicablePolicies = new List<DataResidencyPolicy>();

        foreach (var regulation in request.ApplicableRegulations)
        {
            if (!_policies.TryGetValue(regulation, out var policy))
            {
                continue;
            }

            applicablePolicies.Add(policy);

            // Check every target region against this regulation.
            foreach (var targetRegion in request.TargetRegions)
            {
                if (policy.BlockedRegions.Contains(targetRegion.RegionName))
                {
                    violations.Add(
                        $"法规 {regulation} 禁止数据存储在 {targetRegion.RegionName}");
                    blockedRegions.Add(targetRegion);
                }
                else if (policy.AllowedRegions.Contains(targetRegion.RegionName))
                {
                    allowedCandidates.Add(targetRegion);
                }
                // NOTE(review): a region on neither list is silently ignored — it is neither
                // a violation nor allowed. Confirm whether AllowedRegions is meant to be a
                // strict allow-list.
            }
        }

        return new PlacementDecision
        {
            IsCompliant = violations.Count == 0,
            // A region allowed by one regulation but blocked by another must not be reported
            // as allowed, otherwise the result contradicts its own violations.
            AllowedRegions = allowedCandidates
                .Where(region => !blockedRegions.Contains(region))
                .Distinct()
                .ToList(),
            Violations = violations,
            RequiresEncryption = applicablePolicies.Any(p => p.RequiresEncryption),
            RequiresCustomerManagedKey = applicablePolicies.Any(p => p.RequiresCustomerKey)
        };
    }
}
/// <summary>
/// Residency rules of one regulation. This is a positional record, so instances are
/// created through the primary constructor.
/// </summary>
/// <param name="AllowedRegions">Region names the regulation explicitly allows.</param>
/// <param name="BlockedRegions">Region names the regulation forbids; checked before the allowed list.</param>
/// <param name="RequiresEncryption">Whether data under this regulation must be encrypted.</param>
/// <param name="RequiresCustomerKey">Whether a customer-managed key is required.</param>
/// <param name="RetentionDays">Retention period in days (not read by the evaluation code shown).</param>
public record DataResidencyPolicy(
    string[] AllowedRegions,
    string[] BlockedRegions,
    bool RequiresEncryption,
    bool RequiresCustomerKey,
    int RetentionDays);
/// <summary>
/// Input to DataResidencyEngine.Evaluate.
/// </summary>
/// <param name="DataType">Kind of data being placed (not read by the evaluation code shown).</param>
/// <param name="ApplicableRegulations">Regulation names, looked up as policy keys such as "GDPR".</param>
/// <param name="TargetRegions">Candidate regions to check.</param>
public record DataPlacementRequest(
    string DataType,
    List<string> ApplicableRegulations,
    List<CloudRegion> TargetRegions);
/// <summary>A region of a specific cloud provider; compared by value (record equality).</summary>
/// <param name="Provider">Cloud provider name.</param>
/// <param name="RegionName">Region name matched against the policy allow and block lists.</param>
public record CloudRegion(string Provider, string RegionName);
/// <summary>
/// Outcome of a data residency evaluation.
/// </summary>
public class PlacementDecision
{
    /// <summary>True when no violation was recorded.</summary>
    public bool IsCompliant { get; set; }

    /// <summary>Target regions reported as allowed.</summary>
    public List<CloudRegion> AllowedRegions { get; set; } = new List<CloudRegion>();

    /// <summary>One message per detected violation.</summary>
    public List<string> Violations { get; set; } = new List<string>();

    /// <summary>Whether any applicable regulation requires encryption.</summary>
    public bool RequiresEncryption { get; set; }

    /// <summary>Whether any applicable regulation requires a customer-managed key.</summary>
    public bool RequiresCustomerManagedKey { get; set; }
}
跨云灾备
灾备架构设计
跨云灾备架构:
方案1:Azure 主 + AWS 备(Warm Standby)
┌─────────────────────┐ ┌─────────────────────┐
│ Azure │ │ AWS │
│ ┌───────────┐ │ │ ┌───────────┐ │
│ │ App Svc │ │ │ │ ECS │ │
│ │ (Active) │ │ │ │ (Standby) │ │
│ └─────┬─────┘ │ │ └─────┬─────┘ │
│ ┌─────┴─────┐ │ │ ┌─────┴─────┐ │
│ │ Cosmos DB │◄─────┼─────┼─►│ DynamoDB │ │
│ │ (Active) │ CDC │ │ │ (Standby) │ │
│ └───────────┘ │ │ └───────────┘ │
└─────────────────────┘ └─────────────────────┘
│ │
└───────────┬───────────────┘
│
┌──────┴──────┐
│ Azure Front │
│ Door │
│ (DNS Failover)│
└─────────────┘
方案2:DNS 层面故障切换
┌──────────────────┐
│ Route53 / │
│ Azure DNS │ DNS 层故障切换(TTL=60s)
│ Health Check │
└────────┬─────────┘
│
┌────┴────┐
│ │
┌──┴──┐ ┌──┴──┐
│Azure│ │ AWS │
│Active│ │Standby│
└─────┘ └─────┘
/// <summary>
/// Cross-cloud disaster-recovery failover manager: switches traffic from the active
/// endpoint to a healthy standby endpoint.
/// </summary>
public class CrossCloudFailoverManager
{
    // A single shared client: creating an HttpClient per health check exhausts sockets
    // under repeated use.
    private static readonly HttpClient s_healthCheckClient =
        new HttpClient { Timeout = TimeSpan.FromSeconds(10) };

    private readonly ILogger _logger;
    private readonly List<CloudEndpoint> _endpoints;
    private CloudEndpoint? _activeEndpoint;

    /// <summary>Creates the manager; the endpoint flagged as primary (if any) starts as active.</summary>
    /// <param name="logger">Logger for failover progress.</param>
    /// <param name="endpoints">All known endpoints, primary and standby.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public CrossCloudFailoverManager(
        ILogger<CrossCloudFailoverManager> logger,
        List<CloudEndpoint> endpoints)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _endpoints = endpoints ?? throw new ArgumentNullException(nameof(endpoints));
        _activeEndpoint = endpoints.FirstOrDefault(e => e.IsPrimary);
    }

    /// <summary>
    /// Fails over to the first endpoint that is not the active one and is marked healthy.
    /// </summary>
    /// <param name="reason">Why the failover was initiated; written to the log.</param>
    /// <param name="requireApproval">
    /// Kept for API compatibility; this implementation does not consult it.
    /// </param>
    /// <returns>
    /// A result whose <c>Success</c> is false when no standby is available, the standby fails
    /// its health check, or verification after the switch fails.
    /// </returns>
    public async Task<FailoverResult> FailoverAsync(
        string reason, bool requireApproval = true)
    {
        _logger.LogWarning("发起故障切换,原因: {Reason}", reason);

        var targetEndpoint = _endpoints
            .FirstOrDefault(e => e != _activeEndpoint && e.IsHealthy);
        if (targetEndpoint == null)
        {
            return new FailoverResult
            {
                Success = false,
                Message = "没有健康的备用端点"
            };
        }

        // 1. Verify the standby environment is ready.
        if (!await HealthCheck(targetEndpoint))
        {
            return new FailoverResult
            {
                Success = false,
                Message = $"备用端点 {targetEndpoint.Name} 健康检查失败"
            };
        }

        // 2. Stop data synchronization (only logged in this example).
        _logger.LogInformation("停止数据同步...");

        // 3. Point DNS at the standby.
        await UpdateDnsRecord(targetEndpoint);

        // 4. Activate the standby environment.
        await ActivateEndpoint(targetEndpoint);

        // 5. Verify the switch.
        bool verified = await VerifyFailover(targetEndpoint);

        // DNS already points at the target by now, so it becomes the active endpoint even
        // when verification fails; the failure is reported through Success and Message.
        var previousActive = _activeEndpoint;
        _activeEndpoint = targetEndpoint;

        return new FailoverResult
        {
            Success = verified,
            PreviousActive = previousActive?.Name,
            NewActive = targetEndpoint.Name,
            SwitchedAt = DateTime.UtcNow,
            Message = verified ? "故障切换成功" : "故障切换后验证失败"
        };
    }

    /// <summary>
    /// Calls the endpoint's <c>/health</c> URL; any failure (timeout, network error, bad URL)
    /// counts as unhealthy.
    /// </summary>
    private async Task<bool> HealthCheck(CloudEndpoint endpoint)
    {
        try
        {
            // TrimEnd avoids a double slash when the configured URL ends with '/'.
            string healthUrl = $"{endpoint.Url.TrimEnd('/')}/health";
            using var response = await s_healthCheckClient.GetAsync(healthUrl);
            return response.IsSuccessStatusCode;
        }
        catch (Exception ex)
        {
            // Best-effort probe: report unhealthy, but leave a trace instead of swallowing silently.
            _logger.LogWarning(ex, "Health check failed for {Endpoint}", endpoint.Name);
            return false;
        }
    }

    /// <summary>Updates DNS to point at <paramref name="target"/> (simulated here).</summary>
    private async Task UpdateDnsRecord(CloudEndpoint target)
    {
        // A real implementation would call the Azure DNS or Route53 API.
        _logger.LogInformation("更新 DNS 记录指向 {Endpoint}", target.Url);
        await Task.Delay(1000); // simulated DNS update
    }

    /// <summary>Activates the standby environment (simulated here).</summary>
    private async Task ActivateEndpoint(CloudEndpoint endpoint)
    {
        // A real implementation would scale out, start services, etc.
        _logger.LogInformation("激活 {Endpoint}", endpoint.Name);
        await Task.Delay(1000);
    }

    /// <summary>Waits for DNS propagation, then re-runs the health check.</summary>
    private async Task<bool> VerifyFailover(CloudEndpoint endpoint)
    {
        await Task.Delay(5000); // wait for DNS propagation
        return await HealthCheck(endpoint);
    }
}
/// <summary>
/// One deployment endpoint that can serve traffic.
/// </summary>
public class CloudEndpoint
{
    /// <summary>Endpoint name used in logs and results.</summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>Cloud provider hosting the endpoint.</summary>
    public string Provider { get; set; } = string.Empty;

    /// <summary>Base URL; the health check appends <c>/health</c> to it.</summary>
    public string Url { get; set; } = string.Empty;

    /// <summary>True for the endpoint that starts as active.</summary>
    public bool IsPrimary { get; set; }

    /// <summary>Health flag; endpoints are considered healthy by default.</summary>
    public bool IsHealthy { get; set; } = true;
}
/// <summary>
/// Outcome of a failover attempt.
/// </summary>
public class FailoverResult
{
    /// <summary>Whether the failover completed and was verified.</summary>
    public bool Success { get; set; }

    /// <summary>Name of the endpoint that was active before the switch, if any.</summary>
    public string? PreviousActive { get; set; }

    /// <summary>Name of the endpoint that is active after the switch, if any.</summary>
    public string? NewActive { get; set; }

    /// <summary>Time of the switch.</summary>
    public DateTime SwitchedAt { get; set; }

    /// <summary>Human-readable status message.</summary>
    public string Message { get; set; } = string.Empty;
}
成本优化
跨云成本管理
/// <summary>
/// Multi-cloud cost analysis and optimization engine.
/// </summary>
public class MultiCloudCostOptimizer
{
    /// <summary>
    /// Aggregates monthly costs by provider and by service type and collects optimization
    /// opportunities: a cost warning for every provider above 10,000 per month, followed by
    /// a reserved-instance suggestion for every steady compute resource above 500 per month.
    /// </summary>
    /// <param name="costs">Cost line items to analyze.</param>
    /// <returns>Totals plus the list of opportunities.</returns>
    public CostAnalysis AnalyzeCosts(List<CloudCost> costs)
    {
        var totalsByProvider = costs
            .GroupBy(item => item.Provider)
            .ToDictionary(group => group.Key, group => group.Sum(item => item.MonthlyCost));
        var totalsByService = costs
            .GroupBy(item => item.ServiceType)
            .ToDictionary(group => group.Key, group => group.Sum(item => item.MonthlyCost));

        var result = new CostAnalysis
        {
            TotalMonthlyCost = costs.Sum(item => item.MonthlyCost),
            ByProvider = totalsByProvider,
            ByService = totalsByService
        };

        // Cost anomaly detection. Simplified: a real system would compare with history.
        var costWarnings = result.ByProvider
            .Where(entry => entry.Value > 10000)
            .Select(entry => new OptimizationOpportunity
            {
                Provider = entry.Key,
                Category = "成本预警",
                Description = $"{entry.Key} 月度费用超过 $10,000",
                EstimatedSaving = entry.Value * 0.1m,
                Effort = "Low"
            });
        result.OptimizationOpportunities.AddRange(costWarnings);

        // Reserved-instance suggestions for steady compute usage.
        var reservedInstanceHints = costs
            .Where(item => item.ServiceType == "Compute"
                && item.UsagePattern == "Steady"
                && item.MonthlyCost > 500)
            .Select(item => new OptimizationOpportunity
            {
                Provider = item.Provider,
                Category = "预留实例",
                Description = $"{item.ResourceName} 使用模式稳定,建议购买预留实例",
                EstimatedSaving = item.MonthlyCost * 0.3m,
                Effort = "Medium"
            });
        result.OptimizationOpportunities.AddRange(reservedInstanceHints);

        return result;
    }

    /// <summary>
    /// Returns a fixed price comparison across Azure, AWS and GCP for the given region.
    /// </summary>
    /// <remarks>
    /// Simplified example: prices are hard-coded, and <paramref name="serviceType"/> and
    /// <paramref name="specification"/> are not used. A real implementation would query each
    /// cloud's pricing API.
    /// </remarks>
    public List<PricingComparison> ComparePricing(
        string serviceType,
        string region,
        string specification)
    {
        // Monthly price is the hourly price times 730 hours.
        PricingComparison Quote(string provider, string service, decimal hourlyPrice)
        {
            return new PricingComparison
            {
                Provider = provider,
                Service = service,
                HourlyPrice = hourlyPrice,
                MonthlyPrice = hourlyPrice * 730,
                Region = region
            };
        }

        var comparisons = new List<PricingComparison>();
        comparisons.Add(Quote("Azure", "D4s v5", 0.192m));
        comparisons.Add(Quote("AWS", "r5.xlarge", 0.252m));
        comparisons.Add(Quote("GCP", "n2-standard-4", 0.1998m));
        return comparisons;
    }
}
/// <summary>
/// One monthly cost line item for a cloud resource.
/// </summary>
/// <param name="Provider">Cloud provider; costs are grouped by this value.</param>
/// <param name="ResourceName">Resource name shown in reserved-instance suggestions.</param>
/// <param name="ServiceType">Service category; "Compute" items are checked for reserved-instance suggestions.</param>
/// <param name="MonthlyCost">Monthly cost of the resource.</param>
/// <param name="UsagePattern">Usage pattern; "Steady" marks a reserved-instance candidate.</param>
public record CloudCost(
    string Provider,
    string ResourceName,
    string ServiceType,
    decimal MonthlyCost,
    string UsagePattern);
/// <summary>
/// Aggregated cost figures and the optimization opportunities derived from them.
/// </summary>
public class CostAnalysis
{
    /// <summary>Sum of all monthly costs.</summary>
    public decimal TotalMonthlyCost { get; set; }

    /// <summary>Monthly cost per provider.</summary>
    public Dictionary<string, decimal> ByProvider { get; set; } = new Dictionary<string, decimal>();

    /// <summary>Monthly cost per service type.</summary>
    public Dictionary<string, decimal> ByService { get; set; } = new Dictionary<string, decimal>();

    /// <summary>Detected opportunities; empty by default.</summary>
    public List<OptimizationOpportunity> OptimizationOpportunities { get; set; } =
        new List<OptimizationOpportunity>();
}
/// <summary>
/// A single cost-saving suggestion.
/// </summary>
public class OptimizationOpportunity
{
    /// <summary>Provider the suggestion applies to.</summary>
    public string Provider { get; set; } = string.Empty;

    /// <summary>Suggestion category.</summary>
    public string Category { get; set; } = string.Empty;

    /// <summary>Human-readable description.</summary>
    public string Description { get; set; } = string.Empty;

    /// <summary>Estimated monthly saving.</summary>
    public decimal EstimatedSaving { get; set; }

    /// <summary>Effort level, e.g. "Low" or "Medium".</summary>
    public string Effort { get; set; } = string.Empty;
}
/// <summary>
/// Price quote of one provider's offering in a region.
/// </summary>
public class PricingComparison
{
    /// <summary>Cloud provider name.</summary>
    public string Provider { get; set; } = string.Empty;

    /// <summary>Name of the offering (instance size).</summary>
    public string Service { get; set; } = string.Empty;

    /// <summary>Price per hour.</summary>
    public decimal HourlyPrice { get; set; }

    /// <summary>Price per month.</summary>
    public decimal MonthlyPrice { get; set; }

    /// <summary>Region the quote applies to.</summary>
    public string Region { get; set; } = string.Empty;
}
供应商锁定缓解
抽象层设计
/// <summary>
/// Cloud object-storage abstraction.
/// Coding against this interface reduces the dependency on a specific cloud provider.
/// </summary>
public interface ICloudStorage
{
    /// <summary>Uploads <paramref name="data"/> as the named object and returns its URL.</summary>
    Task<string> UploadAsync(string container, string blobName, Stream data);

    /// <summary>Downloads the named object as a stream.</summary>
    Task<Stream> DownloadAsync(string container, string blobName);

    /// <summary>Deletes the named object.</summary>
    Task DeleteAsync(string container, string blobName);

    /// <summary>Returns whether the named object exists.</summary>
    Task<bool> ExistsAsync(string container, string blobName);

    /// <summary>Returns a signed URL for the object that expires after <paramref name="expiry"/>.</summary>
    Task<string> GetSignedUrlAsync(string container, string blobName, TimeSpan expiry);
}
/// <summary>
/// Cloud message-queue abstraction. No implementation appears in this document, so the
/// member descriptions below are inferred from the signatures — confirm against the
/// implementations.
/// </summary>
public interface ICloudQueue
{
    /// <summary>Sends a message; <paramref name="visibilityDelay"/> presumably delays when it becomes visible.</summary>
    Task SendMessageAsync(string queueName, string message, TimeSpan? visibilityDelay = null);

    /// <summary>Receives a message; a nullable result presumably means no message was available.</summary>
    Task<QueueMessage?> ReceiveMessageAsync(string queueName, TimeSpan visibilityTimeout);

    /// <summary>Marks the message as processed.</summary>
    Task CompleteMessageAsync(string queueName, string messageId);

    /// <summary>Moves the message to a dead-letter destination with the given reason.</summary>
    Task DeadLetterMessageAsync(string queueName, string messageId, string reason);
}
/// <summary>
/// Cloud database abstraction. No implementation appears in this document, so the member
/// descriptions below are inferred from the signatures — confirm against the implementations.
/// </summary>
public interface ICloudDatabase
{
    /// <summary>Reads an entity by id; a nullable result presumably means it was not found.</summary>
    Task<T?> GetByIdAsync<T>(string table, string id);

    /// <summary>Inserts or updates an entity.</summary>
    Task UpsertAsync<T>(string table, T entity);

    /// <summary>Deletes an entity by id.</summary>
    Task DeleteAsync(string table, string id);

    /// <summary>Runs a query and returns the matching entities.</summary>
    /// <remarks>
    /// NOTE(review): wrapping <c>IAsyncEnumerable&lt;T&gt;</c> in a <c>Task</c> forces callers to
    /// await before enumerating; returning <c>IAsyncEnumerable&lt;T&gt;</c> directly is the usual
    /// shape. Changing it would break implementers, so it is left as is.
    /// </remarks>
    Task<IAsyncEnumerable<T>> QueryAsync<T>(string table, string query);
}
/// <summary>
/// Azure Blob Storage implementation of <see cref="ICloudStorage"/>.
/// </summary>
public class AzureStorageService : ICloudStorage
{
    private readonly BlobServiceClient _blobServiceClient;

    /// <summary>Creates the service on top of an existing blob service client.</summary>
    public AzureStorageService(BlobServiceClient blobServiceClient)
    {
        _blobServiceClient = blobServiceClient
            ?? throw new ArgumentNullException(nameof(blobServiceClient));
    }

    /// <summary>
    /// Uploads the stream as a blob, creating the container if needed and overwriting any
    /// existing blob, and returns the blob's URI.
    /// </summary>
    public async Task<string> UploadAsync(string container, string blobName, Stream data)
    {
        var containerClient = _blobServiceClient.GetBlobContainerClient(container);
        await containerClient.CreateIfNotExistsAsync();
        var blobClient = containerClient.GetBlobClient(blobName);
        await blobClient.UploadAsync(data, overwrite: true);
        return blobClient.Uri.ToString();
    }

    /// <summary>Downloads the blob content and exposes it as a stream.</summary>
    public async Task<Stream> DownloadAsync(string container, string blobName)
    {
        var blobClient = GetBlobClient(container, blobName);
        var response = await blobClient.DownloadContentAsync();
        return response.Value.Content.ToStream();
    }

    /// <summary>Deletes the blob if it exists.</summary>
    public async Task DeleteAsync(string container, string blobName)
    {
        await GetBlobClient(container, blobName).DeleteIfExistsAsync();
    }

    /// <summary>Returns whether the blob exists.</summary>
    public async Task<bool> ExistsAsync(string container, string blobName)
    {
        return await GetBlobClient(container, blobName).ExistsAsync();
    }

    /// <summary>
    /// Generates a read-only SAS URL for the blob that expires <paramref name="expiry"/> from now.
    /// </summary>
    public Task<string> GetSignedUrlAsync(
        string container, string blobName, TimeSpan expiry)
    {
        var blobClient = GetBlobClient(container, blobName);

        // BlobSasBuilder expects an absolute expiry time (DateTimeOffset), not a duration,
        // so the TimeSpan is converted relative to the current UTC time.
        var sasBuilder = new BlobSasBuilder(
            BlobSasPermissions.Read, DateTimeOffset.UtcNow.Add(expiry))
        {
            BlobContainerName = container,
            BlobName = blobName
        };

        // SAS generation is a local computation, so no await is needed.
        return Task.FromResult(blobClient.GenerateSasUri(sasBuilder).ToString());
    }

    /// <summary>Resolves the client for one blob inside a container.</summary>
    private BlobClient GetBlobClient(string container, string blobName)
    {
        return _blobServiceClient.GetBlobContainerClient(container).GetBlobClient(blobName);
    }
}
/// <summary>
/// Amazon S3 implementation of <see cref="ICloudStorage"/>; the container maps to the
/// bucket name and the blob name to the object key.
/// </summary>
public class AwsStorageService : ICloudStorage
{
    private readonly IAmazonS3 _s3Client;

    /// <summary>Creates the service on top of an existing S3 client.</summary>
    public AwsStorageService(IAmazonS3 s3Client)
    {
        _s3Client = s3Client;
    }

    /// <summary>Puts the stream as an object and returns its virtual-hosted-style URL.</summary>
    public async Task<string> UploadAsync(string container, string blobName, Stream data)
    {
        await _s3Client.PutObjectAsync(new PutObjectRequest
        {
            BucketName = container,
            Key = blobName,
            InputStream = data
        });
        return $"https://{container}.s3.amazonaws.com/{blobName}";
    }

    /// <summary>Returns the object's response stream.</summary>
    public async Task<Stream> DownloadAsync(string container, string blobName)
    {
        var getRequest = new GetObjectRequest { BucketName = container, Key = blobName };
        var getResponse = await _s3Client.GetObjectAsync(getRequest);
        return getResponse.ResponseStream;
    }

    /// <summary>Deletes the object.</summary>
    public async Task DeleteAsync(string container, string blobName)
        => await _s3Client.DeleteObjectAsync(container, blobName);

    /// <summary>
    /// Probes the object's metadata; a 404 response means the object does not exist.
    /// Other S3 errors propagate to the caller.
    /// </summary>
    public async Task<bool> ExistsAsync(string container, string blobName)
    {
        bool found;
        try
        {
            await _s3Client.GetObjectMetadataAsync(container, blobName);
            found = true;
        }
        catch (AmazonS3Exception ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            found = false;
        }

        return found;
    }

    /// <summary>Creates a pre-signed URL that expires <paramref name="expiry"/> from now (UTC).</summary>
    public async Task<string> GetSignedUrlAsync(
        string container, string blobName, TimeSpan expiry)
    {
        var expiresAt = DateTime.UtcNow.Add(expiry);
        var presignRequest = new GetPreSignedUrlRequest
        {
            BucketName = container,
            Key = blobName,
            Expires = expiresAt
        };
        return await _s3Client.GetPreSignedURLAsync(presignRequest);
    }
}
DI 注册
/// <summary>
/// Registers the cloud service implementations selected by configuration.
/// </summary>
public static class CloudServiceExtensions
{
    /// <summary>
    /// Registers the storage, queue and database implementations for the provider named by
    /// the <c>CloudProvider</c> configuration key ("Azure" when the key is absent).
    /// The provider name is matched ignoring case and surrounding whitespace.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <param name="configuration">Application configuration.</param>
    /// <returns>The same service collection, for chaining.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="InvalidOperationException">The configured provider is not supported.</exception>
    public static IServiceCollection AddCloudServices(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        if (configuration is null)
        {
            throw new ArgumentNullException(nameof(configuration));
        }

        string provider = configuration["CloudProvider"] ?? "Azure";

        // Configuration values are typed by people: "azure" or " AWS " should not fail startup.
        string normalized = provider.Trim();

        if (string.Equals(normalized, "Azure", StringComparison.OrdinalIgnoreCase))
        {
            services.AddSingleton<ICloudStorage, AzureStorageService>();
            services.AddSingleton<ICloudQueue, AzureQueueService>();
            services.AddSingleton<ICloudDatabase, AzureCosmosService>();
        }
        else if (string.Equals(normalized, "AWS", StringComparison.OrdinalIgnoreCase))
        {
            services.AddSingleton<ICloudStorage, AwsStorageService>();
            services.AddSingleton<ICloudQueue, AwsSqsService>();
            services.AddSingleton<ICloudDatabase, AwsDynamoDbService>();
        }
        else
        {
            throw new InvalidOperationException($"不支持的云服务商: {provider}");
        }

        return services;
    }
}
混合云模式
本地到云的连接架构
混合云连接方案:
方案1:VPN 连接
┌──────────────────┐ ┌──────────────────┐
│ 本地数据中心 │ │ 公有云 │
│ │ │ │
│ ┌────────┐ │ VPN │ ┌────────┐ │
│ │ 防火墙 │◄─────┼────────►│ │ VPC │ │
│ └───┬────┘ │ Tunnel │ └───┬────┘ │
│ │ │ │ │ │
│ ┌───┴────┐ │ │ ┌───┴────┐ │
│ │ 内部网 │ │ │ │ 子网 │ │
│ └────────┘ │ │ └────────┘ │
└──────────────────┘ └──────────────────┘
方案2:ExpressRoute / Direct Connect
┌──────────────────┐ ┌──────────────────┐
│ 本地数据中心 │ │ 公有云 │
│ │ │ │
│ ┌────────┐ │ Express │ ┌────────┐ │
│ │ Router │◄─────┼─Route──►│ │GW/ER │ │
│ └───┬────┘ │ │ └───┬────┘ │
│ │ │ │ │ │
│ ┌───┴────┐ │ │ ┌───┴────┐ │
│ │ 内部网 │ │ │ │ 子网 │ │
│ └────────┘ │ │ └────────┘ │
└──────────────────┘ └──────────────────┘
私有对等连接 私有网络
混合云身份联邦
/// <summary>
/// Cross-cloud identity federation using OpenID Connect.
/// </summary>
public static class IdentityFederationExtensions
{
    /// <summary>
    /// Registers one OpenID Connect scheme for Azure AD and one for AWS Identity Center from
    /// the <c>IdentityFederation</c> configuration section, plus a <c>CloudAccess</c>
    /// authorization policy that accepts either scheme.
    /// </summary>
    /// <param name="builder">The application builder to configure.</param>
    /// <returns>The same builder, for chaining.</returns>
    /// <exception cref="InvalidOperationException">The configuration section is missing.</exception>
    public static WebApplicationBuilder AddCloudIdentityFederation(
        this WebApplicationBuilder builder)
    {
        // Fail at startup with a clear message instead of a NullReferenceException the first
        // time an authentication handler is resolved.
        var identityConfig = builder.Configuration
            .GetSection("IdentityFederation").Get<IdentityFederationConfig>()
            ?? throw new InvalidOperationException(
                "Configuration section 'IdentityFederation' is missing.");

        // NOTE(review): no cookie/sign-in scheme is registered here; the OIDC handlers need
        // one — confirm it is configured elsewhere in the application.
        builder.Services.AddAuthentication()
            .AddOpenIdConnect("AzureAD", options =>
            {
                ApplyOidcSettings(options, identityConfig.AzureAD);
            })
            .AddOpenIdConnect("AWSIdentityCenter", options =>
            {
                ApplyOidcSettings(options, identityConfig.AWS);

                // Two OIDC handlers cannot share the default callback paths: the request
                // would be claimed by the first handler. The AzureAD scheme keeps the
                // defaults; this scheme gets its own paths (they must be registered as
                // redirect URIs with the identity provider).
                options.CallbackPath = "/signin-oidc-aws";
                options.SignedOutCallbackPath = "/signout-callback-oidc-aws";
            });

        // Policy that accepts either authentication scheme.
        builder.Services.AddAuthorization(options =>
        {
            options.AddPolicy("CloudAccess", policy =>
            {
                policy.RequireAuthenticatedUser();
                policy.AuthenticationSchemes =
                    new[] { "AzureAD", "AWSIdentityCenter" };
            });
        });

        return builder;
    }

    /// <summary>Copies the shared authorization-code-flow settings onto an OIDC handler.</summary>
    private static void ApplyOidcSettings(OpenIdConnectOptions options, OidcConfig config)
    {
        options.Authority = config.Authority;
        options.ClientId = config.ClientId;
        options.ClientSecret = config.ClientSecret;
        options.ResponseType = "code";
        options.SaveTokens = true;
    }
}
/// <summary>
/// Settings bound from the <c>IdentityFederation</c> configuration section.
/// </summary>
public class IdentityFederationConfig
{
    /// <summary>OIDC settings for the Azure AD scheme.</summary>
    public OidcConfig AzureAD { get; set; } = new OidcConfig();

    /// <summary>OIDC settings for the AWS Identity Center scheme.</summary>
    public OidcConfig AWS { get; set; } = new OidcConfig();
}
/// <summary>
/// Connection settings for one OpenID Connect identity provider.
/// </summary>
public class OidcConfig
{
    /// <summary>Authority (issuer) URL.</summary>
    public string Authority { get; set; } = string.Empty;

    /// <summary>Client identifier registered with the provider.</summary>
    public string ClientId { get; set; } = string.Empty;

    /// <summary>Client secret registered with the provider.</summary>
    public string ClientSecret { get; set; } = string.Empty;
}
连接性管理
# Azure VPN Gateway connecting to AWS
# azure-vpn-gateway.tf
resource "azurerm_virtual_network_gateway" "vpn" {
  name                = "vnet-gw-hybrid"
  location            = var.location
  resource_group_name = var.resource_group_name
  type                = "VPN"
  vpn_type            = "RouteBased"
  sku                 = "VpnGw1AZ"
  enable_bgp          = true

  ip_configuration {
    name                          = "vnetGatewayConfig"
    public_ip_address_id          = azurerm_public_ip.vpn.id
    private_ip_address_allocation = "Dynamic"
    subnet_id                     = var.gateway_subnet_id
  }

  bgp_settings {
    asn = 65001
    peering_addresses {
      # Inside the tunnel's /30 (169.254.21.0/30, pinned on the AWS side below) AWS takes
      # the first usable address (.1) and the customer gateway - Azure here - must use the
      # second (.2). Using .1 on the Azure side collides with the AWS BGP peer address.
      apipa_addresses = ["169.254.21.2"]
    }
  }
}

# AWS VPN Connection
# aws-vpn-connection.tf
resource "aws_vpn_gateway" "vpn_gw" {
  vpc_id = var.vpc_id
  tags   = { Name = "hybrid-vpn-gw" }
}

# Represents the Azure gateway on the AWS side; bgp_asn matches the Azure gateway's ASN above.
resource "aws_customer_gateway" "azure" {
  bgp_asn    = 65001
  ip_address = var.azure_vpn_public_ip
  type       = "ipsec.1"
  tags       = { Name = "azure-cgw" }
}

resource "aws_vpn_connection" "to_azure" {
  vpn_gateway_id      = aws_vpn_gateway.vpn_gw.id
  customer_gateway_id = aws_customer_gateway.azure.id
  type                = "ipsec.1"
  static_routes_only  = false # dynamic routing over BGP

  # Pin the tunnel's inside CIDR so it matches the Azure APIPA address above; otherwise
  # AWS picks a random 169.254.x.x/30 and the BGP session cannot be established.
  tunnel1_inside_cidr   = "169.254.21.0/30"
  tunnel1_preshared_key = var.vpn_preshared_key
}
总结
多云和混合云架构为企业提供了灵活性、可用性和成本优化的机会,但也增加了运维复杂性。成功的关键在于:统一的抽象层、自动化的基础设施管理、清晰的故障切换策略和完善的成本控制。
关键知识点
- 多云策略包括分层、活跃-活跃、主备和分区四种模式
- Terraform 是多云基础设施管理的首选工具
- 数据驻留策略必须考虑各国的合规法规(GDPR、CCPA、PIPL)
- 云服务抽象接口减少供应商锁定风险
- 身份联邦实现跨云统一认证
- VPN/ExpressRoute/Direct Connect 实现混合云网络互联
常见误区
误区1:多云总是比单云好
多云增加了运维复杂度和成本。如果业务没有明确的多云需求(合规、灾备、成本),单云策略可能更合适。
误区2:抽象层可以完全屏蔽云差异
不同云的服务能力和 API 语义存在本质差异。过度抽象可能导致无法利用云特有功能或增加不必要的复杂度。
误区3:跨云灾备只需要数据复制
灾备不仅需要数据复制,还需要应用部署、DNS 切换、健康检查和团队演练。
误区4:Terraform 可以解决所有多云问题
Terraform 管理基础设施,但应用层的差异(SDK、服务集成)需要通过代码抽象来解决。
进阶路线
- 云原生标准化:Kubernetes 和容器化作为多云统一运行时
- 服务网格:Istio/Linkerd 跨云服务通信
- GitOps:ArgoCD/Flux 跨云持续部署
- FinOps:云成本优化专业实践
- 零信任网络:跨云零信任安全架构
适用场景
- 跨国企业的数据合规要求
- 金融行业的高可用灾备
- 企业从本地数据中心迁移到云的过渡阶段
- 大型企业的供应商风险分散策略
- 利用不同云的特有服务优势
落地建议
- 从一个云开始,逐步扩展到多云
- 建立统一的云抽象层,但不要过度设计
- 使用 Terraform 管理所有云基础设施
- 建立跨云的监控和告警体系
- 定期进行跨云灾备演练
- 建立统一的成本管理和优化流程
- 确保团队具备多云运维能力
排错清单
复盘问题
- 你的多云策略的主要驱动力是什么?(合规、灾备、成本?)
- 你的跨云灾备 RTO/RPO 目标是多少?上次演练是什么时候?
- 你的云抽象层覆盖了哪些服务?是否有过度抽象的问题?
- 你的多云月度成本是多少?是否有优化空间?
- 团队是否具备多云运维能力?
延伸阅读
- Terraform Multi-Cloud Documentation
- Azure Hybrid Connection
- AWS Hybrid Cloud
- Cloud Security Alliance
- 《Multi-Cloud Architecture and Governance》- Jeroen Mulder
- 《Cloud Strategy》- Gregor Hohpe
