TimescaleDB 时序数据库
大约 12 分钟 · 约 3467 字
TimescaleDB 时序数据库
简介
TimescaleDB 是基于 PostgreSQL 构建的开源时序数据库,它通过自动分区(Hypertable)将时序数据按时间和空间维度拆分,同时保留了 PostgreSQL 的完整功能。TimescaleDB 特别适合 IoT 设备监控、应用性能监控、金融数据分析、业务指标追踪等场景,在保留关系型数据库灵活性的同时,提供了时序数据的高效存储和查询能力。
特点
Hypertable 超级表
创建与使用 Hypertable
-- TimescaleDB ships as a PostgreSQL extension
-- Install/enable the extension
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- 1. Create a plain PostgreSQL table first
CREATE TABLE sensor_readings (
time TIMESTAMPTZ NOT NULL, -- time column (required for a hypertable)
sensor_id INTEGER NOT NULL, -- device ID
location TEXT NOT NULL, -- location label
temperature DOUBLE PRECISION, -- temperature
humidity DOUBLE PRECISION, -- humidity
pressure DOUBLE PRECISION, -- air pressure
battery DOUBLE PRECISION -- battery level
);
-- 2. Convert it into a hypertable
-- Partitioned by the time column; optionally also by a "space" column
-- NOTE(review): this positional form (space column + partition count) is the
-- legacy create_hypertable interface; newer TimescaleDB releases prefer
-- add_dimension()/by_range() — confirm against the installed version.
SELECT create_hypertable(
'sensor_readings',
'time', -- time partitioning column
'sensor_id', -- space partitioning column (optional)
3, -- number of space partitions
chunk_time_interval => INTERVAL '1 day' -- time span covered by each chunk
);
-- 3. Insert data (works exactly like a plain PostgreSQL table)
INSERT INTO sensor_readings (time, sensor_id, location, temperature, humidity, pressure, battery)
VALUES
('2026-04-12 10:00:00+08', 1, '北京-海淀区', 25.5, 60.0, 1013.2, 95.0),
('2026-04-12 10:00:00+08', 2, '北京-朝阳区', 24.8, 58.5, 1013.0, 88.0),
('2026-04-12 10:00:00+08', 3, '上海-浦东', 27.1, 65.2, 1012.5, 92.0),
('2026-04-12 10:05:00+08', 1, '北京-海淀区', 25.7, 59.8, 1013.1, 94.8),
('2026-04-12 10:05:00+08', 2, '北京-朝阳区', 24.6, 58.2, 1012.9, 87.5),
('2026-04-12 10:05:00+08', 3, '上海-浦东', 27.3, 65.0, 1012.4, 91.5);
-- 4. Bulk loading (prefer COPY or multi-row INSERT for throughput)
-- Generates 100 synthetic rows per 5-minute step between the two dates
INSERT INTO sensor_readings (time, sensor_id, location, temperature, humidity)
SELECT
time + (n || ' minutes')::INTERVAL,
(random() * 10)::INT + 1,
'Location-' || ((random() * 5)::INT + 1),
20 + random() * 15,
50 + random() * 30
FROM generate_series('2026-04-01'::TIMESTAMPTZ, '2026-04-12'::TIMESTAMPTZ, '5 minutes') AS time,
generate_series(1, 100) AS n;
-- 5. Inspect hypertable metadata
SELECT * FROM timescaledb_information.hypertables;
-- Chunk details (one row per chunk)
SELECT
hypertable_name,
chunk_name,
range_start,
range_end,
is_compressed
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_readings'
ORDER BY range_start DESC;
Hypertable 索引与优化
-- 1. Create indexes
-- A hypertable automatically gets an index on the time column;
-- add extra indexes matching the dominant query patterns.
-- Device + time lookups
CREATE INDEX idx_sensor_time ON sensor_readings (sensor_id, time DESC);
-- Location lookups
CREATE INDEX idx_location ON sensor_readings (location, time DESC);
-- 2. Chunk compression (saves storage)
-- Enable compression on the hypertable
ALTER TABLE sensor_readings SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id, location',
timescaledb.compress_orderby = 'time DESC'
);
-- Manually compress every chunk older than 7 days.
-- show_chunks() is a set-returning function, so it must be iterated in FROM;
-- compress_chunk(show_chunks(...)) is rejected by PostgreSQL (set-returning
-- functions are not allowed as function arguments).
SELECT compress_chunk(c)
FROM show_chunks('sensor_readings', older_than => INTERVAL '7 days') AS c;
-- Automatic compression policy (compress data older than 7 days)
SELECT add_compression_policy(
'sensor_readings',
INTERVAL '7 days'
);
-- Typical compression results (illustrative figures)
-- | Metric     | Before | After                 | Ratio       |
-- |------------|--------|-----------------------|-------------|
-- | Disk usage | 10 GB  | 1.5 GB                | ~85% saved  |
-- | Query perf | base   | slightly slower (decompression) | acceptable |
-- 3. Choosing a partitioning interval
-- chunk_time_interval guidelines
-- | Ingest rate | Interval  | Notes                 |
-- |-------------|-----------|-----------------------|
-- | per second  | 1-6 hours | high-frequency data   |
-- | per minute  | 1 day     | IoT sensor data       |
-- | per hour    | 1 week    | business metrics      |
-- | per day     | 1 month   | daily reports / stats |
-- Change the chunk interval (affects newly created chunks only)
SELECT set_chunk_time_interval('sensor_readings', INTERVAL '6 hours');
连续聚合(Continuous Aggregates)
实时预聚合
-- Continuous aggregate: an automatically maintained materialized view designed
-- for time-series data; the rollup is kept up to date as new rows arrive.
-- 1. Hourly rollup
CREATE MATERIALIZED VIEW sensor_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket, -- bucket rows into 1-hour windows
sensor_id,
location,
AVG(temperature) AS avg_temperature,
MAX(temperature) AS max_temperature,
MIN(temperature) AS min_temperature,
AVG(humidity) AS avg_humidity,
COUNT(*) AS reading_count
FROM sensor_readings
GROUP BY bucket, sensor_id, location;
-- Query the hourly rollup like any view
SELECT
bucket,
sensor_id,
avg_temperature,
max_temperature,
min_temperature,
reading_count
FROM sensor_hourly
WHERE sensor_id = 1
AND bucket >= '2026-04-12 00:00:00+08'
ORDER BY bucket;
-- 2. Daily rollup built on top of the hourly rollup
-- NOTE(review): hierarchical continuous aggregates (a cagg on another cagg)
-- require TimescaleDB >= 2.9 — confirm the installed version. Also note that
-- AVG(avg_temperature) is an average of hourly averages, not a reading-weighted
-- daily average.
CREATE MATERIALIZED VIEW sensor_daily
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 day', bucket) AS day,
sensor_id,
location,
AVG(avg_temperature) AS avg_temperature,
MAX(max_temperature) AS max_temperature,
MIN(min_temperature) AS min_temperature,
SUM(reading_count) AS total_readings
FROM sensor_hourly
GROUP BY day, sensor_id, location;
-- 3. Automatic refresh policies
-- Refresh the hourly rollup every hour (window: 3h ago .. 1h ago)
SELECT add_continuous_aggregate_policy(
'sensor_hourly',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
-- Refresh the daily rollup once a day
SELECT add_continuous_aggregate_policy(
'sensor_daily',
start_offset => INTERVAL '3 days',
end_offset => INTERVAL '1 day',
schedule_interval => INTERVAL '1 day'
);
-- 4. Inspect continuous aggregates
SELECT * FROM timescaledb_information.continuous_aggregates;
-- Inspect their refresh jobs
SELECT * FROM timescaledb_information.jobs
WHERE proc_name = 'policy_refresh_continuous_aggregate';
-- 5. Manually refresh one time window of a continuous aggregate
CALL refresh_continuous_aggregate(
'sensor_hourly',
'2026-04-12 00:00:00+08',
'2026-04-12 12:00:00+08'
);
-- 6. Real-time aggregation (default behavior)
-- A continuous aggregate merges already-materialized history with newly
-- inserted raw rows at query time, so results are always up to date.
-- materialized_only is a view-level storage option, set with
-- ALTER MATERIALIZED VIEW — it is NOT a per-query WITH clause
-- (SELECT ... FROM view WITH (...) is invalid SQL).
-- Serve only materialized data (faster; may lag behind recent inserts)
ALTER MATERIALIZED VIEW sensor_hourly SET (timescaledb.materialized_only = true);
SELECT * FROM sensor_hourly
WHERE bucket >= '2026-04-12 00:00:00+08';
-- Serve real-time + materialized data (the default)
ALTER MATERIALIZED VIEW sensor_hourly SET (timescaledb.materialized_only = false);
SELECT * FROM sensor_hourly
WHERE bucket >= '2026-04-12 00:00:00+08';
多层级聚合设计
-- Multi-level rollup architecture
-- raw data (5-minute granularity)
-- -> hourly rollup
-- -> daily rollup
-- -> monthly rollup
-- Monthly rollup
-- NOTE(review): variable-width buckets ('1 month') inside continuous
-- aggregates require TimescaleDB >= 2.8 — confirm the installed version.
CREATE MATERIALIZED VIEW sensor_monthly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 month', day) AS month,
sensor_id,
location,
AVG(avg_temperature) AS avg_temperature,
MAX(max_temperature) AS max_temperature,
MIN(min_temperature) AS min_temperature,
SUM(total_readings) AS total_readings
FROM sensor_daily
GROUP BY month, sensor_id, location;
-- Automatically refresh the monthly rollup
SELECT add_continuous_aggregate_policy(
'sensor_monthly',
start_offset => INTERVAL '3 months',
end_offset => INTERVAL '1 month',
schedule_interval => INTERVAL '1 day'
);
-- Storage comparison per level
-- | Level    | Granularity | Volume (approx.) | Retention |
-- |----------|-------------|------------------|-----------|
-- | raw data | 5 minutes   | 1M rows/day      | 30 days   |
-- | hourly   | 1 hour      | 24K rows/day     | 1 year    |
-- | daily    | 1 day       | 1K rows/day      | 5 years   |
-- | 月聚合 | 1 月 | ~33 行/天 | 永久 |
数据保留策略
自动数据清理
-- 1. Add data-retention policies
-- Keep raw data for 30 days
SELECT add_retention_policy(
'sensor_readings',
INTERVAL '30 days',
schedule_interval => INTERVAL '1 day'
);
-- Keep the hourly rollup for 1 year
SELECT add_retention_policy(
'sensor_hourly',
INTERVAL '1 year',
schedule_interval => INTERVAL '1 week'
);
-- 2. Inspect retention policies
SELECT
job_id,
application_name,
schedule_interval,
max_runtime,
config
FROM timescaledb_information.jobs
WHERE proc_name = 'policy_retention';
-- 3. Manually drop expired chunks
SELECT drop_chunks(
'sensor_readings',
older_than => INTERVAL '30 days'
);
-- Drop only the chunks that fall entirely inside a specific time range.
-- drop_chunks() has no from_time/to_time parameters; combine
-- newer_than (lower bound) with older_than (upper bound) instead.
SELECT drop_chunks(
'sensor_readings',
older_than => '2026-02-01 00:00:00+08'::TIMESTAMPTZ,
newer_than => '2026-01-01 00:00:00+08'::TIMESTAMPTZ
);
-- 4. Tiered storage strategy
-- | Tier       | Storage        | Retention | Access    |
-- |------------|----------------|-----------|-----------|
-- | hot        | SSD            | 7 days    | frequent  |
-- | warm       | HDD/compressed | 30 days   | medium    |
-- | cold       | object store   | 1 year+   | rare      |
-- | aggregates | SSD            | forever   | analytics |
-- 5. Complete data-lifecycle management
-- Create the raw hypertable
SELECT create_hypertable('sensor_readings', 'time', chunk_time_interval => INTERVAL '1 day');
-- Compress chunks after 7 days
SELECT add_compression_policy('sensor_readings', INTERVAL '7 days');
-- Drop raw chunks after 30 days
SELECT add_retention_policy('sensor_readings', INTERVAL '30 days');
-- Keep long-term history in a continuous aggregate
-- NOTE(review): a view named sensor_daily with a different column list is also
-- created earlier in this document — only one of the two definitions can exist
-- in the same database; reuse or rename one of them.
CREATE MATERIALIZED VIEW sensor_daily
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 day', time) AS day,
sensor_id,
AVG(temperature) AS avg_temperature,
MAX(temperature) AS max_temperature,
MIN(temperature) AS min_temperature
FROM sensor_readings
GROUP BY day, sensor_id;
-- 聚合数据长期保留(不设保留策略,或设很长的保留时间)
时序查询函数
time_bucket 与聚合函数
-- 1. time_bucket — bucket time-series rows into fixed windows
-- Hourly statistics
SELECT
time_bucket('1 hour', time) AS hour,
AVG(temperature) AS avg_temp,
MAX(temperature) AS max_temp,
MIN(temperature) AS min_temp
FROM sensor_readings
WHERE time >= NOW() - INTERVAL '24 hours'
AND sensor_id = 1
GROUP BY hour
ORDER BY hour;
-- 15-minute statistics
SELECT
time_bucket('15 minutes', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temp
FROM sensor_readings
WHERE time >= '2026-04-12 00:00:00+08'
GROUP BY bucket, sensor_id
ORDER BY sensor_id, bucket;
-- Weekly statistics (weeks aligned to Monday via origin)
SELECT
time_bucket('1 week', time,
origin => '2026-01-05 00:00:00+08') AS week_start, -- 2026-01-05 is a Monday
AVG(temperature) AS avg_temp
FROM sensor_readings
WHERE time >= '2026-01-01 00:00:00+08'
GROUP BY week_start
ORDER BY week_start;
-- 2. Bucket endpoint helpers
-- first() and last() — value at the earliest/latest time within each bucket
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
first(temperature, time) AS first_temp, -- first temperature in the hour
last(temperature, time) AS last_temp, -- last temperature in the hour
last(temperature, time) - first(temperature, time) AS temp_change
FROM sensor_readings
WHERE time >= NOW() - INTERVAL '24 hours'
GROUP BY hour, sensor_id
ORDER BY sensor_id, hour;
-- 3. Lag / delta
-- Difference from the previous reading of the same sensor
SELECT
time,
temperature,
temperature - LAG(temperature) OVER (
PARTITION BY sensor_id ORDER BY time
) AS temp_delta
FROM sensor_readings
WHERE sensor_id = 1
AND time >= NOW() - INTERVAL '1 hour'
ORDER BY time;
-- 4. Moving average
-- 30-minute sliding-window average temperature
SELECT
time,
sensor_id,
temperature,
AVG(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
RANGE BETWEEN INTERVAL '30 minutes' PRECEDING AND CURRENT ROW
) AS moving_avg_30min
FROM sensor_readings
WHERE time >= NOW() - INTERVAL '2 hours'
AND sensor_id = 1
ORDER BY time;
-- 5. Anomaly detection
-- Readings more than 3 standard deviations from each sensor's 24h mean
WITH stats AS (
SELECT
sensor_id,
AVG(temperature) AS mean_temp,
STDDEV(temperature) AS std_temp
FROM sensor_readings
WHERE time >= NOW() - INTERVAL '24 hours'
GROUP BY sensor_id
)
SELECT
s.time,
s.sensor_id,
s.temperature,
st.mean_temp,
ABS(s.temperature - st.mean_temp) / NULLIF(st.std_temp, 0) AS z_score -- NULLIF guards div-by-zero
FROM sensor_readings s
JOIN stats st ON s.sensor_id = st.sensor_id
WHERE s.time >= NOW() - INTERVAL '24 hours'
AND ABS(s.temperature - st.mean_temp) > 3 * st.std_temp
ORDER BY z_score DESC;
高级时序分析
-- 1. Day-over-day / week-over-week analysis
WITH daily_stats AS (
SELECT
time_bucket('1 day', time) AS day,
AVG(temperature) AS avg_temp
FROM sensor_readings
WHERE sensor_id = 1
GROUP BY day
)
SELECT
day,
avg_temp,
LAG(avg_temp, 1) OVER (ORDER BY day) AS prev_day_temp,
LAG(avg_temp, 7) OVER (ORDER BY day) AS prev_week_temp,
-- temperature is DOUBLE PRECISION, so avg_temp is too; PostgreSQL has no
-- round(double precision, integer) — cast to NUMERIC before rounding.
ROUND(((avg_temp - LAG(avg_temp, 1) OVER (ORDER BY day)) /
NULLIF(LAG(avg_temp, 1) OVER (ORDER BY day), 0) * 100)::NUMERIC, 2) AS day_over_day_pct,
ROUND(((avg_temp - LAG(avg_temp, 7) OVER (ORDER BY day)) /
NULLIF(LAG(avg_temp, 7) OVER (ORDER BY day), 0) * 100)::NUMERIC, 2) AS week_over_week_pct
FROM daily_stats
ORDER BY day DESC
LIMIT 30;
-- 2. Time-weighted average
-- Weight each reading by the interval until the next reading of the same sensor
SELECT
sensor_id,
SUM(temperature * EXTRACT(EPOCH FROM (next_time - time))) /
SUM(EXTRACT(EPOCH FROM (next_time - time))) AS time_weighted_avg
FROM (
SELECT
sensor_id,
time,
temperature,
LEAD(time) OVER (PARTITION BY sensor_id ORDER BY time) AS next_time
FROM sensor_readings
WHERE time >= '2026-04-12 00:00:00+08'
) t
WHERE next_time IS NOT NULL -- the last reading has no following interval
GROUP BY sensor_id;
-- 3. Device online rate
-- Fraction of the 24 hourly buckets that contain at least one reading
SELECT
sensor_id,
COUNT(*) AS reading_count,
COUNT(DISTINCT time_bucket('1 hour', time)) AS active_hours,
ROUND(COUNT(DISTINCT time_bucket('1 hour', time)) / 24.0 * 100, 2) AS online_rate_pct
FROM sensor_readings
WHERE time >= '2026-04-12 00:00:00+08'
AND time < '2026-04-13 00:00:00+08'
GROUP BY sensor_id
ORDER BY online_rate_pct DESC;
-- 4. Range statistics (temperature distribution)
-- Half-open ranges so boundary values are counted exactly once:
-- the original BETWEEN 20 AND 25 / BETWEEN 25 AND 30 counted 25 twice.
SELECT
sensor_id,
COUNT(*) FILTER (WHERE temperature < 20) AS cold_count,
COUNT(*) FILTER (WHERE temperature >= 20 AND temperature < 25) AS comfortable_count,
COUNT(*) FILTER (WHERE temperature >= 25 AND temperature <= 30) AS warm_count,
COUNT(*) FILTER (WHERE temperature > 30) AS hot_count,
COUNT(*) AS total_count
FROM sensor_readings
WHERE time >= NOW() - INTERVAL '24 hours'
GROUP BY sensor_id
ORDER BY sensor_id;
优点
缺点
总结
TimescaleDB 是一款基于 PostgreSQL 的时序数据库,通过 Hypertable 实现了时序数据的高效存储和自动分区管理。连续聚合功能可以自动维护多层级的预计算结果,大幅提升查询性能。数据保留策略和压缩功能实现了完整的数据生命周期管理。对于已经在使用 PostgreSQL 的团队来说,TimescaleDB 是时序数据场景下最自然的技术选型,可以在不引入新数据库组件的情况下获得专业的时序数据处理能力。
关键知识点
- 数据库主题一定要同时看数据模型、读写模式和执行代价。
- 很多性能问题不是 SQL 语法问题,而是索引、统计信息、事务和数据分布问题。
- 高可用、备份、迁移和治理与查询优化同样重要。
- 先把数据模型、访问模式和执行代价绑定起来理解。
项目落地视角
- 所有优化前后都保留执行计划、样本 SQL 和关键指标对比。
- 上线前准备回滚脚本、备份点和校验方案。
- 把连接池、锁等待、慢查询和容量增长纳入日常巡检。
- 保留执行计划、样本 SQL、索引定义和优化前后指标。
常见误区
- 脱离真实数据分布讨论索引或分片。
- 只看单条 SQL,不看整条业务链路的事务和锁。
- 把测试环境结论直接等同于生产环境结论。
- 脱离真实数据分布设计索引。
进阶路线
- 继续向执行计划、存储引擎、复制机制和数据治理层深入。
- 把主题与 ORM、缓存、消息队列和归档策略联动起来思考。
- 沉淀成数据库设计规范、SQL 审核规则和变更流程。
- 继续深入存储引擎、复制机制、归档与冷热分层治理。
适用场景
- 当你准备把《TimescaleDB 时序数据库》真正落到项目里时,最适合先在一个独立模块或最小样例里验证关键路径。
- 适合数据建模、查询优化、事务控制、高可用和迁移治理。
- 当系统开始遇到慢查询、锁冲突、热点数据或容量增长时,这类主题价值最高。
落地建议
- 先分析真实查询模式、数据量级和写入特征,再决定索引或分片策略。
- 所有优化结论都结合执行计划、样本数据和监控指标验证。
- 高风险操作前准备备份、回滚脚本与校验 SQL。
排错清单
- 先确认瓶颈在 CPU、I/O、锁等待、网络还是 SQL 本身。
- 检查执行计划是否走错索引、是否发生排序或全表扫描。
- 排查长事务、隐式类型转换、统计信息过期和参数嗅探。
复盘问题
- 如果把《TimescaleDB 时序数据库》放进你的当前项目,最先要验证的输入、输出和失败路径分别是什么?
- 《TimescaleDB 时序数据库》最容易在什么规模、什么边界条件下暴露问题?你会用什么指标或日志去确认?
- 相比默认实现或替代方案,采用《TimescaleDB 时序数据库》最大的收益和代价分别是什么?
