[关闭]
@hhlf282 2026-03-20T01:02:33.000000Z 字数 12450 阅读 17

数据库设计文档

目标: 设计 Daraz 选品系统的数据库架构
版本: 1.0
创建时间: 2026-03-19


1. ER 图

  1. ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
  2. categories products sellers
  3. ├─────────────┤ ├─────────────┤ ├─────────────┤
  4. id │◄──────│ category_id id
  5. name title │──────►│ seller_id
  6. parent_id price name
  7. level ... rating
  8. └─────────────┘ └──────┬──────┘ └─────────────┘
  9. ┌──────────────┼──────────────┐
  10. ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
  11. price_history sales_history reviews
  12. ├─────────────┤ ├─────────────┤ ├─────────────┤
  13. item_id item_id item_id
  14. price sold_count rating
  15. crawl_time crawl_time comment
  16. └─────────────┘ └─────────────┘ └─────────────┘

2. 表结构

2.1 核心表

  1. -- 类目表
  2. CREATE TABLE categories (
  3. id VARCHAR(50) PRIMARY KEY,
  4. name VARCHAR(200) NOT NULL,
  5. parent_id VARCHAR(50),
  6. level INTEGER NOT NULL DEFAULT 1,
  7. path VARCHAR(500), -- 类目路径,如 "1/10/100"
  8. is_active BOOLEAN DEFAULT TRUE,
  9. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  10. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  11. FOREIGN KEY (parent_id) REFERENCES categories(id)
  12. );
  13. -- 商品表
  14. CREATE TABLE products (
  15. id SERIAL PRIMARY KEY,
  16. item_id VARCHAR(50) UNIQUE NOT NULL,
  17. title VARCHAR(500) NOT NULL,
  18. price DECIMAL(10,2) NOT NULL,
  19. original_price DECIMAL(10,2),
  20. discount_rate DECIMAL(5,2),
  21. currency VARCHAR(3) DEFAULT 'LKR',
  22. -- 类目信息
  23. category_id VARCHAR(50) NOT NULL,
  24. category_name VARCHAR(200),
  25. -- 品牌信息
  26. brand VARCHAR(200),
  27. -- 卖家信息
  28. seller_id VARCHAR(50),
  29. seller_name VARCHAR(200),
  30. -- 评价信息
  31. rating DECIMAL(3,2) CHECK (rating >= 0 AND rating <= 5),
  32. review_count INTEGER DEFAULT 0,
  33. sold_count INTEGER DEFAULT 0,
  34. -- 商品信息
  35. stock_status VARCHAR(50),
  36. images JSONB,
  37. description TEXT,
  38. specifications JSONB,
  39. -- 物流信息
  40. shipping_fee DECIMAL(10,2),
  41. delivery_time VARCHAR(100),
  42. return_policy TEXT,
  43. -- 爬取信息
  44. source_url VARCHAR(500),
  45. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  46. -- 系统字段
  47. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  48. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  49. FOREIGN KEY (category_id) REFERENCES categories(id),
  50. FOREIGN KEY (seller_id) REFERENCES sellers(seller_id)
  51. );
  52. -- 卖家表
  53. CREATE TABLE sellers (
  54. id SERIAL PRIMARY KEY,
  55. seller_id VARCHAR(50) UNIQUE NOT NULL,
  56. name VARCHAR(200) NOT NULL,
  57. rating DECIMAL(3,2),
  58. follower_count INTEGER DEFAULT 0,
  59. product_count INTEGER DEFAULT 0,
  60. response_rate DECIMAL(5,2),
  61. ship_on_time_rate DECIMAL(5,2),
  62. chat_response_time VARCHAR(50),
  63. is_verified BOOLEAN DEFAULT FALSE,
  64. is_mall BOOLEAN DEFAULT FALSE,
  65. location VARCHAR(200),
  66. established_date DATE,
  67. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  68. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  69. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  70. );

2.2 历史数据表

  1. -- 价格历史表
  2. CREATE TABLE price_history (
  3. id SERIAL PRIMARY KEY,
  4. item_id VARCHAR(50) NOT NULL,
  5. price DECIMAL(10,2) NOT NULL,
  6. original_price DECIMAL(10,2),
  7. discount_rate DECIMAL(5,2),
  8. currency VARCHAR(3) DEFAULT 'LKR',
  9. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  10. FOREIGN KEY (item_id) REFERENCES products(item_id)
  11. );
  12. -- 销量历史表
  13. CREATE TABLE sales_history (
  14. id SERIAL PRIMARY KEY,
  15. item_id VARCHAR(50) NOT NULL,
  16. sold_count INTEGER NOT NULL,
  17. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  18. FOREIGN KEY (item_id) REFERENCES products(item_id)
  19. );
  20. -- 评论表
  21. CREATE TABLE reviews (
  22. id SERIAL PRIMARY KEY,
  23. item_id VARCHAR(50) NOT NULL,
  24. review_id VARCHAR(50) UNIQUE,
  25. user_name VARCHAR(200),
  26. user_avatar VARCHAR(500),
  27. rating INTEGER CHECK (rating >= 1 AND rating <= 5),
  28. comment TEXT,
  29. images JSONB,
  30. helpful_count INTEGER DEFAULT 0,
  31. is_verified_purchase BOOLEAN DEFAULT FALSE,
  32. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  33. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  34. FOREIGN KEY (item_id) REFERENCES products(item_id)
  35. );
  36. -- 创建索引
  37. CREATE INDEX idx_price_history_item ON price_history(item_id);
  38. CREATE INDEX idx_price_history_time ON price_history(crawl_time);
  39. CREATE INDEX idx_sales_history_item ON sales_history(item_id);
  40. CREATE INDEX idx_sales_history_time ON sales_history(crawl_time);
  41. CREATE INDEX idx_reviews_item ON reviews(item_id);
  42. CREATE INDEX idx_reviews_rating ON reviews(rating);

2.3 供应链表

  1. -- 供应商表
  2. CREATE TABLE suppliers (
  3. id SERIAL PRIMARY KEY,
  4. supplier_id VARCHAR(50) UNIQUE NOT NULL,
  5. name VARCHAR(200) NOT NULL,
  6. source_platform VARCHAR(50) DEFAULT '1688',
  7. source_url VARCHAR(500),
  8. rating DECIMAL(3,2),
  9. years_in_business INTEGER,
  10. response_rate DECIMAL(5,2),
  11. repeat_purchase_rate DECIMAL(5,2),
  12. is_verified BOOLEAN DEFAULT FALSE,
  13. is_manufacturer BOOLEAN DEFAULT FALSE,
  14. location VARCHAR(200),
  15. main_categories JSONB,
  16. crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  17. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  18. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  19. );
  20. -- 商品 - 供应商关联表
  21. CREATE TABLE product_sources (
  22. id SERIAL PRIMARY KEY,
  23. product_id INTEGER NOT NULL,
  24. supplier_id VARCHAR(50) NOT NULL,
  25. source_url VARCHAR(500) NOT NULL,
  26. -- 价格信息
  27. source_price DECIMAL(10,2) NOT NULL, -- 采购价 (RMB)
  28. currency VARCHAR(3) DEFAULT 'CNY',
  29. -- 起订量
  30. moq INTEGER DEFAULT 1,
  31. -- 物流信息
  32. shipping_cost DECIMAL(10,2),
  33. lead_time INTEGER, -- 供货周期 (天)
  34. shipping_from VARCHAR(200),
  35. -- 匹配信息
  36. match_score DECIMAL(5,2), -- 匹配度评分
  37. match_method VARCHAR(50), -- 匹配方式 (manual/auto)
  38. -- 系统字段
  39. is_active BOOLEAN DEFAULT TRUE,
  40. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  41. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  42. FOREIGN KEY (product_id) REFERENCES products(id),
  43. FOREIGN KEY (supplier_id) REFERENCES suppliers(supplier_id),
  44. UNIQUE (product_id, supplier_id, source_url)
  45. );

2.4 选品评估表

  1. -- 选品评估表
  2. CREATE TABLE product_evaluations (
  3. id SERIAL PRIMARY KEY,
  4. item_id VARCHAR(50) NOT NULL,
  5. -- 评估时间
  6. evaluation_date DATE NOT NULL DEFAULT CURRENT_DATE,
  7. -- 各维度评分
  8. market_demand_score DECIMAL(3,2), -- 市场需求 (0-10)
  9. profitability_score DECIMAL(3,2), -- 利润空间 (0-10)
  10. competition_score DECIMAL(3,2), -- 竞争程度 (0-10)
  11. supply_chain_score DECIMAL(3,2), -- 供应链 (0-10)
  12. risk_score DECIMAL(3,2), -- 风险 (0-10)
  13. -- 综合评分
  14. total_score DECIMAL(5,2),
  15. recommendation VARCHAR(50), -- 强烈推荐/推荐/谨慎/不推荐
  16. -- 预测数据
  17. predicted_monthly_sales INTEGER,
  18. predicted_gross_margin DECIMAL(5,2),
  19. predicted_roi DECIMAL(5,2),
  20. -- 风险评估
  21. risk_level VARCHAR(50), -- low/medium/high
  22. risk_factors JSONB,
  23. -- 运营建议
  24. suggestions JSONB,
  25. -- 评估来源
  26. evaluated_by VARCHAR(50), -- ai/manual
  27. model_version VARCHAR(50),
  28. -- 后续跟踪
  29. actual_monthly_sales INTEGER, -- 实际月销量 (后续填充)
  30. actual_gross_margin DECIMAL(5,2), -- 实际毛利率 (后续填充)
  31. is_success BOOLEAN, -- 是否成功 (后续填充)
  32. success_date DATE, -- 成功/失败确认日期
  33. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  34. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  35. FOREIGN KEY (item_id) REFERENCES products(item_id)
  36. );
  37. -- 选品决策表
  38. CREATE TABLE product_decisions (
  39. id SERIAL PRIMARY KEY,
  40. evaluation_id INTEGER NOT NULL,
  41. -- 决策信息
  42. decision VARCHAR(50) NOT NULL, -- approve/reject/pending
  43. decision_date DATE NOT NULL,
  44. decided_by VARCHAR(100) NOT NULL, -- 决策人
  45. -- 采购计划
  46. planned_quantity INTEGER,
  47. planned_budget DECIMAL(12,2),
  48. expected_launch_date DATE,
  49. -- 实际执行
  50. actual_quantity INTEGER,
  51. actual_cost DECIMAL(12,2),
  52. actual_launch_date DATE,
  53. -- 备注
  54. notes TEXT,
  55. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  56. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  57. FOREIGN KEY (evaluation_id) REFERENCES product_evaluations(id)
  58. );
  59. -- 创建索引
  60. CREATE INDEX idx_evaluations_item ON product_evaluations(item_id);
  61. CREATE INDEX idx_evaluations_date ON product_evaluations(evaluation_date);
  62. CREATE INDEX idx_evaluations_score ON product_evaluations(total_score);
  63. CREATE INDEX idx_decisions_evaluation ON product_decisions(evaluation_id);
  64. CREATE INDEX idx_decisions_date ON product_decisions(decision_date);

2.5 运营数据表

  1. -- 销售订单表
  2. CREATE TABLE sales_orders (
  3. id SERIAL PRIMARY KEY,
  4. order_id VARCHAR(100) UNIQUE NOT NULL,
  5. item_id VARCHAR(50) NOT NULL,
  6. -- 订单信息
  7. order_date TIMESTAMP NOT NULL,
  8. quantity INTEGER NOT NULL,
  9. unit_price DECIMAL(10,2) NOT NULL,
  10. total_amount DECIMAL(10,2) NOT NULL,
  11. -- 成本信息
  12. source_cost DECIMAL(10,2),
  13. shipping_cost DECIMAL(10,2),
  14. commission DECIMAL(10,2),
  15. gross_profit DECIMAL(10,2),
  16. -- 订单状态
  17. status VARCHAR(50),
  18. payment_method VARCHAR(50),
  19. -- 客户信息
  20. customer_id VARCHAR(100),
  21. customer_location VARCHAR(200),
  22. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  23. FOREIGN KEY (item_id) REFERENCES products(item_id)
  24. );
  25. -- 库存表
  26. CREATE TABLE inventory (
  27. id SERIAL PRIMARY KEY,
  28. item_id VARCHAR(50) NOT NULL,
  29. -- 库存信息
  30. quantity_on_hand INTEGER NOT NULL,
  31. quantity_available INTEGER NOT NULL,
  32. quantity_reserved INTEGER DEFAULT 0,
  33. -- 仓库信息
  34. warehouse_id VARCHAR(50),
  35. warehouse_location VARCHAR(200),
  36. -- 成本信息
  37. unit_cost DECIMAL(10,2),
  38. total_value DECIMAL(12,2),
  39. -- 预警
  40. reorder_point INTEGER,
  41. is_low_stock BOOLEAN DEFAULT FALSE,
  42. -- 时间信息
  43. last_restock_date DATE,
  44. last_sale_date DATE,
  45. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  46. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  47. FOREIGN KEY (item_id) REFERENCES products(item_id)
  48. );
  49. -- 创建索引
  50. CREATE INDEX idx_orders_item ON sales_orders(item_id);
  51. CREATE INDEX idx_orders_date ON sales_orders(order_date);
  52. CREATE INDEX idx_inventory_item ON inventory(item_id);

3. 视图

  1. -- 商品综合视图
  2. CREATE VIEW v_product_summary AS
  3. SELECT
  4. p.item_id,
  5. p.title,
  6. p.price,
  7. p.category_name,
  8. p.seller_name,
  9. p.rating,
  10. p.sold_count,
  11. p.review_count,
  12. -- 价格趋势
  13. ph.current_price,
  14. ph.min_price_30d,
  15. ph.max_price_30d,
  16. ph.avg_price_30d,
  17. -- 销量趋势
  18. sh.current_sales,
  19. sh.avg_sales_30d,
  20. sh.sales_growth_30d,
  21. -- 供应链信息
  22. ps.source_price,
  23. ps.shipping_cost,
  24. ps.lead_time,
  25. ps.supplier_name,
  26. -- 利润测算
  27. (p.price - ps.source_price * 60 - ps.shipping_cost) / p.price as gross_margin,
  28. -- 评估信息
  29. pe.total_score,
  30. pe.recommendation,
  31. pe.risk_level
  32. FROM products p
  33. LEFT JOIN (
  34. SELECT
  35. item_id,
  36. price as current_price,
  37. MIN(price) as min_price_30d,
  38. MAX(price) as max_price_30d,
  39. AVG(price) as avg_price_30d
  40. FROM price_history
  41. WHERE crawl_time > NOW() - INTERVAL '30 days'
  42. GROUP BY item_id
  43. ) ph ON p.item_id = ph.item_id
  44. LEFT JOIN (
  45. SELECT
  46. item_id,
  47. sold_count as current_sales,
  48. AVG(sold_count) as avg_sales_30d,
  49. (sold_count - LAG(sold_count) OVER (PARTITION BY item_id ORDER BY crawl_time)) /
  50. NULLIF(LAG(sold_count) OVER (PARTITION BY item_id ORDER BY crawl_time), 0) as sales_growth_30d
  51. FROM sales_history
  52. WHERE crawl_time > NOW() - INTERVAL '30 days'
  53. ) sh ON p.item_id = sh.item_id
  54. LEFT JOIN (
  55. SELECT DISTINCT ON (item_id)
  56. item_id,
  57. source_price,
  58. shipping_cost,
  59. lead_time,
  60. s.name as supplier_name
  61. FROM product_sources ps
  62. JOIN suppliers s ON ps.supplier_id = s.supplier_id
  63. WHERE ps.is_active = TRUE
  64. ORDER BY item_id, ps.match_score DESC
  65. ) ps ON p.item_id = ps.item_id
  66. LEFT JOIN (
  67. SELECT DISTINCT ON (item_id)
  68. item_id,
  69. total_score,
  70. recommendation,
  71. risk_level
  72. FROM product_evaluations
  73. ORDER BY item_id, evaluation_date DESC
  74. ) pe ON p.item_id = pe.item_id;
  75. -- 类目统计视图
  76. CREATE VIEW v_category_stats AS
  77. SELECT
  78. c.id as category_id,
  79. c.name as category_name,
  80. c.parent_id,
  81. c.level,
  82. -- 商品统计
  83. COUNT(DISTINCT p.item_id) as product_count,
  84. COUNT(DISTINCT p.seller_id) as seller_count,
  85. -- 价格统计
  86. AVG(p.price) as avg_price,
  87. MIN(p.price) as min_price,
  88. MAX(p.price) as max_price,
  89. STDDEV(p.price) as price_variance,
  90. -- 销量统计
  91. AVG(p.sold_count) as avg_sales,
  92. SUM(p.sold_count) as total_sales,
  93. -- 评分统计
  94. AVG(p.rating) as avg_rating,
  95. AVG(p.review_count) as avg_reviews,
  96. -- 头部集中度
  97. (SELECT SUM(sold_count) FROM products p2
  98. WHERE p2.category_id = c.id
  99. ORDER BY sold_count DESC LIMIT 3) /
  100. NULLIF(SUM(p.sold_count), 0) as top3_concentration,
  101. updated_at as last_updated
  102. FROM categories c
  103. LEFT JOIN products p ON c.id = p.category_id AND p.crawl_time > NOW() - INTERVAL '7 days'
  104. WHERE c.is_active = TRUE
  105. GROUP BY c.id, c.name, c.parent_id, c.level;

4. 数据字典

4.1 核心字段说明

表名 字段名 类型 说明 示例
products item_id VARCHAR(50) 商品唯一 ID "123456789"
products price DECIMAL(10,2) 售价 (LKR) 2500.00
products sold_count INTEGER 销量 150
products rating DECIMAL(3,2) 评分 (0-5) 4.5
product_evaluations total_score DECIMAL(5,2) 综合评分 (0-100) 85.5
product_evaluations recommendation VARCHAR(50) 推荐等级 "强烈推荐"
sales_orders gross_profit DECIMAL(10,2) 毛利润 (LKR) 875.00

4.2 枚举值说明

  1. -- 推荐等级
  2. recommendation IN ('强烈推荐', '推荐', '谨慎考虑', '不推荐')
  3. -- 风险等级
  4. risk_level IN ('low', 'medium', 'high')
  5. -- 决策状态
  6. decision IN ('approve', 'reject', 'pending')
  7. -- 订单状态
  8. status IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled', 'refunded')
  9. -- 库存状态
  10. stock_status IN ('in_stock', 'low_stock', 'out_of_stock', 'pre_order')

5. 数据迁移

5.1 初始化脚本

  1. -- 1. 创建数据库
  2. CREATE DATABASE daraz_product_selection
  3. WITH ENCODING = 'UTF8'
  4. LC_COLLATE = 'en_US.UTF-8'
  5. LC_CTYPE = 'en_US.UTF-8';
  6. -- 2. 创建用户
  7. CREATE USER daraz_user WITH PASSWORD 'your_secure_password';
  8. GRANT ALL PRIVILEGES ON DATABASE daraz_product_selection TO daraz_user;
  9. -- 3. 创建扩展
  10. CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
  11. CREATE EXTENSION IF NOT EXISTS "pg_stat_statements";
  12. -- 4. 执行建表脚本
  13. -- (执行上述所有 CREATE TABLE 语句)
  14. -- 5. 授予权限
  15. GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO daraz_user;
  16. GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO daraz_user;

5.2 数据备份

  1. #!/bin/bash
  2. # backup.sh - 每日备份脚本
  3. DATE=$(date +%Y%m%d)
  4. BACKUP_DIR="/backups/daraz"
  5. DB_NAME="daraz_product_selection"
  6. DB_USER="daraz_user"
  7. # 创建备份目录
  8. mkdir -p $BACKUP_DIR
  9. # 备份数据库
  10. pg_dump -U $DB_USER -h localhost $DB_NAME | gzip > $BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz
  11. # 删除 30 天前的备份
  12. find $BACKUP_DIR -name "*.sql.gz" -mtime +30 -delete
  13. # 上传到云存储 (可选)
  14. # aws s3 cp $BACKUP_DIR/${DB_NAME}_${DATE}.sql.gz s3://your-bucket/backups/
  15. echo "Backup completed: ${DB_NAME}_${DATE}.sql.gz"

6. 性能优化

6.1 索引策略

  1. -- 高频查询字段建立索引
  2. CREATE INDEX idx_products_category_price ON products(category_id, price);
  3. CREATE INDEX idx_products_sales_rating ON products(sold_count DESC, rating DESC);
  4. CREATE INDEX idx_products_crawl_time ON products(crawl_time);
  5. -- 组合索引 (根据实际查询)
  6. CREATE INDEX idx_evaluations_score_date ON product_evaluations(total_score DESC, evaluation_date DESC);
  7. -- 部分索引 (只索引活跃数据)
  8. CREATE INDEX idx_products_active ON products(item_id) WHERE stock_status = 'in_stock';
  9. -- 分析索引使用情况
  10. SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
  11. FROM pg_stat_user_indexes
  12. ORDER BY idx_scan DESC;

6.2 分区策略

  1. -- 价格历史表按月分区
  2. CREATE TABLE price_history_2026_01 PARTITION OF price_history
  3. FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
  4. CREATE TABLE price_history_2026_02 PARTITION OF price_history
  5. FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');
  6. -- 销售订单表按月分区
  7. CREATE TABLE sales_orders_2026_01 PARTITION OF sales_orders
  8. FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');

版本: 1.0
创建时间: 2026-03-19
维护者: 技术团队

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注