Files
railway_cloud/app/services/original_data.py

963 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from sqlalchemy.orm import Session
from sqlalchemy import text, inspect
from sqlalchemy.exc import SQLAlchemyError, DisconnectionError, TimeoutError
from typing import List, Optional, Dict, Any
from ..models.original_data import OriginalData, get_original_data_model, get_table_name
from .base import BaseService
from ..models.settlement_data import SettlementData
from ..models.account import Account
from ..core.database import engine
from ..core.db_monitor import log_pool_status, get_pool_status
from ..core.retry import retry, circuit_breaker, RetryConfig
import logging
logger = logging.getLogger(__name__)
class OriginalDataService(BaseService[OriginalData]):
    """Service layer for per-account "original data" shard tables.

    Each account stores its raw survey readings in its own MySQL table whose
    name is derived from the account id (see ``get_table_name``). Most methods
    below therefore run raw SQL against those shard tables rather than going
    through the nominal ``OriginalData`` model.
    """

    def __init__(self):
        # OriginalData is only the nominal model for the BaseService generic;
        # the real work targets the per-account shard tables.
        super().__init__(OriginalData)

    def _get_table_name(self, account_id: int) -> str:
        """Return the shard table name for the given account id."""
        return get_table_name(account_id)
def _ensure_table_exists(self, db: Session, account_id: int) -> bool:
"""
确保指定账号的原始数据表存在,不存在则创建
Args:
db: 数据库会话
account_id: 账号ID
Returns:
bool: 表是否存在或创建成功
"""
table_name = self._get_table_name(account_id)
inspector = inspect(engine)
# 检查表是否存在
if table_name in inspector.get_table_names():
logger.info(f"Table {table_name} already exists")
# 表已存在时,也执行预热操作
self._warmup_table(db, table_name)
return True
# 表不存在,创建表 - 添加重试机制
max_retries = 3
for attempt in range(max_retries):
try:
create_table_sql = f"""
CREATE TABLE `{table_name}` (
`id` INT AUTO_INCREMENT PRIMARY KEY,
`account_id` INT NOT NULL COMMENT '账号ID',
`bfpcode` VARCHAR(1000) NOT NULL COMMENT '前(后)视点名称',
`mtime` DATETIME NOT NULL COMMENT '测点观测时间',
`bffb` VARCHAR(1000) NOT NULL COMMENT '前(后)视标记符',
`bfpl` VARCHAR(1000) NOT NULL COMMENT '前(后)视距离(m)',
`bfpvalue` VARCHAR(1000) NOT NULL COMMENT '前(后)视尺读数(m)',
`NYID` VARCHAR(100) NOT NULL COMMENT '期数id',
`sort` INT COMMENT '序号',
INDEX `idx_nyid` (`NYID`),
INDEX `idx_account_id` (`account_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='原始数据表_账号{account_id}'
"""
# 使用引擎直接执行不依赖db会话的事务
with engine.begin() as conn: # 创建独立连接
conn.execute(text(create_table_sql))
logger.info(f"Table {table_name} created successfully on attempt {attempt + 1}")
# 表创建后延迟更长时间,避免立即查询触发"表定义变更"错误
import time
time.sleep(0.2)
# 预热表:执行一个简单的查询来稳定表定义
self._warmup_table(db, table_name)
return True
except Exception as e:
logger.warning(f"Attempt {attempt + 1} to create table {table_name} failed: {str(e)}")
# 检查是否是表已存在错误(并发情况下可能发生)
if "already exists" in str(e).lower() or "exist" in str(e).lower():
logger.info(f"Table {table_name} was created by another process")
# 延迟并预热表
import time
time.sleep(0.2)
self._warmup_table(db, table_name)
return True
if attempt == max_retries - 1:
logger.error(f"Failed to create table {table_name} after {max_retries} attempts: {str(e)}")
raise Exception(f"创建原始数据表失败: {str(e)}")
# 短暂延迟后重试
import time
time.sleep(0.1 * (attempt + 1))
return False
def _warmup_table(self, db: Session, table_name: str) -> None:
"""
预热表:执行一个简单查询来稳定表定义,避免"表定义变更"错误
"""
try:
# 执行一个简单的COUNT查询来预热表
warmup_query = text(f"SELECT COUNT(*) FROM `{table_name}` LIMIT 0")
db.execute(warmup_query)
logger.debug(f"Table {table_name} warmed up successfully")
except Exception as e:
logger.warning(f"Failed to warm up table {table_name}: {str(e)}")
# 预热失败不影响主流程,只是记录警告
def create_table_for_account(self, db: Session, account_id: int) -> Dict[str, Any]:
"""
为指定账号创建原始数据表
Args:
db: 数据库会话
account_id: 账号ID
Returns:
操作结果
"""
try:
# 验证账号是否存在
account = db.query(Account).filter(Account.id == account_id).first()
if not account:
return {
'success': False,
'message': f'账号ID {account_id} 不存在'
}
# 创建表 - 现在使用增强的错误处理
table_created = self._ensure_table_exists(db, account_id)
if table_created:
return {
'success': True,
'message': f'原始数据表 {self._get_table_name(account_id)} 创建成功'
}
else:
return {
'success': False,
'message': f'原始数据表 {self._get_table_name(account_id)} 创建失败'
}
except Exception as e:
logger.error(f"Failed to create table for account {account_id}: {str(e)}")
return {
'success': False,
'message': f'创建原始数据表失败: {str(e)}'
}
def get_by_nyid(self, db: Session, account_id: int, nyid: str) -> List[OriginalData]:
"""
根据期数ID获取原始数据
Args:
db: 数据库会话
account_id: 账号ID
nyid: 期数ID
Returns:
原始数据列表
"""
try:
table_name = self._get_table_name(account_id)
# 使用原生SQL查询
query = text(f"SELECT * FROM `{table_name}` WHERE NYID = :nyid")
result = db.execute(query, {"nyid": nyid})
return result.fetchall()
except Exception as e:
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
return []
def get_by_bfpcode(self, db: Session, account_id: int, bfpcode: str) -> List[OriginalData]:
"""
根据前(后)视点名称获取原始数据
Args:
db: 数据库会话
account_id: 账号ID
bfpcode: 前(后)视点名称
Returns:
原始数据列表
"""
try:
table_name = self._get_table_name(account_id)
query = text(f"SELECT * FROM `{table_name}` WHERE bfpcode = :bfpcode")
result = db.execute(query, {"bfpcode": bfpcode})
return result.fetchall()
except Exception as e:
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
return []
def get_all_original_data_tables(self, db: Session) -> List[str]:
"""
获取所有原始数据分表的表名
Args:
db: 数据库会话
Returns:
原始数据表名列表
"""
try:
inspector = inspect(engine)
all_tables = inspector.get_table_names()
# 筛选出所有原始数据分表
original_tables = [table for table in all_tables if table.startswith('original_data_')]
logger.info(f"Found {len(original_tables)} original data tables: {original_tables}")
return original_tables
except Exception as e:
logger.error(f"Failed to get original data tables: {str(e)}")
return []
def search_original_data(self, db: Session,
account_id: Optional[int] = None,
id: Optional[int] = None,
bfpcode: Optional[str] = None,
bffb: Optional[str] = None,
nyid: Optional[str] = None,
bfpl: Optional[str] = None) -> List[Any]:
"""
根据多个条件搜索原始数据
如果未提供account_id则遍历所有分表查询
Args:
db: 数据库会话
account_id: 账号ID可选。不填则查询所有分表
其他查询条件...
Returns:
原始数据列表
"""
print("搜索原始数据,参数:", {
"account_id": account_id,
"id": id,
"bfpcode": bfpcode,
"bffb": bffb,
"nyid": nyid,
"bfpl": bfpl})
try:
# 构建查询条件
conditions = []
params = {}
if id is not None:
conditions.append("id = :id")
params["id"] = id
if bfpcode is not None:
conditions.append("bfpcode = :bfpcode")
params["bfpcode"] = bfpcode
if bffb is not None:
conditions.append("bffb = :bffb")
params["bffb"] = bffb
if nyid is not None:
conditions.append("NYID = :nyid")
params["nyid"] = nyid
if bfpl is not None:
conditions.append("bfpl = :bfpl")
params["bfpl"] = bfpl
where_clause = " AND ".join(conditions) if conditions else "1=1"
# 如果指定了account_id只查询该账号的分表
if account_id is not None:
table_name = self._get_table_name(account_id)
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
result = db.execute(query, params)
return result.fetchall()
# 未指定account_id遍历所有分表
all_results = []
tables = self.get_all_original_data_tables(db)
for table_name in tables:
try:
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
result = db.execute(query, params)
rows = result.fetchall()
all_results.extend(rows)
logger.info(f"Found {len(rows)} records in table {table_name}")
except Exception as e:
logger.warning(f"Failed to query table {table_name}: {str(e)}")
continue
logger.info(f"Total found {len(all_results)} records from {len(tables)} tables")
return all_results
except Exception as e:
logger.error(f"Failed to search original data: {str(e)}")
return []
def _check_settlement_exists(self, db: Session, nyid: str) -> bool:
"""检查期数id沉降数据是否存在"""
settlement = db.query(SettlementData).filter(SettlementData.NYID == nyid).first()
return settlement is not None
    def batch_import_original_data(self, db: Session, data: List) -> Dict[str, Any]:
        """
        Batch-import original data into the account's shard table
        (performance-optimized version).

        Uses multi-row INSERT statements instead of per-row inserts, which
        greatly speeds up the import.

        Args:
            db: database session
            data: list of row dicts; each must carry an 'account_id' field
        Returns:
            Result dict with keys: success, message, total_count,
            success_count, failed_count, failed_items.
        """
        # NOTE(review): shadows the module-level ``logger`` with an identical
        # instance; harmless but redundant.
        logger = logging.getLogger(__name__)
        total_count = len(data)
        success_count = 0
        failed_count = 0
        failed_items = []
        if total_count == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }
        # Only the first row's account_id is read -- the payload is assumed
        # to belong to a single account.  TODO confirm callers guarantee this.
        account_id = data[0].get('account_id')
        if not account_id:
            return {
                'success': False,
                'message': '数据中缺少account_id字段',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        # The account must exist before anything is written.
        account = db.query(Account).filter(Account.id == account_id).first()
        if not account:
            return {
                'success': False,
                'message': f'账号ID {account_id} 不存在',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        # Ensure the shard table exists (creation retries internally).
        table_created = self._ensure_table_exists(db, account_id)
        if not table_created:
            return {
                'success': False,
                'message': '创建原始数据表失败',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        table_name = self._get_table_name(account_id)
        # Log connection-pool state before starting, for diagnostics.
        pool_stats_before = get_pool_status()
        logger.info(f"开始批量导入,连接池状态: {pool_stats_before}")
        # Detect an externally managed transaction; if one is active we must
        # neither commit nor roll back here.
        in_transaction = db.in_transaction()
        logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")

        # NOTE(review): the @retry decorator (max_attempts=2) wraps a function
        # that also loops twice internally, so a persistent DB error can be
        # attempted up to 4 times in total.
        @retry(config=RetryConfig(max_attempts=2, base_delay=2.0, max_delay=10.0))
        def _do_import():
            """Inner import routine (re-run by the @retry decorator on failure)."""
            for attempt in range(2):  # at most one internal retry
                try:
                    # Only begin() a transaction if none is active.
                    if not in_transaction:
                        logger.info(f"开始内部事务 (尝试 {attempt + 1})")
                        db.begin()
                    else:
                        logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
                    # NOTE(review): these shadow the enclosing function's
                    # counters; the result dict below uses these local values.
                    success_count = 0
                    failed_count = 0
                    failed_items = []
                    # Perform the actual inserts.
                    success_count = self._execute_import(db, table_name, data, account_id)
                    # Commit only a transaction we opened ourselves.
                    if not in_transaction:
                        db.commit()
                        logger.info(f"事务已提交")
                    else:
                        logger.info(f"使用外部事务,不提交")
                    logger.info(f"Batch import original data completed. Success: {success_count}, Failed: {failed_count}")
                    return {
                        'success': True,
                        'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
                        'total_count': total_count,
                        'success_count': success_count,
                        'failed_count': failed_count,
                        'failed_items': failed_items
                    }
                except SQLAlchemyError as e:
                    # Roll back only a transaction we opened ourselves.
                    if not in_transaction:
                        try:
                            db.rollback()
                        except:
                            pass
                    pool_stats_after = get_pool_status()
                    error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
                    logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
                    # Detailed diagnostics including pool usage.
                    logger.error(
                        f"错误详情: 类型={type(e).__name__}, "
                        f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
                        f"SQL: {str(e)[:200]}"
                    )
                    if attempt == 1:  # final internal attempt failed
                        logger.error("Batch import original data failed after retries")
                        raise e  # re-raise so the @retry decorator can retry
        try:
            return _do_import()
        except Exception as e:
            return {
                'success': False,
                'message': f'批量导入失败: {str(e)}',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
    def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int:
        """
        Execute the actual import (shared logic extracted from the batch
        entry point).

        Skips rows already present in the table (matched on the composite key
        NYID + sort) and inserts the rest with multi-row INSERTs in chunks
        of 1000.

        Args:
            db: database session
            table_name: target shard table
            data: list of row dicts
            account_id: owning account id
        Returns:
            Number of newly inserted rows.
        Raises:
            Exception: when any referenced NYID has no settlement record.
        """
        logger.info(f"Starting batch import into {table_name}, total records: {len(data)}")
        # ===== Optimization: settlement lookup in a single query =====
        # NYIDs are normalized to str because the DB column is VARCHAR.
        nyid_list = list(set(str(item.get('NYID')) for item in data if item.get('NYID')))
        from ..models.settlement_data import SettlementData
        settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(nyid_list)).all()
        settlement_map = {s.NYID: s for s in settlements}
        missing_nyids = set(nyid_list) - set(settlement_map.keys())
        if missing_nyids:
            raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')
        # ===== Optimization: load existing rows once to filter duplicates =====
        existing_data = db.query(text("*")).from_statement(
            text(f"SELECT * FROM `{table_name}` WHERE account_id = :account_id")
        ).params(account_id=account_id).all()
        # Build a lookup keyed on "NYID_sort".
        # NOTE(review): the positional indices rely on SELECT * returning the
        # CREATE TABLE column order (id=0 ... NYID=7, sort=8); fragile if the
        # schema ever changes.
        existing_map = {
            f"{item[7]}_{item[8]}": item  # NYID is field index 7, sort is index 8
            for item in existing_data
        }
        logger.info(f"Found {len(existing_data)} existing records in {table_name}")
        # ===== Split incoming rows into inserts vs. skips =====
        to_insert = []
        skipped_count = 0
        for item_data in data:
            nyid = str(item_data.get('NYID'))  # normalize to str for the key
            sort = item_data.get('sort')
            # Composite dedup key.
            key = f"{nyid}_{sort}"
            if key in existing_map:
                # Row already present -- skip it.
                skipped_count += 1
            else:
                # Row is new -- queue it for insertion.
                to_insert.append(item_data)
        logger.info(f"Filtered {skipped_count} duplicate records, {len(to_insert)} new records to insert")
        # ===== Multi-row insert =====
        if not to_insert:
            logger.info("No new records to insert, all data already exists")
            return 0
        # Chunk into groups of 1000 rows (safe under MySQL defaults).
        batch_size = 1000
        total_inserted = 0
        for i in range(0, len(to_insert), batch_size):
            batch_data = to_insert[i:i + batch_size]
            # Build per-row placeholder tuples and their bound parameters.
            values_list = []
            params = {}
            for idx, item_data in enumerate(batch_data):
                values_list.append(
                    f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
                    f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
                )
                params.update({
                    f"account_id_{idx}": account_id,
                    f"bfpcode_{idx}": item_data.get('bfpcode'),
                    f"mtime_{idx}": item_data.get('mtime'),
                    f"bffb_{idx}": item_data.get('bffb'),
                    f"bfpl_{idx}": item_data.get('bfpl'),
                    f"bfpvalue_{idx}": item_data.get('bfpvalue'),
                    f"NYID_{idx}": item_data.get('NYID'),
                    f"sort_{idx}": item_data.get('sort')
                })
            # Assemble the multi-row INSERT as a plain string first (avoids
            # TextClause concatenation issues), then wrap with text().
            insert_sql = f"""
            INSERT INTO `{table_name}`
            (account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
            VALUES {", ".join(values_list)}
            """
            final_sql = text(insert_sql)
            db.execute(final_sql, params)
            total_inserted += len(batch_data)
            logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")
        logger.info(f"Batch import completed: {total_inserted} records inserted, {skipped_count} duplicates skipped")
        return total_inserted
def batch_import_original_data_new(self, db: Session, data: List[List[Dict[str, Any]]]) -> Dict[str, Any]:
"""
新版批量导入原始数据 - 支持分组格式 data:[[{},{},{}],[{},{}]]
里层一个[{},{}]称为一组数据,数据{}内容与旧接口一致
一组数据全部记录的NYID与account_id将会一样不同组可能不同
导入逻辑:
1. 按account_id分表存储没表就建表
2. 插入前根据NYID判断表中是否有重复数据
3. 有重复就删除表中全部同NYID数据插入新的不重复就直接插入
Args:
db: 数据库会话
data: 分组数据列表,格式为 [[{},{},{}], [{},{}]]
Returns:
操作结果
"""
logger = logging.getLogger(__name__)
total_count = 0
success_count = 0
failed_count = 0
failed_items = []
if not data or len(data) == 0:
return {
'success': False,
'message': '导入数据不能为空',
'total_count': 0,
'success_count': 0,
'failed_count': 0,
'failed_items': []
}
# 验证所有组的account_id和NYID是否一致并按account_id分组
group_validation_results = []
account_id_to_groups = {} # {account_id: [group_indices]}
for group_idx, group in enumerate(data):
if not group or len(group) == 0:
failed_items.append({
'group_index': group_idx,
'message': f'{group_idx + 1}组数据为空'
})
continue
# 获取组内第一个数据的account_id和NYID
first_item = group[0]
group_account_id = str(first_item.get('account_id'))
group_nyid = str(first_item.get('NYID'))
if not group_account_id or not group_nyid:
failed_items.append({
'group_index': group_idx,
'message': f'{group_idx + 1}组数据缺少account_id或NYID字段'
})
continue
# 验证组内所有数据的account_id和NYID是否一致
group_valid = True
for item_idx, item in enumerate(group):
item_account_id = str(item.get('account_id'))
item_nyid = str(item.get('NYID'))
if item_account_id != group_account_id:
failed_items.append({
'group_index': group_idx,
'item_index': item_idx,
'message': f'{group_idx + 1}组第{item_idx + 1}条数据account_id不一致期望:{group_account_id},实际:{item_account_id}'
})
group_valid = False
if item_nyid != group_nyid:
failed_items.append({
'group_index': group_idx,
'item_index': item_idx,
'message': f'{group_idx + 1}组第{item_idx + 1}条数据NYID不一致期望:{group_nyid},实际:{item_nyid}'
})
group_valid = False
# 记录验证结果
group_validation_results.append({
'group_index': group_idx,
'account_id': group_account_id,
'nyid': group_nyid,
'valid': group_valid,
'data': group
})
# 按account_id分组
if group_account_id not in account_id_to_groups:
account_id_to_groups[group_account_id] = []
account_id_to_groups[group_account_id].append(group_idx)
# 如果有验证错误,返回失败
if failed_items:
return {
'success': False,
'message': f'数据验证失败,发现{len(failed_items)}个错误',
'total_count': sum(len(group) for group in data if group),
'success_count': 0,
'failed_count': len(failed_items),
'failed_items': failed_items
}
# 记录总体统计
total_count = sum(len(group['data']) for group in group_validation_results if group['valid'])
logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}")
# 记录开始前的连接池状态
pool_stats_before = get_pool_status()
logger.info(f"开始新版批量导入,连接池状态: {pool_stats_before}")
# 检查是否已在事务中
in_transaction = db.in_transaction()
logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")
for attempt in range(2):
try:
# 只有不在事务中时才调用begin()
if not in_transaction:
logger.info(f"开始内部事务 (尝试 {attempt + 1})")
db.begin()
else:
logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
success_count = 0
failed_count = 0
failed_items = []
# 处理每个account_id
for account_id_str, group_indices in account_id_to_groups.items():
account_id = int(account_id_str)
logger.info(f"Processing account_id: {account_id}, groups: {group_indices}")
# 验证账号是否存在
account = db.query(Account).filter(Account.id == account_id).first()
if not account:
error_msg = f'账号ID {account_id} 不存在'
logger.error(error_msg)
for group_idx in group_indices:
failed_count += len(group_validation_results[group_idx]['data'])
failed_items.append({
'group_index': group_idx,
'message': error_msg
})
continue
# 确保表存在
table_created = self._ensure_table_exists(db, account_id)
if not table_created:
error_msg = f'创建原始数据表失败 (account_id: {account_id})'
logger.error(error_msg)
for group_idx in group_indices:
failed_count += len(group_validation_results[group_idx]['data'])
failed_items.append({
'group_index': group_idx,
'message': error_msg
})
continue
# 收集该account_id的所有组数据
account_groups_data = []
for group_idx in group_indices:
account_groups_data.append(group_validation_results[group_idx]['data'])
# 执行分组导入操作
table_name = self._get_table_name(account_id)
logger.info(f"Processing {len(account_groups_data)} groups for table: {table_name}")
group_results = self._execute_import_new(db, table_name, account_groups_data, account_id)
success_count += group_results['success_count']
failed_count += group_results['failed_count']
failed_items.extend(group_results['failed_items'])
logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}")
logger.info(f"Before commit: Success={success_count}, Failed={failed_count}")
# 只有我们开始的事务才提交
if not in_transaction:
db.commit()
logger.info(f"事务已提交")
else:
logger.info(f"使用外部事务,不提交")
logger.info(f"Transaction completed successfully! Success: {success_count}, Failed: {failed_count}")
break
except SQLAlchemyError as e:
# 只有我们开始的事务才回滚
if not in_transaction:
try:
db.rollback()
except:
pass
pool_stats_after = get_pool_status()
error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
# 记录错误详情
logger.error(
f"错误详情: 类型={type(e).__name__}, "
f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
f"SQL: {str(e)[:200]}"
)
if attempt == 1:
logger.error("Batch import original data failed after retries")
return {
'success': False,
'message': f'批量导入失败: {str(e)}',
'total_count': total_count,
'success_count': 0,
'failed_count': total_count,
'failed_items': []
}
return {
'success': True if failed_count == 0 else False,
'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
'total_count': total_count,
'success_count': success_count,
'failed_count': failed_count,
'failed_items': failed_items
}
    def _execute_import_new(self, db: Session, table_name: str, data: List[List[Dict[str, Any]]], account_id: int) -> Dict[str, Any]:
        """
        Execute the new-style grouped import against one shard table.

        For each group (all rows sharing one NYID): existing rows with that
        NYID are deleted, then the group is inserted. A group whose NYID was
        already handled earlier in the same call is skipped and reported as
        failed.

        Args:
            db: database session
            table_name: target shard table
            data: list of groups (each a list of row dicts)
            account_id: owning account id
        Returns:
            dict with success_count / failed_count / failed_items.
        Raises:
            Exception: when any referenced NYID has no settlement record.
        """
        logger.info(f"Starting batch import (new) into {table_name}, total groups: {len(data)}")
        # Warm the table up first so its definition is stable.
        self._warmup_table(db, table_name)
        success_count = 0
        failed_count = 0
        failed_items = []
        # Collect every NYID so the settlement check is a single query.
        all_nyids = set()
        for group in data:
            if group and len(group) > 0:
                group_nyid = str(group[0].get('NYID'))
                all_nyids.add(group_nyid)
        # Every NYID must already exist in the settlement table.
        from ..models.settlement_data import SettlementData
        if all_nyids:
            settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(list(all_nyids))).all()
            settlement_map = {s.NYID: s for s in settlements}
            missing_nyids = all_nyids - set(settlement_map.keys())
            if missing_nyids:
                raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')
        logger.info(f"Found {len(all_nyids)} unique NYIDs to process: {list(all_nyids)}")
        # Delete-then-insert per NYID; duplicate NYIDs across groups rejected.
        processed_nyids = set()
        group_index = 0
        for group in data:
            if not group or len(group) == 0:
                continue
            group_nyid = str(group[0].get('NYID'))
            logger.info(f"Processing group {group_index}: NYID={group_nyid}, {len(group)} records")
            # A NYID may only appear in one group per call; later groups with
            # the same NYID are skipped and recorded in failed_items.
            if group_nyid in processed_nyids:
                logger.warning(f"NYID {group_nyid} appears in multiple groups (group {group_index}), skipping duplicate")
                failed_count += len(group)
                failed_items.append({
                    'group_index': group_index,
                    'group_nyid': group_nyid,
                    'message': f'NYID {group_nyid} 已在之前组中处理,跳过重复组'
                })
                group_index += 1
                continue
            processed_nyids.add(group_nyid)
            # Retry loop to ride out MySQL "Table definition has changed" errors.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    # Does the table already hold rows for this NYID?
                    existing_query = text(f"SELECT id FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                    existing_result = db.execute(existing_query, {"nyid": group_nyid, "account_id": account_id}).fetchall()
                    logger.debug(f"Found {len(existing_result)} existing records for NYID {group_nyid}")
                    # If so, replace them: delete the old rows first.
                    if existing_result:
                        delete_query = text(f"DELETE FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                        db.execute(delete_query, {"nyid": group_nyid, "account_id": account_id})
                        logger.info(f"Deleted {len(existing_result)} existing records for NYID {group_nyid}")
                    # Normalize rows for insertion.
                    to_insert = []
                    for item in group:
                        # NYID and account_id are stored as strings (the
                        # columns are VARCHAR); copy so the caller's dicts
                        # are not mutated.
                        item_copy = item.copy()
                        item_copy['NYID'] = str(item_copy.get('NYID'))
                        item_copy['account_id'] = str(account_id)
                        to_insert.append(item_copy)
                    # Insert the whole group in batches.
                    if to_insert:
                        logger.info(f"Inserting {len(to_insert)} records for NYID {group_nyid} into {table_name}")
                        inserted_count = self._batch_insert_group(db, table_name, to_insert, account_id)
                        logger.info(f"Successfully inserted {inserted_count} records for NYID {group_nyid}")
                        success_count += inserted_count
                    # Success: leave the retry loop.
                    break
                except Exception as e:
                    error_msg = str(e)
                    if "Table definition has changed" in error_msg:
                        if attempt < max_retries - 1:
                            # Back off and re-warm the table before retrying.
                            import time
                            time.sleep(0.1 * (attempt + 1))
                            self._warmup_table(db, table_name)
                            logger.warning(f"Table definition changed for {group_nyid}, retrying (attempt {attempt + 2})...")
                            continue
                        else:
                            failed_count += len(group)
                            failed_items.append({
                                'group_index': group_index,
                                'group_nyid': group_nyid,
                                'message': f'处理NYID {group_nyid} 失败: 表定义变更,重试{attempt + 1}次后仍失败'
                            })
                            logger.error(f"Failed to process group with NYID {group_nyid} after {max_retries} retries: {str(e)}")
                            break
                    else:
                        # Any other error fails this group immediately.
                        failed_count += len(group)
                        failed_items.append({
                            'group_index': group_index,
                            'group_nyid': group_nyid,
                            'message': f'处理NYID {group_nyid} 失败: {str(e)}'
                        })
                        logger.error(f"Failed to process group with NYID {group_nyid}: {str(e)}")
                        break
            group_index += 1
        logger.info(f"Batch import (new) completed: {success_count} records inserted, {failed_count} failed")
        return {
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }
def _batch_insert_group(self, db: Session, table_name: str, data: List[Dict[str, Any]], account_id: int) -> int:
"""
批量插入一组数据
Args:
db: 数据库会话
table_name: 表名
data: 要插入的数据列表
account_id: 账号ID
Returns:
插入的记录数
"""
if not data:
return 0
# 将数据分组每组1000条MySQL默认支持
batch_size = 1000
total_inserted = 0
for i in range(0, len(data), batch_size):
batch_data = data[i:i + batch_size]
# 构建批量参数
values_list = []
params = {}
logger.info(f"Preparing to insert batch of {len(batch_data)} records into {table_name}")
for idx, item_data in enumerate(batch_data):
values_list.append(
f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
)
params.update({
f"account_id_{idx}": str(account_id),
f"bfpcode_{idx}": item_data.get('bfpcode'),
f"mtime_{idx}": item_data.get('mtime'),
f"bffb_{idx}": item_data.get('bffb'),
f"bfpl_{idx}": item_data.get('bfpl'),
f"bfpvalue_{idx}": item_data.get('bfpvalue'),
f"NYID_{idx}": str(item_data.get('NYID')),
f"sort_{idx}": item_data.get('sort')
})
# 批量插入SQL
insert_sql = f"""
INSERT INTO `{table_name}`
(account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
VALUES {", ".join(values_list)}
"""
final_sql = text(insert_sql)
logger.debug(f"Final SQL preview: {insert_sql[:200]}...") # 只打印前200个字符
result = db.execute(final_sql, params)
logger.info(f"Batch insert successful: {len(batch_data)} records affected")
total_inserted += len(batch_data)
logger.debug(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")
return total_inserted