963 lines
40 KiB
Python
963 lines
40 KiB
Python
from sqlalchemy.orm import Session
|
||
from sqlalchemy import text, inspect
|
||
from sqlalchemy.exc import SQLAlchemyError, DisconnectionError, TimeoutError
|
||
from typing import List, Optional, Dict, Any
|
||
from ..models.original_data import OriginalData, get_original_data_model, get_table_name
|
||
from .base import BaseService
|
||
from ..models.settlement_data import SettlementData
|
||
from ..models.account import Account
|
||
from ..core.database import engine
|
||
from ..core.db_monitor import log_pool_status, get_pool_status
|
||
from ..core.retry import retry, circuit_breaker, RetryConfig
|
||
import logging
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class OriginalDataService(BaseService[OriginalData]):
    """Service layer for per-account "original data" shard tables.

    Each account's raw survey records live in their own table
    (name resolved via ``get_table_name(account_id)``); this service
    handles shard creation, querying, and batch import.
    """

    def __init__(self):
        # OriginalData is only the base/model template; real access goes
        # through the per-account shard tables via raw SQL.
        super().__init__(OriginalData)
|
||
|
||
    def _get_table_name(self, account_id: int) -> str:
        """Return the original-data shard table name for *account_id*.

        Delegates to ``get_table_name`` so the naming convention is
        defined in one place (the model module).
        """
        return get_table_name(account_id)
|
||
|
||
def _ensure_table_exists(self, db: Session, account_id: int) -> bool:
|
||
"""
|
||
确保指定账号的原始数据表存在,不存在则创建
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
account_id: 账号ID
|
||
|
||
Returns:
|
||
bool: 表是否存在或创建成功
|
||
"""
|
||
table_name = self._get_table_name(account_id)
|
||
inspector = inspect(engine)
|
||
|
||
# 检查表是否存在
|
||
if table_name in inspector.get_table_names():
|
||
logger.info(f"Table {table_name} already exists")
|
||
# 表已存在时,也执行预热操作
|
||
self._warmup_table(db, table_name)
|
||
return True
|
||
|
||
# 表不存在,创建表 - 添加重试机制
|
||
max_retries = 3
|
||
for attempt in range(max_retries):
|
||
try:
|
||
create_table_sql = f"""
|
||
CREATE TABLE `{table_name}` (
|
||
`id` INT AUTO_INCREMENT PRIMARY KEY,
|
||
`account_id` INT NOT NULL COMMENT '账号ID',
|
||
`bfpcode` VARCHAR(1000) NOT NULL COMMENT '前(后)视点名称',
|
||
`mtime` DATETIME NOT NULL COMMENT '测点观测时间',
|
||
`bffb` VARCHAR(1000) NOT NULL COMMENT '前(后)视标记符',
|
||
`bfpl` VARCHAR(1000) NOT NULL COMMENT '前(后)视距离(m)',
|
||
`bfpvalue` VARCHAR(1000) NOT NULL COMMENT '前(后)视尺读数(m)',
|
||
`NYID` VARCHAR(100) NOT NULL COMMENT '期数id',
|
||
`sort` INT COMMENT '序号',
|
||
INDEX `idx_nyid` (`NYID`),
|
||
INDEX `idx_account_id` (`account_id`)
|
||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='原始数据表_账号{account_id}'
|
||
"""
|
||
# 使用引擎直接执行,不依赖db会话的事务
|
||
with engine.begin() as conn: # 创建独立连接
|
||
conn.execute(text(create_table_sql))
|
||
|
||
logger.info(f"Table {table_name} created successfully on attempt {attempt + 1}")
|
||
|
||
# 表创建后延迟更长时间,避免立即查询触发"表定义变更"错误
|
||
import time
|
||
time.sleep(0.2)
|
||
|
||
# 预热表:执行一个简单的查询来稳定表定义
|
||
self._warmup_table(db, table_name)
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.warning(f"Attempt {attempt + 1} to create table {table_name} failed: {str(e)}")
|
||
|
||
# 检查是否是表已存在错误(并发情况下可能发生)
|
||
if "already exists" in str(e).lower() or "exist" in str(e).lower():
|
||
logger.info(f"Table {table_name} was created by another process")
|
||
# 延迟并预热表
|
||
import time
|
||
time.sleep(0.2)
|
||
self._warmup_table(db, table_name)
|
||
return True
|
||
|
||
if attempt == max_retries - 1:
|
||
logger.error(f"Failed to create table {table_name} after {max_retries} attempts: {str(e)}")
|
||
raise Exception(f"创建原始数据表失败: {str(e)}")
|
||
|
||
# 短暂延迟后重试
|
||
import time
|
||
time.sleep(0.1 * (attempt + 1))
|
||
|
||
return False
|
||
|
||
def _warmup_table(self, db: Session, table_name: str) -> None:
|
||
"""
|
||
预热表:执行一个简单查询来稳定表定义,避免"表定义变更"错误
|
||
"""
|
||
try:
|
||
# 执行一个简单的COUNT查询来预热表
|
||
warmup_query = text(f"SELECT COUNT(*) FROM `{table_name}` LIMIT 0")
|
||
db.execute(warmup_query)
|
||
logger.debug(f"Table {table_name} warmed up successfully")
|
||
except Exception as e:
|
||
logger.warning(f"Failed to warm up table {table_name}: {str(e)}")
|
||
# 预热失败不影响主流程,只是记录警告
|
||
|
||
def create_table_for_account(self, db: Session, account_id: int) -> Dict[str, Any]:
|
||
"""
|
||
为指定账号创建原始数据表
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
account_id: 账号ID
|
||
|
||
Returns:
|
||
操作结果
|
||
"""
|
||
try:
|
||
# 验证账号是否存在
|
||
account = db.query(Account).filter(Account.id == account_id).first()
|
||
if not account:
|
||
return {
|
||
'success': False,
|
||
'message': f'账号ID {account_id} 不存在'
|
||
}
|
||
|
||
# 创建表 - 现在使用增强的错误处理
|
||
table_created = self._ensure_table_exists(db, account_id)
|
||
|
||
if table_created:
|
||
return {
|
||
'success': True,
|
||
'message': f'原始数据表 {self._get_table_name(account_id)} 创建成功'
|
||
}
|
||
else:
|
||
return {
|
||
'success': False,
|
||
'message': f'原始数据表 {self._get_table_name(account_id)} 创建失败'
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"Failed to create table for account {account_id}: {str(e)}")
|
||
return {
|
||
'success': False,
|
||
'message': f'创建原始数据表失败: {str(e)}'
|
||
}
|
||
|
||
def get_by_nyid(self, db: Session, account_id: int, nyid: str) -> List[OriginalData]:
|
||
"""
|
||
根据期数ID获取原始数据
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
account_id: 账号ID
|
||
nyid: 期数ID
|
||
|
||
Returns:
|
||
原始数据列表
|
||
"""
|
||
try:
|
||
table_name = self._get_table_name(account_id)
|
||
# 使用原生SQL查询
|
||
query = text(f"SELECT * FROM `{table_name}` WHERE NYID = :nyid")
|
||
result = db.execute(query, {"nyid": nyid})
|
||
return result.fetchall()
|
||
except Exception as e:
|
||
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
|
||
return []
|
||
|
||
def get_by_bfpcode(self, db: Session, account_id: int, bfpcode: str) -> List[OriginalData]:
|
||
"""
|
||
根据前(后)视点名称获取原始数据
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
account_id: 账号ID
|
||
bfpcode: 前(后)视点名称
|
||
|
||
Returns:
|
||
原始数据列表
|
||
"""
|
||
try:
|
||
table_name = self._get_table_name(account_id)
|
||
query = text(f"SELECT * FROM `{table_name}` WHERE bfpcode = :bfpcode")
|
||
result = db.execute(query, {"bfpcode": bfpcode})
|
||
return result.fetchall()
|
||
except Exception as e:
|
||
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
|
||
return []
|
||
|
||
def get_all_original_data_tables(self, db: Session) -> List[str]:
|
||
"""
|
||
获取所有原始数据分表的表名
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
|
||
Returns:
|
||
原始数据表名列表
|
||
"""
|
||
try:
|
||
inspector = inspect(engine)
|
||
all_tables = inspector.get_table_names()
|
||
# 筛选出所有原始数据分表
|
||
original_tables = [table for table in all_tables if table.startswith('original_data_')]
|
||
logger.info(f"Found {len(original_tables)} original data tables: {original_tables}")
|
||
return original_tables
|
||
except Exception as e:
|
||
logger.error(f"Failed to get original data tables: {str(e)}")
|
||
return []
|
||
|
||
def search_original_data(self, db: Session,
|
||
account_id: Optional[int] = None,
|
||
id: Optional[int] = None,
|
||
bfpcode: Optional[str] = None,
|
||
bffb: Optional[str] = None,
|
||
nyid: Optional[str] = None,
|
||
bfpl: Optional[str] = None) -> List[Any]:
|
||
"""
|
||
根据多个条件搜索原始数据
|
||
如果未提供account_id,则遍历所有分表查询
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
account_id: 账号ID,可选。不填则查询所有分表
|
||
其他查询条件...
|
||
|
||
Returns:
|
||
原始数据列表
|
||
"""
|
||
print("搜索原始数据,参数:", {
|
||
"account_id": account_id,
|
||
"id": id,
|
||
"bfpcode": bfpcode,
|
||
"bffb": bffb,
|
||
"nyid": nyid,
|
||
"bfpl": bfpl})
|
||
try:
|
||
# 构建查询条件
|
||
conditions = []
|
||
params = {}
|
||
|
||
if id is not None:
|
||
conditions.append("id = :id")
|
||
params["id"] = id
|
||
if bfpcode is not None:
|
||
conditions.append("bfpcode = :bfpcode")
|
||
params["bfpcode"] = bfpcode
|
||
if bffb is not None:
|
||
conditions.append("bffb = :bffb")
|
||
params["bffb"] = bffb
|
||
if nyid is not None:
|
||
conditions.append("NYID = :nyid")
|
||
params["nyid"] = nyid
|
||
if bfpl is not None:
|
||
conditions.append("bfpl = :bfpl")
|
||
params["bfpl"] = bfpl
|
||
|
||
where_clause = " AND ".join(conditions) if conditions else "1=1"
|
||
|
||
# 如果指定了account_id,只查询该账号的分表
|
||
if account_id is not None:
|
||
table_name = self._get_table_name(account_id)
|
||
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
|
||
result = db.execute(query, params)
|
||
return result.fetchall()
|
||
|
||
# 未指定account_id,遍历所有分表
|
||
all_results = []
|
||
tables = self.get_all_original_data_tables(db)
|
||
|
||
for table_name in tables:
|
||
try:
|
||
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
|
||
result = db.execute(query, params)
|
||
rows = result.fetchall()
|
||
all_results.extend(rows)
|
||
logger.info(f"Found {len(rows)} records in table {table_name}")
|
||
except Exception as e:
|
||
logger.warning(f"Failed to query table {table_name}: {str(e)}")
|
||
continue
|
||
|
||
logger.info(f"Total found {len(all_results)} records from {len(tables)} tables")
|
||
return all_results
|
||
|
||
except Exception as e:
|
||
logger.error(f"Failed to search original data: {str(e)}")
|
||
return []
|
||
|
||
def _check_settlement_exists(self, db: Session, nyid: str) -> bool:
|
||
"""检查期数id沉降数据是否存在"""
|
||
settlement = db.query(SettlementData).filter(SettlementData.NYID == nyid).first()
|
||
return settlement is not None
|
||
|
||
    def batch_import_original_data(self, db: Session, data: List) -> Dict[str, Any]:
        """
        Batch-import original data into one account's shard table (optimised).

        Uses bulk INSERT (via ``_execute_import``) instead of row-by-row
        inserts. The whole batch is routed to the shard of the FIRST
        record's ``account_id``.

        Args:
            db: database session
            data: list of record dicts; each must carry ``account_id``

        Returns:
            Result dict: success flag, message, and total/success/failed
            counters plus per-item failure details.
        """
        # NOTE(review): rebinds the module-level ``logger`` locally —
        # harmless but redundant.
        logger = logging.getLogger(__name__)

        total_count = len(data)
        success_count = 0
        failed_count = 0
        failed_items = []

        # Reject an empty payload outright.
        if total_count == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }

        # Only the first record's account_id is consulted for the shard.
        account_id = data[0].get('account_id')
        if not account_id:
            return {
                'success': False,
                'message': '数据中缺少account_id字段',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        # The account must exist before touching its shard.
        account = db.query(Account).filter(Account.id == account_id).first()
        if not account:
            return {
                'success': False,
                'message': f'账号ID {account_id} 不存在',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        # Ensure the shard table exists (creates it with retries if needed).
        table_created = self._ensure_table_exists(db, account_id)
        if not table_created:
            return {
                'success': False,
                'message': '创建原始数据表失败',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        table_name = self._get_table_name(account_id)

        # Snapshot the connection pool before starting, for diagnostics.
        pool_stats_before = get_pool_status()
        logger.info(f"开始批量导入,连接池状态: {pool_stats_before}")

        # If the caller already opened a transaction, reuse it and never
        # commit/rollback ourselves.
        in_transaction = db.in_transaction()
        logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")

        # NOTE(review): retries happen twice over — the @retry decorator
        # (max_attempts=2) wraps an inner loop that itself retries once on
        # SQLAlchemyError. Confirm the intended total attempt count.
        @retry(config=RetryConfig(max_attempts=2, base_delay=2.0, max_delay=10.0))
        def _do_import():
            """Inner import worker (wrapped by the retry decorator)."""
            for attempt in range(2):  # at most one inner retry
                try:
                    # Only open a transaction if the caller did not.
                    if not in_transaction:
                        logger.info(f"开始内部事务 (尝试 {attempt + 1})")
                        db.begin()
                    else:
                        logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")

                    # NOTE(review): failed_count/failed_items are reset here
                    # but never updated below — the result always reports
                    # zero failures; _execute_import raises on failure instead.
                    success_count = 0
                    failed_count = 0
                    failed_items = []

                    # Perform the deduplicated bulk insert.
                    success_count = self._execute_import(db, table_name, data, account_id)

                    # Commit only the transaction we opened ourselves.
                    if not in_transaction:
                        db.commit()
                        logger.info(f"事务已提交")
                    else:
                        logger.info(f"使用外部事务,不提交")

                    logger.info(f"Batch import original data completed. Success: {success_count}, Failed: {failed_count}")
                    return {
                        'success': True,
                        'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
                        'total_count': total_count,
                        'success_count': success_count,
                        'failed_count': failed_count,
                        'failed_items': failed_items
                    }

                except SQLAlchemyError as e:
                    # Roll back only the transaction we opened ourselves.
                    if not in_transaction:
                        try:
                            db.rollback()
                        except:
                            pass

                    pool_stats_after = get_pool_status()
                    error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
                    logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")

                    # Log detailed diagnostics (error type, pool usage, SQL head).
                    logger.error(
                        f"错误详情: 类型={type(e).__name__}, "
                        f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
                        f"SQL: {str(e)[:200]}"
                    )

                    if attempt == 1:  # final inner attempt failed
                        logger.error("Batch import original data failed after retries")
                        raise e  # re-raise so the @retry decorator takes over

        try:
            return _do_import()
        except Exception as e:
            return {
                'success': False,
                'message': f'批量导入失败: {str(e)}',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
|
||
|
||
    def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int:
        """Perform the actual import (shared logic extracted from the caller).

        Validates that every NYID has a settlement record, filters out rows
        that already exist (composite key NYID+sort), then bulk-inserts the
        remainder in 1000-row chunks.

        Args:
            db: database session (caller owns the transaction)
            table_name: shard table to insert into
            data: record dicts to import
            account_id: account id stamped on every row

        Returns:
            int: number of newly inserted records.

        Raises:
            Exception: when some NYIDs are missing from the settlement table.
        """
        logger.info(f"Starting batch import into {table_name}, total records: {len(data)}")

        # ===== Perf: one bulk query for all settlement records =====
        # NYIDs are normalised to str (the DB column is VARCHAR).
        nyid_list = list(set(str(item.get('NYID')) for item in data if item.get('NYID')))
        from ..models.settlement_data import SettlementData
        settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(nyid_list)).all()
        settlement_map = {s.NYID: s for s in settlements}
        missing_nyids = set(nyid_list) - set(settlement_map.keys())

        if missing_nyids:
            raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')

        # ===== Perf: one bulk query for existing rows (duplicate filter) =====
        # Duplicates are detected on the composite key (NYID, sort).
        existing_data = db.query(text("*")).from_statement(
            text(f"SELECT * FROM `{table_name}` WHERE account_id = :account_id")
        ).params(account_id=account_id).all()

        # Lookup keyed f"{NYID}_{sort}".
        # NOTE(review): relies on positional columns — NYID at index 7 and
        # sort at index 8 must match the CREATE TABLE column order in
        # _ensure_table_exists; re-check if the schema ever changes.
        existing_map = {
            f"{item[7]}_{item[8]}": item  # NYID is column 8 (index 7), sort is column 9 (index 8)
            for item in existing_data
        }
        logger.info(f"Found {len(existing_data)} existing records in {table_name}")

        # ===== Split incoming rows into insert vs. skip =====
        to_insert = []
        skipped_count = 0

        for item_data in data:
            nyid = str(item_data.get('NYID'))  # normalise to str for key match
            sort = item_data.get('sort')

            # Composite duplicate key.
            key = f"{nyid}_{sort}"

            if key in existing_map:
                # Row already present — skip silently, just count it.
                skipped_count += 1
            else:
                # Queue for insertion.
                to_insert.append(item_data)

        logger.info(f"Filtered {skipped_count} duplicate records, {len(to_insert)} new records to insert")

        # ===== Bulk insert =====
        if not to_insert:
            logger.info("No new records to insert, all data already exists")
            return 0

        # Chunk into 1000-row INSERTs (within MySQL's defaults).
        batch_size = 1000
        total_inserted = 0
        for i in range(0, len(to_insert), batch_size):
            batch_data = to_insert[i:i + batch_size]

            # One placeholder tuple + bound parameter set per row.
            values_list = []
            params = {}
            for idx, item_data in enumerate(batch_data):
                values_list.append(
                    f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
                    f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
                )
                params.update({
                    f"account_id_{idx}": account_id,
                    f"bfpcode_{idx}": item_data.get('bfpcode'),
                    f"mtime_{idx}": item_data.get('mtime'),
                    f"bffb_{idx}": item_data.get('bffb'),
                    f"bfpl_{idx}": item_data.get('bfpl'),
                    f"bfpvalue_{idx}": item_data.get('bfpvalue'),
                    f"NYID_{idx}": item_data.get('NYID'),
                    f"sort_{idx}": item_data.get('sort')
                })

            # Multi-row INSERT built with plain string join (avoids the
            # TextClause concatenation problem noted in the original).
            insert_sql = f"""
            INSERT INTO `{table_name}`
            (account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
            VALUES {", ".join(values_list)}
            """
            final_sql = text(insert_sql)
            db.execute(final_sql, params)
            total_inserted += len(batch_data)
            logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")

        logger.info(f"Batch import completed: {total_inserted} records inserted, {skipped_count} duplicates skipped")
        return total_inserted
|
||
|
||
    def batch_import_original_data_new(self, db: Session, data: List[List[Dict[str, Any]]]) -> Dict[str, Any]:
        """
        New-style batch import with grouped payload: ``[[{},{},{}], [{},{}]]``.

        Each inner list is one "group"; record dicts match the old endpoint.
        All records in a group share the same NYID and account_id (different
        groups may differ).

        Import rules:
        1. Rows are sharded by account_id; missing shard tables are created.
        2. Before inserting, the shard is checked for rows with the same NYID.
        3. If duplicates exist, ALL same-NYID rows are deleted and replaced;
           otherwise the group is inserted directly.

        Args:
            db: database session
            data: grouped records, ``[[{},{},{}], [{},{}]]``

        Returns:
            Result dict: success flag, message, and counters with per-group
            failure details.
        """
        # NOTE(review): rebinds the module-level ``logger`` locally —
        # harmless but redundant.
        logger = logging.getLogger(__name__)

        total_count = 0
        success_count = 0
        failed_count = 0
        failed_items = []

        # Reject an empty payload outright.
        if not data or len(data) == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }

        # Validate per-group consistency of account_id/NYID, and bucket
        # group indices by account_id for per-shard processing.
        group_validation_results = []
        account_id_to_groups = {}  # {account_id: [group_indices]}

        for group_idx, group in enumerate(data):
            if not group or len(group) == 0:
                failed_items.append({
                    'group_index': group_idx,
                    'message': f'第{group_idx + 1}组数据为空'
                })
                continue

            # The first record defines the group's account_id and NYID.
            first_item = group[0]
            group_account_id = str(first_item.get('account_id'))
            group_nyid = str(first_item.get('NYID'))

            # NOTE(review): str(None) == 'None' is truthy, so a record with
            # a missing account_id/NYID passes this check — confirm intent.
            if not group_account_id or not group_nyid:
                failed_items.append({
                    'group_index': group_idx,
                    'message': f'第{group_idx + 1}组数据缺少account_id或NYID字段'
                })
                continue

            # Every record in the group must agree with the first one.
            group_valid = True
            for item_idx, item in enumerate(group):
                item_account_id = str(item.get('account_id'))
                item_nyid = str(item.get('NYID'))

                if item_account_id != group_account_id:
                    failed_items.append({
                        'group_index': group_idx,
                        'item_index': item_idx,
                        'message': f'第{group_idx + 1}组第{item_idx + 1}条数据account_id不一致,期望:{group_account_id},实际:{item_account_id}'
                    })
                    group_valid = False
                if item_nyid != group_nyid:
                    failed_items.append({
                        'group_index': group_idx,
                        'item_index': item_idx,
                        'message': f'第{group_idx + 1}组第{item_idx + 1}条数据NYID不一致,期望:{group_nyid},实际:{item_nyid}'
                    })
                    group_valid = False

            # Record the validation outcome. (Any skipped group above also
            # added to failed_items, which forces the early return below —
            # so when processing proceeds, indices here align with group_idx.)
            group_validation_results.append({
                'group_index': group_idx,
                'account_id': group_account_id,
                'nyid': group_nyid,
                'valid': group_valid,
                'data': group
            })

            # Bucket by account_id.
            if group_account_id not in account_id_to_groups:
                account_id_to_groups[group_account_id] = []
            account_id_to_groups[group_account_id].append(group_idx)

        # Abort on any validation error.
        if failed_items:
            return {
                'success': False,
                'message': f'数据验证失败,发现{len(failed_items)}个错误',
                'total_count': sum(len(group) for group in data if group),
                'success_count': 0,
                'failed_count': len(failed_items),
                'failed_items': failed_items
            }

        # Overall totals across valid groups.
        total_count = sum(len(group['data']) for group in group_validation_results if group['valid'])
        logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}")

        # Snapshot the connection pool before starting, for diagnostics.
        pool_stats_before = get_pool_status()
        logger.info(f"开始新版批量导入,连接池状态: {pool_stats_before}")

        # If the caller already opened a transaction, reuse it and never
        # commit/rollback ourselves.
        in_transaction = db.in_transaction()
        logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")

        # At most one retry on SQLAlchemyError.
        for attempt in range(2):
            try:
                # Only open a transaction if the caller did not.
                if not in_transaction:
                    logger.info(f"开始内部事务 (尝试 {attempt + 1})")
                    db.begin()
                else:
                    logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")

                # Reset counters on each attempt.
                success_count = 0
                failed_count = 0
                failed_items = []

                # Process every shard (account) in turn.
                for account_id_str, group_indices in account_id_to_groups.items():
                    account_id = int(account_id_str)
                    logger.info(f"Processing account_id: {account_id}, groups: {group_indices}")

                    # The account must exist; otherwise fail all its groups.
                    account = db.query(Account).filter(Account.id == account_id).first()
                    if not account:
                        error_msg = f'账号ID {account_id} 不存在'
                        logger.error(error_msg)
                        for group_idx in group_indices:
                            failed_count += len(group_validation_results[group_idx]['data'])
                            failed_items.append({
                                'group_index': group_idx,
                                'message': error_msg
                            })
                        continue

                    # Ensure the shard table exists (created with retries).
                    table_created = self._ensure_table_exists(db, account_id)
                    if not table_created:
                        error_msg = f'创建原始数据表失败 (account_id: {account_id})'
                        logger.error(error_msg)
                        for group_idx in group_indices:
                            failed_count += len(group_validation_results[group_idx]['data'])
                            failed_items.append({
                                'group_index': group_idx,
                                'message': error_msg
                            })
                        continue

                    # Collect this account's groups in order.
                    account_groups_data = []
                    for group_idx in group_indices:
                        account_groups_data.append(group_validation_results[group_idx]['data'])

                    # Delegate delete-and-replace import to the helper.
                    table_name = self._get_table_name(account_id)
                    logger.info(f"Processing {len(account_groups_data)} groups for table: {table_name}")
                    group_results = self._execute_import_new(db, table_name, account_groups_data, account_id)
                    success_count += group_results['success_count']
                    failed_count += group_results['failed_count']
                    failed_items.extend(group_results['failed_items'])
                    logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}")

                logger.info(f"Before commit: Success={success_count}, Failed={failed_count}")

                # Commit only the transaction we opened ourselves.
                if not in_transaction:
                    db.commit()
                    logger.info(f"事务已提交")
                else:
                    logger.info(f"使用外部事务,不提交")

                logger.info(f"Transaction completed successfully! Success: {success_count}, Failed: {failed_count}")
                break

            except SQLAlchemyError as e:
                # Roll back only the transaction we opened ourselves.
                if not in_transaction:
                    try:
                        db.rollback()
                    except:
                        pass

                pool_stats_after = get_pool_status()
                error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
                logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")

                # Log detailed diagnostics (error type, pool usage, SQL head).
                logger.error(
                    f"错误详情: 类型={type(e).__name__}, "
                    f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
                    f"SQL: {str(e)[:200]}"
                )

                if attempt == 1:
                    logger.error("Batch import original data failed after retries")
                    return {
                        'success': False,
                        'message': f'批量导入失败: {str(e)}',
                        'total_count': total_count,
                        'success_count': 0,
                        'failed_count': total_count,
                        'failed_items': []
                    }

        return {
            'success': True if failed_count == 0 else False,
            'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
            'total_count': total_count,
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }
|
||
|
||
    def _execute_import_new(self, db: Session, table_name: str, data: List[List[Dict[str, Any]]], account_id: int) -> Dict[str, Any]:
        """
        Execute the new-style grouped import for one shard table.

        For each group: delete any existing same-NYID rows, then bulk-insert
        the group's records (delete-and-replace semantics).

        Args:
            db: database session (caller owns the transaction)
            table_name: shard table name
            data: grouped records for this account
            account_id: account id owning the shard

        Returns:
            Dict with ``success_count``, ``failed_count`` and ``failed_items``.

        Raises:
            Exception: when some NYIDs have no settlement record.
        """
        logger.info(f"Starting batch import (new) into {table_name}, total groups: {len(data)}")

        # Warm the table up first so its definition is stable before DML.
        self._warmup_table(db, table_name)

        success_count = 0
        failed_count = 0
        failed_items = []

        # Collect every group's NYID for one bulk settlement lookup.
        all_nyids = set()

        for group in data:
            if group and len(group) > 0:
                group_nyid = str(group[0].get('NYID'))
                all_nyids.add(group_nyid)

        # Bulk-check that each NYID has a settlement record.
        from ..models.settlement_data import SettlementData
        if all_nyids:
            settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(list(all_nyids))).all()
            settlement_map = {s.NYID: s for s in settlements}
            missing_nyids = all_nyids - set(settlement_map.keys())

            if missing_nyids:
                raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')

        logger.info(f"Found {len(all_nyids)} unique NYIDs to process: {list(all_nyids)}")

        # Process groups one NYID at a time, replacing existing rows.
        processed_nyids = set()
        group_index = 0

        for group in data:
            if not group or len(group) == 0:
                continue

            group_nyid = str(group[0].get('NYID'))
            logger.info(f"Processing group {group_index}: NYID={group_nyid}, {len(group)} records")

            # A NYID may only be handled once per call; later groups with
            # the same NYID are reported as failures and skipped.
            if group_nyid in processed_nyids:
                logger.warning(f"NYID {group_nyid} appears in multiple groups (group {group_index}), skipping duplicate")
                failed_count += len(group)
                failed_items.append({
                    'group_index': group_index,
                    'group_nyid': group_nyid,
                    'message': f'NYID {group_nyid} 已在之前组中处理,跳过重复组'
                })
                group_index += 1
                continue

            processed_nyids.add(group_nyid)

            # Retry loop to ride out "Table definition has changed" errors.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    # Does the shard already hold rows for this NYID?
                    existing_query = text(f"SELECT id FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                    existing_result = db.execute(existing_query, {"nyid": group_nyid, "account_id": account_id}).fetchall()
                    logger.debug(f"Found {len(existing_result)} existing records for NYID {group_nyid}")

                    # Replace semantics: drop the old rows first.
                    if existing_result:
                        delete_query = text(f"DELETE FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                        db.execute(delete_query, {"nyid": group_nyid, "account_id": account_id})
                        logger.info(f"Deleted {len(existing_result)} existing records for NYID {group_nyid}")

                    # Normalise NYID/account_id to str on copies so the
                    # caller's dicts are not mutated.
                    to_insert = []
                    for item in group:
                        item_copy = item.copy()
                        item_copy['NYID'] = str(item_copy.get('NYID'))
                        item_copy['account_id'] = str(account_id)
                        to_insert.append(item_copy)

                    # Bulk-insert the group.
                    if to_insert:
                        logger.info(f"Inserting {len(to_insert)} records for NYID {group_nyid} into {table_name}")
                        inserted_count = self._batch_insert_group(db, table_name, to_insert, account_id)
                        logger.info(f"Successfully inserted {inserted_count} records for NYID {group_nyid}")
                        success_count += inserted_count

                    # Success — leave the retry loop.
                    break

                except Exception as e:
                    error_msg = str(e)
                    if "Table definition has changed" in error_msg:
                        if attempt < max_retries - 1:
                            # Back off and re-warm the table before retrying.
                            import time
                            time.sleep(0.1 * (attempt + 1))
                            self._warmup_table(db, table_name)
                            logger.warning(f"Table definition changed for {group_nyid}, retrying (attempt {attempt + 2})...")
                            continue
                        else:
                            failed_count += len(group)
                            failed_items.append({
                                'group_index': group_index,
                                'group_nyid': group_nyid,
                                'message': f'处理NYID {group_nyid} 失败: 表定义变更,重试{attempt + 1}次后仍失败'
                            })
                            logger.error(f"Failed to process group with NYID {group_nyid} after {max_retries} retries: {str(e)}")
                            break
                    else:
                        # Any other error fails this group immediately.
                        failed_count += len(group)
                        failed_items.append({
                            'group_index': group_index,
                            'group_nyid': group_nyid,
                            'message': f'处理NYID {group_nyid} 失败: {str(e)}'
                        })
                        logger.error(f"Failed to process group with NYID {group_nyid}: {str(e)}")
                        break

            group_index += 1

        logger.info(f"Batch import (new) completed: {success_count} records inserted, {failed_count} failed")
        return {
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }
|
||
|
||
def _batch_insert_group(self, db: Session, table_name: str, data: List[Dict[str, Any]], account_id: int) -> int:
|
||
"""
|
||
批量插入一组数据
|
||
|
||
Args:
|
||
db: 数据库会话
|
||
table_name: 表名
|
||
data: 要插入的数据列表
|
||
account_id: 账号ID
|
||
|
||
Returns:
|
||
插入的记录数
|
||
"""
|
||
if not data:
|
||
return 0
|
||
|
||
# 将数据分组,每组1000条(MySQL默认支持)
|
||
batch_size = 1000
|
||
total_inserted = 0
|
||
|
||
for i in range(0, len(data), batch_size):
|
||
batch_data = data[i:i + batch_size]
|
||
|
||
# 构建批量参数
|
||
values_list = []
|
||
params = {}
|
||
logger.info(f"Preparing to insert batch of {len(batch_data)} records into {table_name}")
|
||
for idx, item_data in enumerate(batch_data):
|
||
values_list.append(
|
||
f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
|
||
f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
|
||
)
|
||
params.update({
|
||
f"account_id_{idx}": str(account_id),
|
||
f"bfpcode_{idx}": item_data.get('bfpcode'),
|
||
f"mtime_{idx}": item_data.get('mtime'),
|
||
f"bffb_{idx}": item_data.get('bffb'),
|
||
f"bfpl_{idx}": item_data.get('bfpl'),
|
||
f"bfpvalue_{idx}": item_data.get('bfpvalue'),
|
||
f"NYID_{idx}": str(item_data.get('NYID')),
|
||
f"sort_{idx}": item_data.get('sort')
|
||
})
|
||
|
||
# 批量插入SQL
|
||
insert_sql = f"""
|
||
INSERT INTO `{table_name}`
|
||
(account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
|
||
VALUES {", ".join(values_list)}
|
||
"""
|
||
final_sql = text(insert_sql)
|
||
logger.debug(f"Final SQL preview: {insert_sql[:200]}...") # 只打印前200个字符
|
||
result = db.execute(final_sql, params)
|
||
logger.info(f"Batch insert successful: {len(batch_data)} records affected")
|
||
total_inserted += len(batch_data)
|
||
logger.debug(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")
|
||
|
||
return total_inserted |