数据库监控日志、接口监控,熔断机制,提高连接池

This commit is contained in:
lhx
2025-11-29 16:02:28 +08:00
parent c82c4b1dbe
commit ae476256a9
5 changed files with 651 additions and 54 deletions

View File

@@ -1,14 +1,28 @@
from sqlalchemy import create_engine, MetaData from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import QueuePool
from .config import settings from .config import settings
engine = create_engine(settings.DATABASE_URL, pool_pre_ping=True, echo=False, pool_size=30, max_overflow=60, pool_timeout=30, pool_recycle=3600) # 创建带连接池监控的引擎
engine = create_engine(
settings.DATABASE_URL,
poolclass=QueuePool,
pool_pre_ping=True,
echo=False, # 生产环境建议关闭SQL日志
pool_size=400,
max_overflow=600,
pool_timeout=60, # 增加超时时间到60秒
pool_recycle=3600, # 1小时回收连接
pool_reset_on_return='commit' # 归还连接时重置状态
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base() Base = declarative_base()
def get_db(): def get_db():
"""数据库依赖注入函数"""
db = SessionLocal() db = SessionLocal()
try: try:
yield db yield db

252
app/core/db_monitor.py Normal file
View File

@@ -0,0 +1,252 @@
"""
数据库连接池监控模块
监控连接池状态、事务执行情况,预防雪崩效应
"""
import logging
import time
import threading
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from sqlalchemy.pool import QueuePool
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from .database import engine
from .logging_config import get_logger
logger = get_logger(__name__)
# Global monitoring state shared by every function in this module.
# NOTE(review): mutated from request threads and the background monitor
# thread without a lock — assumed low-contention; confirm under load.
_pool_stats = {
    'total_connections': 0,
    'checked_in': 0,
    'checked_out': 0,
    'overflow': 0,
    'invalidate_count': 0,
    'transactions': [],  # in-flight statement timing records
    'slow_queries': [],  # statements slower than the slow-query threshold
    'connection_errors': [],  # recorded connection/driver errors
    'peak_connections': 0,
    'last_reset': datetime.now()
}
# Alert thresholds
_alert_thresholds = {
    'pool_usage_percent': 80,  # pool usage percentage that triggers a warning
    'slow_query_time': 5.0,  # slow-query threshold (seconds)
    'max_transaction_time': 30.0,  # maximum transaction execution time (seconds)
    'connection_error_count': 10,  # connection-error count that triggers a warning
    'alert_cooldown': 300  # cooldown between repeated alerts of one type (seconds)
}
# Timestamp of the last emitted alert, keyed by alert type.
_last_alerts = {}
def get_pool_status() -> Dict[str, Any]:
    """Return a snapshot of the connection pool's current state.

    Reads the live counters exposed by ``engine.pool`` when available,
    otherwise falls back to the estimates accumulated in ``_pool_stats``.
    Also derives a ``usage_percent`` field and tracks the peak number of
    simultaneously checked-out connections.
    """
    pool = engine.pool
    if hasattr(pool, 'status'):
        # Probe each counter defensively — not every pool implementation
        # exposes all of these accessors.
        stats = {
            'total': pool.size() if hasattr(pool, 'size') else 0,
            'checked_in': pool.checkedin() if hasattr(pool, 'checkedin') else 0,
            'checked_out': pool.checkedout() if hasattr(pool, 'checkedout') else 0,
            'overflow': pool.overflow() if hasattr(pool, 'overflow') else 0,
            'invalidate': pool.invalid() if hasattr(pool, 'invalid') else 0,
        }
    else:
        # No live pool counters — report the module-level estimates instead.
        stats = {
            'total': _pool_stats.get('total_connections', 0),
            'checked_in': _pool_stats.get('checked_in', 0),
            'checked_out': _pool_stats.get('checked_out', 0),
            'overflow': _pool_stats.get('overflow', 0),
            'invalidate': _pool_stats.get('invalidate_count', 0),
        }

    # Usage relative to the base pool size; NOTE(review): can exceed 100%
    # while overflow connections are checked out.
    total = stats['total']
    if total > 0:
        stats['usage_percent'] = round((stats['checked_out'] / total) * 100, 2)
    else:
        stats['usage_percent'] = 0

    # Keep the historical peak of checked-out connections up to date.
    _pool_stats['peak_connections'] = max(
        _pool_stats['peak_connections'], stats['checked_out']
    )
    return stats
def check_pool_alerts():
    """Check pool usage, connection errors and slow queries against the
    configured thresholds and emit rate-limited warning logs.

    Each alert type has an independent cooldown (``alert_cooldown``) so a
    persistent condition does not flood the log.

    Fix: the connection-error alert previously compared the *entire*
    retained error history (up to 100 arbitrarily old entries) against the
    threshold while the log message reported the recent-5-minute count —
    so stale errors could keep the alert firing forever. The threshold is
    now applied to the recent window, consistent with the message.
    """
    current_time = time.time()
    stats = get_pool_status()

    def _should_alert(alert_key: str) -> bool:
        # Rate-limit: alert only once the cooldown for this key elapsed.
        last = _last_alerts.get(alert_key)
        return last is None or (current_time - last) > _alert_thresholds['alert_cooldown']

    # Pool-usage alert
    if stats.get('usage_percent', 0) >= _alert_thresholds['pool_usage_percent']:
        if _should_alert('pool_usage'):
            logger.warning(
                f"🚨 数据库连接池告警: 使用率 {stats['usage_percent']}% 超过阈值 {_alert_thresholds['pool_usage_percent']}% "
                f"(已使用: {stats['checked_out']}/{stats['total']})"
            )
            _last_alerts['pool_usage'] = current_time

    # Connection-error alert (recent 5-minute window)
    recent_errors = [e for e in _pool_stats['connection_errors'] if (current_time - e['timestamp']) < 300]
    if len(recent_errors) >= _alert_thresholds['connection_error_count']:
        if _should_alert('connection_errors'):
            logger.warning(
                f"🚨 数据库连接错误告警: 近5分钟内发生 {len(recent_errors)} 次连接错误"
            )
            _last_alerts['connection_errors'] = current_time

    # Slow-query alert (recent 5-minute window)
    slow_queries = [q for q in _pool_stats['slow_queries'] if (current_time - q['timestamp']) < 300]
    if len(slow_queries) >= 5:
        if _should_alert('slow_queries'):
            avg_time = sum(q['duration'] for q in slow_queries) / len(slow_queries)
            logger.warning(
                f"🚨 慢查询告警: 近5分钟内 {len(slow_queries)} 个慢查询,平均耗时 {avg_time:.2f}s"
            )
            _last_alerts['slow_queries'] = current_time
def log_transaction_start(sql: str, params: Optional[Dict] = None):
    """Record the start of a statement execution for later timing.

    The statement text is truncated to 100 characters to keep the
    in-memory record small; the calling thread's id is stored so the
    matching end record can be identified.
    """
    truncated = sql if len(sql) <= 100 else sql[:100] + '...'
    _pool_stats['transactions'].append({
        'sql': truncated,
        'params': params,
        'start_time': time.time(),
        'thread_id': threading.get_ident(),
    })
def log_transaction_end(success: bool = True, error: Optional[str] = None):
    """Close the timing record opened by the *current thread* and record
    slow statements.

    Fix: the previous implementation always popped the most recent record,
    which pairs start/end records incorrectly when multiple threads execute
    statements concurrently (the recorded ``thread_id`` was never used).
    The newest record belonging to the calling thread is popped instead.

    Args:
        success: whether the statement completed without error.
        error: error text when ``success`` is False (kept for interface
            compatibility; errors are recorded via ``log_connection_error``).
    """
    transactions = _pool_stats['transactions']
    if not transactions:
        return
    current_time = time.time()
    thread_id = threading.get_ident()

    # Locate the newest record started by this thread.
    index = next(
        (i for i in range(len(transactions) - 1, -1, -1)
         if transactions[i].get('thread_id') == thread_id),
        None
    )
    if index is None:
        # No record for this thread (e.g. start was never logged).
        return
    transaction = transactions.pop(index)
    duration = current_time - transaction['start_time']

    # Long-transaction warning
    if duration >= _alert_thresholds['max_transaction_time']:
        logger.warning(
            f"🐌 慢事务告警: 执行时间 {duration:.2f}s 超过阈值 {_alert_thresholds['max_transaction_time']}s "
            f"SQL: {transaction['sql']}"
        )

    # Record slow queries for reporting/alerting.
    if duration >= _alert_thresholds['slow_query_time']:
        _pool_stats['slow_queries'].append({
            'sql': transaction['sql'],
            'duration': duration,
            'timestamp': current_time
        })
        # Keep only the most recent 1000 slow-query records.
        if len(_pool_stats['slow_queries']) > 1000:
            _pool_stats['slow_queries'] = _pool_stats['slow_queries'][-1000:]
def log_connection_error(error: str, sql: Optional[str] = None):
    """Append a connection/driver error record to the monitoring state.

    Only the 100 most recent error records are retained.
    """
    errors = _pool_stats['connection_errors']
    errors.append({
        'error': error,
        'sql': sql,
        'timestamp': time.time()
    })
    if len(errors) > 100:
        # Drop the oldest entries, keeping the newest 100.
        del errors[:-100]
def reset_stats():
    """Reset all accumulated monitoring statistics to a clean slate."""
    global _pool_stats
    fresh: Dict[str, Any] = {
        key: 0
        for key in (
            'total_connections', 'checked_in', 'checked_out',
            'overflow', 'invalidate_count', 'peak_connections',
        )
    }
    fresh.update(
        transactions=[],
        slow_queries=[],
        connection_errors=[],
        last_reset=datetime.now(),
    )
    _pool_stats = fresh
    logger.info("数据库监控统计已重置")
def get_monitoring_report() -> Dict[str, Any]:
    """Build a full monitoring report (pool state plus recent activity).

    "Recent" means the last five minutes; only the latest ten slow
    queries and ten connection errors are included verbatim.
    """
    now = time.time()
    window = 300  # five minutes, in seconds

    recent_slow_queries = [
        q for q in _pool_stats['slow_queries'] if (now - q['timestamp']) < window
    ]
    recent_errors = [
        e for e in _pool_stats['connection_errors'] if (now - e['timestamp']) < window
    ]

    if recent_slow_queries:
        avg_slow = sum(q['duration'] for q in recent_slow_queries) / len(recent_slow_queries)
    else:
        avg_slow = 0

    return {
        'timestamp': datetime.now().isoformat(),
        'pool_status': get_pool_status(),
        'peak_connections': _pool_stats['peak_connections'],
        'recent_5min': {
            'slow_queries_count': len(recent_slow_queries),
            'connection_errors_count': len(recent_errors),
            'avg_slow_query_time': avg_slow
        },
        'slow_queries': recent_slow_queries[-10:],  # latest 10 slow queries
        'connection_errors': recent_errors[-10:],  # latest 10 connection errors
        'last_reset': _pool_stats['last_reset'].isoformat()
    }
# Periodic monitoring loop (run in a background thread).
def monitoring_task():
    """Evaluate the alert conditions every 30 seconds, forever.

    Any unexpected error is logged and followed by a longer back-off so a
    persistent failure cannot spin the CPU.
    """
    while True:
        try:
            check_pool_alerts()
            time.sleep(30)
        except Exception as e:
            logger.error(f"数据库监控任务异常: {e}")
            time.sleep(60)
# Launch the background monitoring thread.
def start_monitoring():
    """Start the pool-monitoring loop in a daemon thread.

    Daemon mode means the thread will not block interpreter shutdown.
    """
    threading.Thread(target=monitoring_task, daemon=True).start()
    logger.info("数据库连接池监控已启动")
# SQLAlchemy event listeners — wire statement execution into the monitor.
@event.listens_for(Engine, "before_cursor_execute")
def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
    """Engine-wide hook fired before each statement; opens a timing record."""
    log_transaction_start(statement, params)
@event.listens_for(Engine, "after_cursor_execute")
def receive_after_cursor_execute(conn, cursor, statement, params, context, executemany):
    """Engine-wide hook fired after each statement; closes the timing record."""
    log_transaction_end(success=True)
@event.listens_for(Engine, "handle_error")
def receive_handle_error(exception_context):
    """Engine-wide error hook: record the failure and close the timing record.

    Fix: SQLAlchemy invokes ``handle_error`` listeners with a single
    ``ExceptionContext`` argument, so the previous two-parameter signature
    ``(exception, context)`` would raise a TypeError the first time any
    statement failed. The original exception and statement are read from
    the context object instead.
    """
    error_msg = str(exception_context.original_exception)
    sql = getattr(exception_context, 'statement', None)
    log_connection_error(error_msg, sql)
    log_transaction_end(success=False, error=error_msg)
def log_pool_status():
    """Emit the current connection-pool snapshot to the application log."""
    snapshot = get_pool_status()
    logger.info(
        f"数据库连接池状态: 使用率 {snapshot['usage_percent']}% "
        f"(已用: {snapshot['checked_out']}, 空闲: {snapshot['checked_in']}, 总计: {snapshot['total']}) "
        f"峰值: {_pool_stats['peak_connections']}"
    )

211
app/core/retry.py Normal file
View File

@@ -0,0 +1,211 @@
"""
重试机制和雪崩效应防护
提供指数退避、熔断器、重试装饰器等功能
"""
import logging
import time
import functools
from typing import Callable, Any, Optional
from enum import Enum
from datetime import datetime, timedelta
logger = logging.getLogger(__name__)


class CircuitBreakerState(Enum):
    """States of the circuit breaker."""
    CLOSED = "closed"        # normal operation, calls pass through
    OPEN = "open"            # tripped, calls are rejected immediately
    HALF_OPEN = "half_open"  # probing: one call allowed to test recovery


class CircuitBreaker:
    """Simple in-process circuit breaker.

    After ``failure_threshold`` consecutive failures the breaker opens and
    rejects calls outright; after ``recovery_timeout`` seconds it lets one
    probe call through (half-open) and closes again on success.

    Fixes over the previous version:
    - elapsed time used ``timedelta.seconds`` (the seconds *component*,
      which wraps at one day) instead of ``total_seconds()``, so an open
      breaker could fail to recover at day boundaries;
    - a success in the CLOSED state did not reset ``failure_count``, so
      sporadic, non-consecutive failures accumulated forever and would
      eventually trip the breaker.
    """

    def __init__(self, failure_threshold: int = 5, recovery_timeout: int = 60):
        """
        Args:
            failure_threshold: consecutive failures that trip the breaker.
            recovery_timeout: seconds to wait before allowing a probe call.
        """
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.failure_count = 0
        self.last_failure_time = None
        self.state = CircuitBreakerState.CLOSED

    def call(self, func: Callable, *args, **kwargs):
        """Execute ``func`` through the breaker, updating breaker state.

        Raises:
            Exception: when the breaker is open and the recovery timeout
                has not yet elapsed; otherwise whatever ``func`` raises.
        """
        if self.state == CircuitBreakerState.OPEN:
            elapsed = (
                (datetime.now() - self.last_failure_time).total_seconds()
                if self.last_failure_time else 0.0
            )
            if elapsed >= self.recovery_timeout:
                self.state = CircuitBreakerState.HALF_OPEN
                logger.info("熔断器进入半开状态")
            else:
                raise Exception("熔断器开启,直接拒绝请求")
        try:
            result = func(*args, **kwargs)
        except Exception:
            self.failure_count += 1
            self.last_failure_time = datetime.now()
            if self.failure_count >= self.failure_threshold:
                self.state = CircuitBreakerState.OPEN
                logger.warning(f"熔断器开启,失败次数: {self.failure_count}")
            raise
        # Success: close the breaker (if probing) and clear the streak.
        if self.state == CircuitBreakerState.HALF_OPEN:
            self.state = CircuitBreakerState.CLOSED
            logger.info("熔断器关闭,恢复正常")
        self.failure_count = 0
        return result
class RetryConfig:
    """Container for retry/back-off parameters.

    The delay before retry *k* (0-based attempt index) is
    ``min(base_delay * exponential_base ** k, max_delay)``, optionally
    scaled by random jitter into ``[0.5 * d, d)``.
    """

    def __init__(
        self,
        max_attempts: int = 3,
        base_delay: float = 1.0,
        max_delay: float = 60.0,
        exponential_base: float = 2.0,
        jitter: bool = True
    ):
        """
        Args:
            max_attempts: total number of attempts (first try included).
            base_delay: delay before the first retry, in seconds.
            max_delay: upper bound for any single delay, in seconds.
            exponential_base: multiplier applied per additional attempt.
            jitter: randomize delays to de-synchronize competing clients.
        """
        self.max_attempts = max_attempts
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.exponential_base = exponential_base
        self.jitter = jitter
def retry(
config: Optional[RetryConfig] = None,
exceptions: tuple = (Exception,)
):
"""
重试装饰器
Args:
config: 重试配置如果为None则使用默认配置
exceptions: 需要重试的异常类型
"""
if config is None:
config = RetryConfig()
def decorator(func: Callable) -> Callable:
@functools.wraps(func)
def wrapper(*args, **kwargs):
last_exception = None
for attempt in range(config.max_attempts):
try:
return func(*args, **kwargs)
except exceptions as e:
last_exception = e
if attempt == config.max_attempts - 1:
# 最后一次尝试失败,抛出异常
logger.error(
f"函数 {func.__name__} 经过 {config.max_attempts} 次重试后仍然失败: {str(e)}"
)
raise e
# 计算延迟时间
delay = min(
config.base_delay * (config.exponential_base ** attempt),
config.max_delay
)
# 添加抖动
if config.jitter:
import random
delay = delay * (0.5 + random.random() * 0.5)
logger.warning(
f"函数 {func.__name__}{attempt + 1} 次尝试失败: {str(e)}, "
f"{delay:.2f} 秒后重试"
)
time.sleep(delay)
# 理论上不会到达这里,但为了安全起见
if last_exception:
raise last_exception
return wrapper
return decorator
# Module-wide default breaker shared by ``with_circuit_breaker`` callers.
_default_circuit_breaker = CircuitBreaker()
def circuit_breaker(
    failure_threshold: int = 5,
    recovery_timeout: int = 60
):
    """Decorator wrapping a function with its own ``CircuitBreaker``.

    Args:
        failure_threshold: consecutive failures before the breaker opens.
        recovery_timeout: seconds before a probe call is allowed again.
    """
    def decorator(func: Callable) -> Callable:
        # One dedicated breaker per decorated function.
        breaker = CircuitBreaker(failure_threshold, recovery_timeout)

        @functools.wraps(func)
        def guarded(*args, **kwargs):
            return breaker.call(func, *args, **kwargs)

        # Expose the breaker so callers can inspect its state externally.
        guarded.circuit_breaker = breaker
        return guarded
    return decorator
def with_circuit_breaker(func: Callable, *args, **kwargs):
    """Invoke ``func`` through the shared module-level circuit breaker.

    All callers of this helper share one breaker instance, so failures in
    any of them count toward the same trip threshold.
    """
    return _default_circuit_breaker.call(func, *args, **kwargs)
# Predefined retry configurations for common scenarios.

# Fast: short waits, for cheap operations that recover quickly.
RETRY_CONFIG_FAST = RetryConfig(
    max_attempts=3,
    base_delay=0.5,
    max_delay=5.0,
    exponential_base=2.0,
    jitter=True
)

# Slow: more attempts and longer back-off, for flaky external services.
RETRY_CONFIG_SLOW = RetryConfig(
    max_attempts=5,
    base_delay=2.0,
    max_delay=60.0,
    exponential_base=2.0,
    jitter=True
)

# Database: moderate back-off tuned for transient DB errors.
RETRY_CONFIG_DB = RetryConfig(
    max_attempts=3,
    base_delay=1.0,
    max_delay=10.0,
    exponential_base=2.0,
    jitter=True
)
# Retry decorator preset for database operations.
def retry_db_operation(max_attempts: int = 3):
    """Return a retry decorator configured for database operations.

    Fix: the ``max_attempts`` argument was previously ignored — the
    decorator always used ``RETRY_CONFIG_DB`` (max_attempts=3) regardless
    of what the caller passed. The back-off parameters still mirror
    ``RETRY_CONFIG_DB``; only the attempt count is caller-controlled.

    Args:
        max_attempts: total attempts for the wrapped operation.
    """
    config = RetryConfig(
        max_attempts=max_attempts,
        base_delay=RETRY_CONFIG_DB.base_delay,
        max_delay=RETRY_CONFIG_DB.max_delay,
        exponential_base=RETRY_CONFIG_DB.exponential_base,
        jitter=RETRY_CONFIG_DB.jitter,
    )
    return retry(
        config=config,
        exceptions=(Exception,)
    )

View File

@@ -6,6 +6,7 @@ import logging
from .core.config import settings from .core.config import settings
from .core.logging_config import setup_logging, get_logger from .core.logging_config import setup_logging, get_logger
from .core.database import init_db from .core.database import init_db
from .core.db_monitor import start_monitoring, log_pool_status, get_pool_status
from .api.account import router as account_router from .api.account import router as account_router
from .api.database import router as database_router from .api.database import router as database_router
from .api.task import router as task_router from .api.task import router as task_router
@@ -34,6 +35,17 @@ async def lifespan(app: FastAPI):
except Exception as e: except Exception as e:
logger.error(f"数据库初始化失败: {e}") logger.error(f"数据库初始化失败: {e}")
# 启动数据库监控
try:
start_monitoring()
logger.info("数据库连接池监控已启动")
# 记录初始连接池状态
pool_stats = get_pool_status()
logger.info(f"初始连接池状态: {pool_stats}")
except Exception as e:
logger.error(f"数据库监控启动失败: {e}")
# 启动定时任务调度器 # 启动定时任务调度器
try: try:
task_scheduler.start() task_scheduler.start()
@@ -45,6 +57,14 @@ async def lifespan(app: FastAPI):
# 关闭时执行 # 关闭时执行
logger.info("应用关闭中...") logger.info("应用关闭中...")
# 记录最终连接池状态
try:
pool_stats = get_pool_status()
logger.info(f"最终连接池状态: {pool_stats}")
except Exception as e:
logger.error(f"获取最终连接池状态失败: {e}")
try: try:
task_scheduler.shutdown() task_scheduler.shutdown()
logger.info("定时任务调度器已关闭") logger.info("定时任务调度器已关闭")
@@ -101,11 +121,47 @@ async def health_check():
"scheduler": "running" if task_scheduler.scheduler.running else "stopped" "scheduler": "running" if task_scheduler.scheduler.running else "stopped"
} }
# 数据库监控端点
@app.get("/api/monitor/database")
async def get_database_monitor():
"""获取数据库连接池监控信息"""
from .core.db_monitor import get_monitoring_report
return get_monitoring_report()
# 连接池状态端点
@app.get("/api/monitor/pool")
async def get_pool_status():
"""获取连接池状态"""
from .core.db_monitor import get_pool_status
return get_pool_status()
# 全局异常处理 # 全局异常处理
@app.exception_handler(Exception) @app.exception_handler(Exception)
async def global_exception_handler(request, exc): async def global_exception_handler(request, exc):
logger.error(f"全局异常: {str(exc)}") """全局异常处理"""
import traceback
from .core.db_monitor import get_pool_status
# 获取异常详情
error_msg = str(exc)
error_type = type(exc).__name__
stack_trace = traceback.format_exc()
# 获取当前连接池状态
try:
pool_stats = get_pool_status()
pool_info = f", 连接池使用率: {pool_stats.get('usage_percent', 'N/A')}%"
except:
pool_info = ""
# 记录详细错误日志
logger.error(
f"🚨 全局异常: {error_type}: {error_msg} "
f"请求路径: {request.url.path}, 方法: {request.method}{pool_info}\n"
f"堆栈跟踪:\n{stack_trace}"
)
return HTTPException( return HTTPException(
status_code=500, status_code=500,
detail="服务器内部错误" detail=f"服务器内部错误: {error_type}"
) )

View File

@@ -1,11 +1,14 @@
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy import text, inspect from sqlalchemy import text, inspect
from sqlalchemy.exc import SQLAlchemyError, DisconnectionError, TimeoutError
from typing import List, Optional, Dict, Any from typing import List, Optional, Dict, Any
from ..models.original_data import OriginalData, get_original_data_model, get_table_name from ..models.original_data import OriginalData, get_original_data_model, get_table_name
from .base import BaseService from .base import BaseService
from ..models.settlement_data import SettlementData from ..models.settlement_data import SettlementData
from ..models.account import Account from ..models.account import Account
from ..core.database import engine from ..core.database import engine
from ..core.db_monitor import log_pool_status, get_pool_status
from ..core.retry import retry, circuit_breaker, RetryConfig
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -363,49 +366,84 @@ class OriginalDataService(BaseService[OriginalData]):
table_name = self._get_table_name(account_id) table_name = self._get_table_name(account_id)
# **重要**: 始终使用内部事务,确保数据能正确提交 # 记录开始前的连接池状态
# 这是为了解决外部事务可能不提交的问题 pool_stats_before = get_pool_status()
# in_transaction = db.in_transaction() logger.info(f"开始批量导入,连接池状态: {pool_stats_before}")
# 始终创建内部事务 # 检查是否已在事务
for attempt in range(2): # 最多重试1次 in_transaction = db.in_transaction()
try: logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")
db.begin()
success_count = 0
failed_count = 0
failed_items = []
# 执行数据导入操作 @retry(config=RetryConfig(max_attempts=2, base_delay=2.0, max_delay=10.0))
success_count = self._execute_import(db, table_name, data, account_id) def _do_import():
db.commit() """执行导入操作的内部函数(带重试)"""
logger.info(f"Batch import original data completed. Success: {success_count}, Failed: {failed_count}") for attempt in range(2): # 最多重试1次
break
except Exception as e:
try: try:
db.rollback() # 只有不在事务中时才调用begin()
except: if not in_transaction:
pass # 如果回滚失败,忽略错误 logger.info(f"开始内部事务 (尝试 {attempt + 1})")
logger.warning(f"Batch import attempt {attempt + 1} failed: {str(e)}") db.begin()
if attempt == 1: # 最后一次重试失败 else:
logger.error("Batch import original data failed after retries") logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
success_count = 0
failed_count = 0
failed_items = []
# 执行数据导入操作
success_count = self._execute_import(db, table_name, data, account_id)
# 只有我们开始的事务才提交
if not in_transaction:
db.commit()
logger.info(f"事务已提交")
else:
logger.info(f"使用外部事务,不提交")
logger.info(f"Batch import original data completed. Success: {success_count}, Failed: {failed_count}")
return { return {
'success': False, 'success': True,
'message': f'批量导入失败: {str(e)}', 'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
'total_count': total_count, 'total_count': total_count,
'success_count': 0, 'success_count': success_count,
'failed_count': total_count, 'failed_count': failed_count,
'failed_items': failed_items 'failed_items': failed_items
} }
return { except SQLAlchemyError as e:
'success': True, # 只有我们开始的事务才回滚
'message': '批量导入完成' if failed_count == 0 else f'部分导入失败', if not in_transaction:
'total_count': total_count, try:
'success_count': success_count, db.rollback()
'failed_count': failed_count, except:
'failed_items': failed_items pass
}
pool_stats_after = get_pool_status()
error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
# 记录错误详情
logger.error(
f"错误详情: 类型={type(e).__name__}, "
f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
f"SQL: {str(e)[:200]}"
)
if attempt == 1: # 最后一次重试失败
logger.error("Batch import original data failed after retries")
raise e # 抛出异常触发重试装饰器
try:
return _do_import()
except Exception as e:
return {
'success': False,
'message': f'批量导入失败: {str(e)}',
'total_count': total_count,
'success_count': 0,
'failed_count': total_count,
'failed_items': []
}
def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int: def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int:
"""执行数据导入操作(抽取的公共逻辑)""" """执行数据导入操作(抽取的公共逻辑)"""
@@ -610,16 +648,23 @@ class OriginalDataService(BaseService[OriginalData]):
total_count = sum(len(group['data']) for group in group_validation_results if group['valid']) total_count = sum(len(group['data']) for group in group_validation_results if group['valid'])
logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}") logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}")
# **重要**: 始终使用内部事务,确保数据能正确提交 # 记录开始前的连接池状态
# 这是为了解决外部事务可能不提交的问题 pool_stats_before = get_pool_status()
# in_transaction = db.in_transaction() logger.info(f"开始新版批量导入,连接池状态: {pool_stats_before}")
# logger.info(f"Original transaction status: {'in_transaction' if in_transaction else 'not in_transaction'}")
# 检查是否已在事务中
in_transaction = db.in_transaction()
logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")
# 始终创建内部事务
for attempt in range(2): for attempt in range(2):
try: try:
logger.info(f"Starting internal transaction (attempt {attempt + 1})") # 只有不在事务中时才调用begin()
db.begin() if not in_transaction:
logger.info(f"开始内部事务 (尝试 {attempt + 1})")
db.begin()
else:
logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
success_count = 0 success_count = 0
failed_count = 0 failed_count = 0
failed_items = [] failed_items = []
@@ -670,17 +715,36 @@ class OriginalDataService(BaseService[OriginalData]):
logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}") logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}")
logger.info(f"Before commit: Success={success_count}, Failed={failed_count}") logger.info(f"Before commit: Success={success_count}, Failed={failed_count}")
db.commit()
logger.info(f"Transaction committed successfully! Success: {success_count}, Failed: {failed_count}") # 只有我们开始的事务才提交
if not in_transaction:
db.commit()
logger.info(f"事务已提交")
else:
logger.info(f"使用外部事务,不提交")
logger.info(f"Transaction completed successfully! Success: {success_count}, Failed: {failed_count}")
break break
except Exception as e: except SQLAlchemyError as e:
logger.error(f"Transaction rollback due to: {str(e)}") # 只有我们开始的事务才回滚
try: if not in_transaction:
db.rollback() try:
except: db.rollback()
pass # 如果回滚失败,忽略错误 except:
logger.warning(f"Batch import attempt {attempt + 1} failed: {str(e)}") pass
pool_stats_after = get_pool_status()
error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
# 记录错误详情
logger.error(
f"错误详情: 类型={type(e).__name__}, "
f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
f"SQL: {str(e)[:200]}"
)
if attempt == 1: if attempt == 1:
logger.error("Batch import original data failed after retries") logger.error("Batch import original data failed after retries")
return { return {