Files
railway_cloud/app/services/original_data.py

963 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from sqlalchemy.orm import Session
from sqlalchemy import text, inspect
from sqlalchemy.exc import SQLAlchemyError, DisconnectionError, TimeoutError
from typing import List, Optional, Dict, Any
from ..models.original_data import OriginalData, get_original_data_model, get_table_name
from .base import BaseService
from ..models.settlement_data import SettlementData
from ..models.account import Account
from ..core.database import engine
from ..core.db_monitor import log_pool_status, get_pool_status
from ..core.retry import retry, circuit_breaker, RetryConfig
import logging
logger = logging.getLogger(__name__)
class OriginalDataService(BaseService[OriginalData]):
    """Service layer for per-account "original data" shard tables.

    Each account stores its raw survey readings in its own MySQL table whose
    name is derived from the account id (see ``get_table_name``). Most methods
    below therefore run raw SQL against those shard tables rather than going
    through the nominal ``OriginalData`` model.
    """

    def __init__(self):
        # OriginalData is only the nominal model for the BaseService generic;
        # the real work targets the per-account shard tables.
        super().__init__(OriginalData)

    def _get_table_name(self, account_id: int) -> str:
        """Return the shard table name for the given account id."""
        return get_table_name(account_id)
def _ensure_table_exists(self, db: Session, account_id: int) -> bool:
"""
确保指定账号的原始数据表存在,不存在则创建
Args:
db: 数据库会话
account_id: 账号ID
Returns:
bool: 表是否存在或创建成功
"""
table_name = self._get_table_name(account_id)
inspector = inspect(engine)
# 检查表是否存在
if table_name in inspector.get_table_names():
logger.info(f"Table {table_name} already exists")
# 表已存在时,也执行预热操作
self._warmup_table(db, table_name)
return True
# 表不存在,创建表 - 添加重试机制
max_retries = 3
for attempt in range(max_retries):
try:
create_table_sql = f"""
CREATE TABLE `{table_name}` (
`id` INT AUTO_INCREMENT PRIMARY KEY,
`account_id` INT NOT NULL COMMENT '账号ID',
`bfpcode` VARCHAR(1000) NOT NULL COMMENT '前(后)视点名称',
`mtime` DATETIME NOT NULL COMMENT '测点观测时间',
`bffb` VARCHAR(1000) NOT NULL COMMENT '前(后)视标记符',
`bfpl` VARCHAR(1000) NOT NULL COMMENT '前(后)视距离(m)',
`bfpvalue` VARCHAR(1000) NOT NULL COMMENT '前(后)视尺读数(m)',
`NYID` VARCHAR(100) NOT NULL COMMENT '期数id',
`sort` INT COMMENT '序号',
INDEX `idx_nyid` (`NYID`),
INDEX `idx_account_id` (`account_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='原始数据表_账号{account_id}'
"""
# 使用引擎直接执行不依赖db会话的事务
with engine.begin() as conn: # 创建独立连接
conn.execute(text(create_table_sql))
logger.info(f"Table {table_name} created successfully on attempt {attempt + 1}")
# 表创建后延迟更长时间,避免立即查询触发"表定义变更"错误
import time
time.sleep(0.2)
# 预热表:执行一个简单的查询来稳定表定义
self._warmup_table(db, table_name)
return True
except Exception as e:
logger.warning(f"Attempt {attempt + 1} to create table {table_name} failed: {str(e)}")
# 检查是否是表已存在错误(并发情况下可能发生)
if "already exists" in str(e).lower() or "exist" in str(e).lower():
logger.info(f"Table {table_name} was created by another process")
# 延迟并预热表
import time
time.sleep(0.2)
self._warmup_table(db, table_name)
return True
if attempt == max_retries - 1:
logger.error(f"Failed to create table {table_name} after {max_retries} attempts: {str(e)}")
raise Exception(f"创建原始数据表失败: {str(e)}")
# 短暂延迟后重试
import time
time.sleep(0.1 * (attempt + 1))
return False
def _warmup_table(self, db: Session, table_name: str) -> None:
"""
预热表:执行一个简单查询来稳定表定义,避免"表定义变更"错误
"""
try:
# 执行一个简单的COUNT查询来预热表
warmup_query = text(f"SELECT COUNT(*) FROM `{table_name}` LIMIT 0")
db.execute(warmup_query)
logger.debug(f"Table {table_name} warmed up successfully")
except Exception as e:
logger.warning(f"Failed to warm up table {table_name}: {str(e)}")
# 预热失败不影响主流程,只是记录警告
def create_table_for_account(self, db: Session, account_id: int) -> Dict[str, Any]:
"""
为指定账号创建原始数据表
Args:
db: 数据库会话
account_id: 账号ID
Returns:
操作结果
"""
try:
# 验证账号是否存在
account = db.query(Account).filter(Account.id == account_id).first()
if not account:
return {
'success': False,
'message': f'账号ID {account_id} 不存在'
}
# 创建表 - 现在使用增强的错误处理
table_created = self._ensure_table_exists(db, account_id)
if table_created:
return {
'success': True,
'message': f'原始数据表 {self._get_table_name(account_id)} 创建成功'
}
else:
return {
'success': False,
'message': f'原始数据表 {self._get_table_name(account_id)} 创建失败'
}
except Exception as e:
logger.error(f"Failed to create table for account {account_id}: {str(e)}")
return {
'success': False,
'message': f'创建原始数据表失败: {str(e)}'
}
def get_by_nyid(self, db: Session, account_id: int, nyid: str) -> List[OriginalData]:
"""
根据期数ID获取原始数据
Args:
db: 数据库会话
account_id: 账号ID
nyid: 期数ID
Returns:
原始数据列表
"""
try:
table_name = self._get_table_name(account_id)
# 使用原生SQL查询
query = text(f"SELECT * FROM `{table_name}` WHERE NYID = :nyid")
result = db.execute(query, {"nyid": nyid})
return result.fetchall()
except Exception as e:
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
return []
def get_by_bfpcode(self, db: Session, account_id: int, bfpcode: str) -> List[OriginalData]:
"""
根据前(后)视点名称获取原始数据
Args:
db: 数据库会话
account_id: 账号ID
bfpcode: 前(后)视点名称
Returns:
原始数据列表
"""
try:
table_name = self._get_table_name(account_id)
query = text(f"SELECT * FROM `{table_name}` WHERE bfpcode = :bfpcode")
result = db.execute(query, {"bfpcode": bfpcode})
return result.fetchall()
except Exception as e:
logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
return []
def get_all_original_data_tables(self, db: Session) -> List[str]:
"""
获取所有原始数据分表的表名
Args:
db: 数据库会话
Returns:
原始数据表名列表
"""
try:
inspector = inspect(engine)
all_tables = inspector.get_table_names()
# 筛选出所有原始数据分表
original_tables = [table for table in all_tables if table.startswith('original_data_')]
logger.info(f"Found {len(original_tables)} original data tables: {original_tables}")
return original_tables
except Exception as e:
logger.error(f"Failed to get original data tables: {str(e)}")
return []
def search_original_data(self, db: Session,
account_id: Optional[int] = None,
id: Optional[int] = None,
bfpcode: Optional[str] = None,
bffb: Optional[str] = None,
nyid: Optional[str] = None,
bfpl: Optional[str] = None) -> List[Any]:
"""
根据多个条件搜索原始数据
如果未提供account_id则遍历所有分表查询
Args:
db: 数据库会话
account_id: 账号ID可选。不填则查询所有分表
其他查询条件...
Returns:
原始数据列表
"""
print("搜索原始数据,参数:", {
"account_id": account_id,
"id": id,
"bfpcode": bfpcode,
"bffb": bffb,
"nyid": nyid,
"bfpl": bfpl})
try:
# 构建查询条件
conditions = []
params = {}
if id is not None:
conditions.append("id = :id")
params["id"] = id
if bfpcode is not None:
conditions.append("bfpcode = :bfpcode")
params["bfpcode"] = bfpcode
if bffb is not None:
conditions.append("bffb = :bffb")
params["bffb"] = bffb
if nyid is not None:
conditions.append("NYID = :nyid")
params["nyid"] = nyid
if bfpl is not None:
conditions.append("bfpl = :bfpl")
params["bfpl"] = bfpl
where_clause = " AND ".join(conditions) if conditions else "1=1"
# 如果指定了account_id只查询该账号的分表
if account_id is not None:
table_name = self._get_table_name(account_id)
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
result = db.execute(query, params)
return result.fetchall()
# 未指定account_id遍历所有分表
all_results = []
tables = self.get_all_original_data_tables(db)
for table_name in tables:
try:
query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
result = db.execute(query, params)
rows = result.fetchall()
all_results.extend(rows)
logger.info(f"Found {len(rows)} records in table {table_name}")
except Exception as e:
logger.warning(f"Failed to query table {table_name}: {str(e)}")
continue
logger.info(f"Total found {len(all_results)} records from {len(tables)} tables")
return all_results
except Exception as e:
logger.error(f"Failed to search original data: {str(e)}")
return []
def _check_settlement_exists(self, db: Session, nyid: str) -> bool:
"""检查期数id沉降数据是否存在"""
settlement = db.query(SettlementData).filter(SettlementData.NYID == nyid).first()
return settlement is not None
    def batch_import_original_data(self, db: Session, data: List) -> Dict[str, Any]:
        """
        Batch-import original data into the account's shard table
        (performance-optimized version).

        Uses multi-row INSERT statements instead of per-row inserts, which
        greatly speeds up the import.

        Args:
            db: database session
            data: list of row dicts; each must carry an 'account_id' field
        Returns:
            Result dict with keys: success, message, total_count,
            success_count, failed_count, failed_items.
        """
        # NOTE(review): shadows the module-level ``logger`` with an identical
        # instance; harmless but redundant.
        logger = logging.getLogger(__name__)
        total_count = len(data)
        success_count = 0
        failed_count = 0
        failed_items = []
        if total_count == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }
        # Only the first row's account_id is read -- the payload is assumed
        # to belong to a single account.  TODO confirm callers guarantee this.
        account_id = data[0].get('account_id')
        if not account_id:
            return {
                'success': False,
                'message': '数据中缺少account_id字段',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        # The account must exist before anything is written.
        account = db.query(Account).filter(Account.id == account_id).first()
        if not account:
            return {
                'success': False,
                'message': f'账号ID {account_id} 不存在',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        # Ensure the shard table exists (creation retries internally).
        table_created = self._ensure_table_exists(db, account_id)
        if not table_created:
            return {
                'success': False,
                'message': '创建原始数据表失败',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
        table_name = self._get_table_name(account_id)
        # Log connection-pool state before starting, for diagnostics.
        pool_stats_before = get_pool_status()
        logger.info(f"开始批量导入,连接池状态: {pool_stats_before}")
        # Detect an externally managed transaction; if one is active we must
        # neither commit nor roll back here.
        in_transaction = db.in_transaction()
        logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")

        # NOTE(review): the @retry decorator (max_attempts=2) wraps a function
        # that also loops twice internally, so a persistent DB error can be
        # attempted up to 4 times in total.
        @retry(config=RetryConfig(max_attempts=2, base_delay=2.0, max_delay=10.0))
        def _do_import():
            """Inner import routine (re-run by the @retry decorator on failure)."""
            for attempt in range(2):  # at most one internal retry
                try:
                    # Only begin() a transaction if none is active.
                    if not in_transaction:
                        logger.info(f"开始内部事务 (尝试 {attempt + 1})")
                        db.begin()
                    else:
                        logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
                    # NOTE(review): these shadow the enclosing function's
                    # counters; the result dict below uses these local values.
                    success_count = 0
                    failed_count = 0
                    failed_items = []
                    # Perform the actual inserts.
                    success_count = self._execute_import(db, table_name, data, account_id)
                    # Commit only a transaction we opened ourselves.
                    if not in_transaction:
                        db.commit()
                        logger.info(f"事务已提交")
                    else:
                        logger.info(f"使用外部事务,不提交")
                    logger.info(f"Batch import original data completed. Success: {success_count}, Failed: {failed_count}")
                    return {
                        'success': True,
                        'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
                        'total_count': total_count,
                        'success_count': success_count,
                        'failed_count': failed_count,
                        'failed_items': failed_items
                    }
                except SQLAlchemyError as e:
                    # Roll back only a transaction we opened ourselves.
                    if not in_transaction:
                        try:
                            db.rollback()
                        except:
                            pass
                    pool_stats_after = get_pool_status()
                    error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
                    logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
                    # Detailed diagnostics including pool usage.
                    logger.error(
                        f"错误详情: 类型={type(e).__name__}, "
                        f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
                        f"SQL: {str(e)[:200]}"
                    )
                    if attempt == 1:  # final internal attempt failed
                        logger.error("Batch import original data failed after retries")
                        raise e  # re-raise so the @retry decorator can retry
        try:
            return _do_import()
        except Exception as e:
            return {
                'success': False,
                'message': f'批量导入失败: {str(e)}',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }
    def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int:
        """
        Execute the actual import (shared logic extracted from the batch
        entry point).

        Skips rows already present in the table (matched on the composite key
        NYID + sort) and inserts the rest with multi-row INSERTs in chunks
        of 1000.

        Args:
            db: database session
            table_name: target shard table
            data: list of row dicts
            account_id: owning account id
        Returns:
            Number of newly inserted rows.
        Raises:
            Exception: when any referenced NYID has no settlement record.
        """
        logger.info(f"Starting batch import into {table_name}, total records: {len(data)}")
        # ===== Optimization: settlement lookup in a single query =====
        # NYIDs are normalized to str because the DB column is VARCHAR.
        nyid_list = list(set(str(item.get('NYID')) for item in data if item.get('NYID')))
        from ..models.settlement_data import SettlementData
        settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(nyid_list)).all()
        settlement_map = {s.NYID: s for s in settlements}
        missing_nyids = set(nyid_list) - set(settlement_map.keys())
        if missing_nyids:
            raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')
        # ===== Optimization: load existing rows once to filter duplicates =====
        existing_data = db.query(text("*")).from_statement(
            text(f"SELECT * FROM `{table_name}` WHERE account_id = :account_id")
        ).params(account_id=account_id).all()
        # Build a lookup keyed on "NYID_sort".
        # NOTE(review): the positional indices rely on SELECT * returning the
        # CREATE TABLE column order (id=0 ... NYID=7, sort=8); fragile if the
        # schema ever changes.
        existing_map = {
            f"{item[7]}_{item[8]}": item  # NYID is field index 7, sort is index 8
            for item in existing_data
        }
        logger.info(f"Found {len(existing_data)} existing records in {table_name}")
        # ===== Split incoming rows into inserts vs. skips =====
        to_insert = []
        skipped_count = 0
        for item_data in data:
            nyid = str(item_data.get('NYID'))  # normalize to str for the key
            sort = item_data.get('sort')
            # Composite dedup key.
            key = f"{nyid}_{sort}"
            if key in existing_map:
                # Row already present -- skip it.
                skipped_count += 1
            else:
                # Row is new -- queue it for insertion.
                to_insert.append(item_data)
        logger.info(f"Filtered {skipped_count} duplicate records, {len(to_insert)} new records to insert")
        # ===== Multi-row insert =====
        if not to_insert:
            logger.info("No new records to insert, all data already exists")
            return 0
        # Chunk into groups of 1000 rows (safe under MySQL defaults).
        batch_size = 1000
        total_inserted = 0
        for i in range(0, len(to_insert), batch_size):
            batch_data = to_insert[i:i + batch_size]
            # Build per-row placeholder tuples and their bound parameters.
            values_list = []
            params = {}
            for idx, item_data in enumerate(batch_data):
                values_list.append(
                    f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
                    f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
                )
                params.update({
                    f"account_id_{idx}": account_id,
                    f"bfpcode_{idx}": item_data.get('bfpcode'),
                    f"mtime_{idx}": item_data.get('mtime'),
                    f"bffb_{idx}": item_data.get('bffb'),
                    f"bfpl_{idx}": item_data.get('bfpl'),
                    f"bfpvalue_{idx}": item_data.get('bfpvalue'),
                    f"NYID_{idx}": item_data.get('NYID'),
                    f"sort_{idx}": item_data.get('sort')
                })
            # Assemble the multi-row INSERT as a plain string first (avoids
            # TextClause concatenation issues), then wrap with text().
            insert_sql = f"""
            INSERT INTO `{table_name}`
            (account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
            VALUES {", ".join(values_list)}
            """
            final_sql = text(insert_sql)
            db.execute(final_sql, params)
            total_inserted += len(batch_data)
            logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")
        logger.info(f"Batch import completed: {total_inserted} records inserted, {skipped_count} duplicates skipped")
        return total_inserted
def batch_import_original_data_new(self, db: Session, data: List[List[Dict[str, Any]]]) -> Dict[str, Any]:
"""
新版批量导入原始数据 - 支持分组格式 data:[[{},{},{}],[{},{}]]
里层一个[{},{}]称为一组数据,数据{}内容与旧接口一致
一组数据全部记录的NYID与account_id将会一样不同组可能不同
导入逻辑:
1. 按account_id分表存储没表就建表
2. 插入前根据NYID判断表中是否有重复数据
3. 有重复就删除表中全部同NYID数据插入新的不重复就直接插入
Args:
db: 数据库会话
data: 分组数据列表,格式为 [[{},{},{}], [{},{}]]
Returns:
操作结果
"""
logger = logging.getLogger(__name__)
total_count = 0
success_count = 0
failed_count = 0
failed_items = []
if not data or len(data) == 0:
return {
'success': False,
'message': '导入数据不能为空',
'total_count': 0,
'success_count': 0,
'failed_count': 0,
'failed_items': []
}
# 验证所有组的account_id和NYID是否一致并按account_id分组
group_validation_results = []
account_id_to_groups = {} # {account_id: [group_indices]}
for group_idx, group in enumerate(data):
if not group or len(group) == 0:
failed_items.append({
'group_index': group_idx,
'message': f'{group_idx + 1}组数据为空'
})
continue
# 获取组内第一个数据的account_id和NYID
first_item = group[0]
group_account_id = str(first_item.get('account_id'))
group_nyid = str(first_item.get('NYID'))
if not group_account_id or not group_nyid:
failed_items.append({
'group_index': group_idx,
'message': f'{group_idx + 1}组数据缺少account_id或NYID字段'
})
continue
# 验证组内所有数据的account_id和NYID是否一致
group_valid = True
for item_idx, item in enumerate(group):
item_account_id = str(item.get('account_id'))
item_nyid = str(item.get('NYID'))
if item_account_id != group_account_id:
failed_items.append({
'group_index': group_idx,
'item_index': item_idx,
'message': f'{group_idx + 1}组第{item_idx + 1}条数据account_id不一致期望:{group_account_id},实际:{item_account_id}'
})
group_valid = False
if item_nyid != group_nyid:
failed_items.append({
'group_index': group_idx,
'item_index': item_idx,
'message': f'{group_idx + 1}组第{item_idx + 1}条数据NYID不一致期望:{group_nyid},实际:{item_nyid}'
})
group_valid = False
# 记录验证结果
group_validation_results.append({
'group_index': group_idx,
'account_id': group_account_id,
'nyid': group_nyid,
'valid': group_valid,
'data': group
})
# 按account_id分组
if group_account_id not in account_id_to_groups:
account_id_to_groups[group_account_id] = []
account_id_to_groups[group_account_id].append(group_idx)
# 如果有验证错误,返回失败
if failed_items:
return {
'success': False,
'message': f'数据验证失败,发现{len(failed_items)}个错误',
'total_count': sum(len(group) for group in data if group),
'success_count': 0,
'failed_count': len(failed_items),
'failed_items': failed_items
}
# 记录总体统计
total_count = sum(len(group['data']) for group in group_validation_results if group['valid'])
logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}")
# 记录开始前的连接池状态
pool_stats_before = get_pool_status()
logger.info(f"开始新版批量导入,连接池状态: {pool_stats_before}")
# 检查是否已在事务中
in_transaction = db.in_transaction()
logger.info(f"当前事务状态: {'已在事务中' if in_transaction else '无事务'}")
for attempt in range(2):
try:
# 只有不在事务中时才调用begin()
if not in_transaction:
logger.info(f"开始内部事务 (尝试 {attempt + 1})")
db.begin()
else:
logger.info(f"使用外部事务执行导入 (尝试 {attempt + 1})")
success_count = 0
failed_count = 0
failed_items = []
# 处理每个account_id
for account_id_str, group_indices in account_id_to_groups.items():
account_id = int(account_id_str)
logger.info(f"Processing account_id: {account_id}, groups: {group_indices}")
# 验证账号是否存在
account = db.query(Account).filter(Account.id == account_id).first()
if not account:
error_msg = f'账号ID {account_id} 不存在'
logger.error(error_msg)
for group_idx in group_indices:
failed_count += len(group_validation_results[group_idx]['data'])
failed_items.append({
'group_index': group_idx,
'message': error_msg
})
continue
# 确保表存在
table_created = self._ensure_table_exists(db, account_id)
if not table_created:
error_msg = f'创建原始数据表失败 (account_id: {account_id})'
logger.error(error_msg)
for group_idx in group_indices:
failed_count += len(group_validation_results[group_idx]['data'])
failed_items.append({
'group_index': group_idx,
'message': error_msg
})
continue
# 收集该account_id的所有组数据
account_groups_data = []
for group_idx in group_indices:
account_groups_data.append(group_validation_results[group_idx]['data'])
# 执行分组导入操作
table_name = self._get_table_name(account_id)
logger.info(f"Processing {len(account_groups_data)} groups for table: {table_name}")
group_results = self._execute_import_new(db, table_name, account_groups_data, account_id)
success_count += group_results['success_count']
failed_count += group_results['failed_count']
failed_items.extend(group_results['failed_items'])
logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}")
logger.info(f"Before commit: Success={success_count}, Failed={failed_count}")
# 只有我们开始的事务才提交
if not in_transaction:
db.commit()
logger.info(f"事务已提交")
else:
logger.info(f"使用外部事务,不提交")
logger.info(f"Transaction completed successfully! Success: {success_count}, Failed: {failed_count}")
break
except SQLAlchemyError as e:
# 只有我们开始的事务才回滚
if not in_transaction:
try:
db.rollback()
except:
pass
pool_stats_after = get_pool_status()
error_msg = f"数据库错误 (尝试 {attempt + 1}): {str(e)}"
logger.error(f"{error_msg}, 连接池状态: {pool_stats_after}")
# 记录错误详情
logger.error(
f"错误详情: 类型={type(e).__name__}, "
f"连接池使用率={pool_stats_after.get('usage_percent', 0)}%, "
f"SQL: {str(e)[:200]}"
)
if attempt == 1:
logger.error("Batch import original data failed after retries")
return {
'success': False,
'message': f'批量导入失败: {str(e)}',
'total_count': total_count,
'success_count': 0,
'failed_count': total_count,
'failed_items': []
}
return {
'success': True if failed_count == 0 else False,
'message': '批量导入完成' if failed_count == 0 else f'部分导入失败',
'total_count': total_count,
'success_count': success_count,
'failed_count': failed_count,
'failed_items': failed_items
}
    def _execute_import_new(self, db: Session, table_name: str, data: List[List[Dict[str, Any]]], account_id: int) -> Dict[str, Any]:
        """
        Execute the new-style grouped import against one shard table.

        For each group (all rows sharing one NYID): existing rows with that
        NYID are deleted, then the group is inserted. A group whose NYID was
        already handled earlier in the same call is skipped and reported as
        failed.

        Args:
            db: database session
            table_name: target shard table
            data: list of groups (each a list of row dicts)
            account_id: owning account id
        Returns:
            dict with success_count / failed_count / failed_items.
        Raises:
            Exception: when any referenced NYID has no settlement record.
        """
        logger.info(f"Starting batch import (new) into {table_name}, total groups: {len(data)}")
        # Warm the table up first so its definition is stable.
        self._warmup_table(db, table_name)
        success_count = 0
        failed_count = 0
        failed_items = []
        # Collect every NYID so the settlement check is a single query.
        all_nyids = set()
        for group in data:
            if group and len(group) > 0:
                group_nyid = str(group[0].get('NYID'))
                all_nyids.add(group_nyid)
        # Every NYID must already exist in the settlement table.
        from ..models.settlement_data import SettlementData
        if all_nyids:
            settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(list(all_nyids))).all()
            settlement_map = {s.NYID: s for s in settlements}
            missing_nyids = all_nyids - set(settlement_map.keys())
            if missing_nyids:
                raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')
        logger.info(f"Found {len(all_nyids)} unique NYIDs to process: {list(all_nyids)}")
        # Delete-then-insert per NYID; duplicate NYIDs across groups rejected.
        processed_nyids = set()
        group_index = 0
        for group in data:
            if not group or len(group) == 0:
                continue
            group_nyid = str(group[0].get('NYID'))
            logger.info(f"Processing group {group_index}: NYID={group_nyid}, {len(group)} records")
            # A NYID may only appear in one group per call; later groups with
            # the same NYID are skipped and recorded in failed_items.
            if group_nyid in processed_nyids:
                logger.warning(f"NYID {group_nyid} appears in multiple groups (group {group_index}), skipping duplicate")
                failed_count += len(group)
                failed_items.append({
                    'group_index': group_index,
                    'group_nyid': group_nyid,
                    'message': f'NYID {group_nyid} 已在之前组中处理,跳过重复组'
                })
                group_index += 1
                continue
            processed_nyids.add(group_nyid)
            # Retry loop to ride out MySQL "Table definition has changed" errors.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    # Does the table already hold rows for this NYID?
                    existing_query = text(f"SELECT id FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                    existing_result = db.execute(existing_query, {"nyid": group_nyid, "account_id": account_id}).fetchall()
                    logger.debug(f"Found {len(existing_result)} existing records for NYID {group_nyid}")
                    # If so, replace them: delete the old rows first.
                    if existing_result:
                        delete_query = text(f"DELETE FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                        db.execute(delete_query, {"nyid": group_nyid, "account_id": account_id})
                        logger.info(f"Deleted {len(existing_result)} existing records for NYID {group_nyid}")
                    # Normalize rows for insertion.
                    to_insert = []
                    for item in group:
                        # NYID and account_id are stored as strings (the
                        # columns are VARCHAR); copy so the caller's dicts
                        # are not mutated.
                        item_copy = item.copy()
                        item_copy['NYID'] = str(item_copy.get('NYID'))
                        item_copy['account_id'] = str(account_id)
                        to_insert.append(item_copy)
                    # Insert the whole group in batches.
                    if to_insert:
                        logger.info(f"Inserting {len(to_insert)} records for NYID {group_nyid} into {table_name}")
                        inserted_count = self._batch_insert_group(db, table_name, to_insert, account_id)
                        logger.info(f"Successfully inserted {inserted_count} records for NYID {group_nyid}")
                        success_count += inserted_count
                    # Success: leave the retry loop.
                    break
                except Exception as e:
                    error_msg = str(e)
                    if "Table definition has changed" in error_msg:
                        if attempt < max_retries - 1:
                            # Back off and re-warm the table before retrying.
                            import time
                            time.sleep(0.1 * (attempt + 1))
                            self._warmup_table(db, table_name)
                            logger.warning(f"Table definition changed for {group_nyid}, retrying (attempt {attempt + 2})...")
                            continue
                        else:
                            failed_count += len(group)
                            failed_items.append({
                                'group_index': group_index,
                                'group_nyid': group_nyid,
                                'message': f'处理NYID {group_nyid} 失败: 表定义变更,重试{attempt + 1}次后仍失败'
                            })
                            logger.error(f"Failed to process group with NYID {group_nyid} after {max_retries} retries: {str(e)}")
                            break
                    else:
                        # Any other error fails this group immediately.
                        failed_count += len(group)
                        failed_items.append({
                            'group_index': group_index,
                            'group_nyid': group_nyid,
                            'message': f'处理NYID {group_nyid} 失败: {str(e)}'
                        })
                        logger.error(f"Failed to process group with NYID {group_nyid}: {str(e)}")
                        break
            group_index += 1
        logger.info(f"Batch import (new) completed: {success_count} records inserted, {failed_count} failed")
        return {
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }
def _batch_insert_group(self, db: Session, table_name: str, data: List[Dict[str, Any]], account_id: int) -> int:
"""
批量插入一组数据
Args:
db: 数据库会话
table_name: 表名
data: 要插入的数据列表
account_id: 账号ID
Returns:
插入的记录数
"""
if not data:
return 0
# 将数据分组每组1000条MySQL默认支持
batch_size = 1000
total_inserted = 0
for i in range(0, len(data), batch_size):
batch_data = data[i:i + batch_size]
# 构建批量参数
values_list = []
params = {}
logger.info(f"Preparing to insert batch of {len(batch_data)} records into {table_name}")
for idx, item_data in enumerate(batch_data):
values_list.append(
f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
)
params.update({
f"account_id_{idx}": str(account_id),
f"bfpcode_{idx}": item_data.get('bfpcode'),
f"mtime_{idx}": item_data.get('mtime'),
f"bffb_{idx}": item_data.get('bffb'),
f"bfpl_{idx}": item_data.get('bfpl'),
f"bfpvalue_{idx}": item_data.get('bfpvalue'),
f"NYID_{idx}": str(item_data.get('NYID')),
f"sort_{idx}": item_data.get('sort')
})
# 批量插入SQL
insert_sql = f"""
INSERT INTO `{table_name}`
(account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
VALUES {", ".join(values_list)}
"""
final_sql = text(insert_sql)
logger.debug(f"Final SQL preview: {insert_sql[:200]}...") # 只打印前200个字符
result = db.execute(final_sql, params)
logger.info(f"Batch insert successful: {len(batch_data)} records affected")
total_inserted += len(batch_data)
logger.debug(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")
return total_inserted