import logging
import time
from typing import List, Optional, Dict, Any

from sqlalchemy.orm import Session
from sqlalchemy import text, inspect

from ..models.original_data import OriginalData, get_original_data_model, get_table_name
from .base import BaseService
from ..models.settlement_data import SettlementData
from ..models.account import Account
from ..core.database import engine

logger = logging.getLogger(__name__)


class OriginalDataService(BaseService[OriginalData]):
    """Service layer for per-account sharded "original data" tables.

    Each account stores its raw observation rows in a dedicated physical
    table (name produced by ``get_table_name(account_id)``). Every method
    resolves the shard name first and then runs raw SQL against it, because
    the ORM model is bound to a single table name.
    """

    def __init__(self):
        super().__init__(OriginalData)

    def _get_table_name(self, account_id: int) -> str:
        """Return the shard table name for ``account_id``."""
        return get_table_name(account_id)

    def _ensure_table_exists(self, db: Session, account_id: int) -> bool:
        """Ensure the account's shard table exists, creating it if needed.

        Args:
            db: database session (used only for warm-up queries).
            account_id: account whose shard table is required.

        Returns:
            True if the table exists or was created successfully.

        Raises:
            Exception: if creation still fails after all retries.
        """
        table_name = self._get_table_name(account_id)
        inspector = inspect(engine)

        if table_name in inspector.get_table_names():
            logger.info(f"Table {table_name} already exists")
            # Warm up even when the table already exists, to stabilize its
            # definition before subsequent queries.
            self._warmup_table(db, table_name)
            return True

        # Table missing: create it, retrying to survive transient failures
        # and concurrent creation by another process.
        max_retries = 3
        for attempt in range(max_retries):
            try:
                create_table_sql = f"""
                CREATE TABLE `{table_name}` (
                    `id` INT AUTO_INCREMENT PRIMARY KEY,
                    `account_id` INT NOT NULL COMMENT '账号ID',
                    `bfpcode` VARCHAR(1000) NOT NULL COMMENT '前(后)视点名称',
                    `mtime` DATETIME NOT NULL COMMENT '测点观测时间',
                    `bffb` VARCHAR(1000) NOT NULL COMMENT '前(后)视标记符',
                    `bfpl` VARCHAR(1000) NOT NULL COMMENT '前(后)视距离(m)',
                    `bfpvalue` VARCHAR(1000) NOT NULL COMMENT '前(后)视尺读数(m)',
                    `NYID` VARCHAR(100) NOT NULL COMMENT '期数id',
                    `sort` INT COMMENT '序号',
                    INDEX `idx_nyid` (`NYID`),
                    INDEX `idx_account_id` (`account_id`)
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='原始数据表_账号{account_id}'
                """
                # DDL runs on a dedicated engine connection so it neither
                # depends on nor disturbs the caller's session transaction.
                with engine.begin() as conn:
                    conn.execute(text(create_table_sql))

                logger.info(f"Table {table_name} created successfully on attempt {attempt + 1}")
                # Short delay after DDL: querying immediately after CREATE
                # TABLE has been observed to trigger "table definition has
                # changed" errors.
                time.sleep(0.2)
                self._warmup_table(db, table_name)
                return True
            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} to create table {table_name} failed: {str(e)}")

                # Another process may have created the table concurrently.
                if "already exists" in str(e).lower() or "exist" in str(e).lower():
                    logger.info(f"Table {table_name} was created by another process")
                    time.sleep(0.2)
                    self._warmup_table(db, table_name)
                    return True

                if attempt == max_retries - 1:
                    logger.error(f"Failed to create table {table_name} after {max_retries} attempts: {str(e)}")
                    raise Exception(f"创建原始数据表失败: {str(e)}")

                # Back off a little longer on each retry.
                time.sleep(0.1 * (attempt + 1))

        return False

    def _warmup_table(self, db: Session, table_name: str) -> None:
        """Run a trivial query against the table to stabilize its definition.

        Works around "table definition has changed" errors seen right after
        DDL. Failure here is logged and ignored — it must never break the
        calling flow.
        """
        try:
            warmup_query = text(f"SELECT COUNT(*) FROM `{table_name}` LIMIT 0")
            db.execute(warmup_query)
            logger.debug(f"Table {table_name} warmed up successfully")
        except Exception as e:
            logger.warning(f"Failed to warm up table {table_name}: {str(e)}")

    def create_table_for_account(self, db: Session, account_id: int) -> Dict[str, Any]:
        """Create the original-data shard table for the given account.

        Args:
            db: database session.
            account_id: account the table belongs to.

        Returns:
            Dict with ``success`` flag and a human-readable ``message``.
        """
        try:
            # The account must exist before we create a shard for it.
            account = db.query(Account).filter(Account.id == account_id).first()
            if not account:
                return {
                    'success': False,
                    'message': f'账号ID {account_id} 不存在'
                }

            table_created = self._ensure_table_exists(db, account_id)

            if table_created:
                return {
                    'success': True,
                    'message': f'原始数据表 {self._get_table_name(account_id)} 创建成功'
                }
            return {
                'success': False,
                'message': f'原始数据表 {self._get_table_name(account_id)} 创建失败'
            }
        except Exception as e:
            logger.error(f"Failed to create table for account {account_id}: {str(e)}")
            return {
                'success': False,
                'message': f'创建原始数据表失败: {str(e)}'
            }

    def get_by_nyid(self, db: Session, account_id: int, nyid: str) -> List[Any]:
        """Fetch original-data rows for one period (NYID).

        Args:
            db: database session.
            account_id: account whose shard table is queried.
            nyid: period id.

        Returns:
            List of raw result rows (not ORM objects); empty list on error.
        """
        try:
            table_name = self._get_table_name(account_id)
            query = text(f"SELECT * FROM `{table_name}` WHERE NYID = :nyid")
            result = db.execute(query, {"nyid": nyid})
            return result.fetchall()
        except Exception as e:
            logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
            return []

    def get_by_bfpcode(self, db: Session, account_id: int, bfpcode: str) -> List[Any]:
        """Fetch original-data rows by fore/back sight point name.

        Args:
            db: database session.
            account_id: account whose shard table is queried.
            bfpcode: fore/back sight point name.

        Returns:
            List of raw result rows (not ORM objects); empty list on error.
        """
        try:
            table_name = self._get_table_name(account_id)
            query = text(f"SELECT * FROM `{table_name}` WHERE bfpcode = :bfpcode")
            result = db.execute(query, {"bfpcode": bfpcode})
            return result.fetchall()
        except Exception as e:
            logger.error(f"Failed to query data from {self._get_table_name(account_id)}: {str(e)}")
            return []

    def get_all_original_data_tables(self, db: Session) -> List[str]:
        """List the names of every original-data shard table.

        Returns:
            Table names starting with ``original_data_``; empty list on error.
        """
        try:
            inspector = inspect(engine)
            all_tables = inspector.get_table_names()
            original_tables = [table for table in all_tables if table.startswith('original_data_')]
            logger.info(f"Found {len(original_tables)} original data tables: {original_tables}")
            return original_tables
        except Exception as e:
            logger.error(f"Failed to get original data tables: {str(e)}")
            return []

    def search_original_data(self, db: Session,
                             account_id: Optional[int] = None,
                             id: Optional[int] = None,
                             bfpcode: Optional[str] = None,
                             bffb: Optional[str] = None,
                             nyid: Optional[str] = None,
                             bfpl: Optional[str] = None) -> List[Any]:
        """Search original data by any combination of conditions.

        When ``account_id`` is given only that account's shard is queried;
        otherwise every shard table is scanned and the results concatenated.

        Args:
            db: database session.
            account_id: optional account id; omit to search all shards.
            id, bfpcode, bffb, nyid, bfpl: optional equality filters.

        Returns:
            List of raw result rows; empty list on error.
        """
        logger.info(f"搜索原始数据,参数: {{'account_id': {account_id}, 'id': {id}, "
                    f"'bfpcode': {bfpcode}, 'bffb': {bffb}, 'nyid': {nyid}, 'bfpl': {bfpl}}}")
        try:
            # Build the WHERE clause from whichever filters were supplied.
            conditions = []
            params = {}

            if id is not None:
                conditions.append("id = :id")
                params["id"] = id
            if bfpcode is not None:
                conditions.append("bfpcode = :bfpcode")
                params["bfpcode"] = bfpcode
            if bffb is not None:
                conditions.append("bffb = :bffb")
                params["bffb"] = bffb
            if nyid is not None:
                conditions.append("NYID = :nyid")
                params["nyid"] = nyid
            if bfpl is not None:
                conditions.append("bfpl = :bfpl")
                params["bfpl"] = bfpl

            where_clause = " AND ".join(conditions) if conditions else "1=1"

            # Single-shard query when the account is known.
            if account_id is not None:
                table_name = self._get_table_name(account_id)
                query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
                result = db.execute(query, params)
                return result.fetchall()

            # No account given: walk every shard, tolerating per-table errors.
            all_results = []
            tables = self.get_all_original_data_tables(db)
            for table_name in tables:
                try:
                    query = text(f"SELECT * FROM `{table_name}` WHERE {where_clause}")
                    result = db.execute(query, params)
                    rows = result.fetchall()
                    all_results.extend(rows)
                    logger.info(f"Found {len(rows)} records in table {table_name}")
                except Exception as e:
                    logger.warning(f"Failed to query table {table_name}: {str(e)}")
                    continue

            logger.info(f"Total found {len(all_results)} records from {len(tables)} tables")
            return all_results
        except Exception as e:
            logger.error(f"Failed to search original data: {str(e)}")
            return []

    def _check_settlement_exists(self, db: Session, nyid: str) -> bool:
        """Return True if settlement data exists for the given period id."""
        settlement = db.query(SettlementData).filter(SettlementData.NYID == nyid).first()
        return settlement is not None

    def batch_import_original_data(self, db: Session, data: List) -> Dict[str, Any]:
        """Bulk-import original data into one account's shard table.

        All records must share the same ``account_id`` (taken from the first
        record). Rows whose ``(NYID, sort)`` pair already exists are skipped;
        the rest are inserted with multi-row INSERT statements.

        Args:
            db: database session.
            data: list of record dicts, each containing ``account_id``.

        Returns:
            Result dict with success flag, message and per-record counters.
        """
        total_count = len(data)
        success_count = 0
        failed_count = 0
        failed_items = []

        if total_count == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }

        # The target shard is determined by the first record's account_id.
        account_id = data[0].get('account_id')
        if not account_id:
            return {
                'success': False,
                'message': '数据中缺少account_id字段',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        account = db.query(Account).filter(Account.id == account_id).first()
        if not account:
            return {
                'success': False,
                'message': f'账号ID {account_id} 不存在',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        table_created = self._ensure_table_exists(db, account_id)
        if not table_created:
            return {
                'success': False,
                'message': '创建原始数据表失败',
                'total_count': total_count,
                'success_count': 0,
                'failed_count': total_count,
                'failed_items': []
            }

        table_name = self._get_table_name(account_id)

        # NOTE: an internal transaction is always started here so the data
        # is committed even when the caller never commits its own session.
        for attempt in range(2):  # at most one retry
            try:
                db.begin()
                success_count = 0
                failed_count = 0
                failed_items = []

                success_count = self._execute_import(db, table_name, data, account_id)

                db.commit()
                logger.info(f"Batch import original data completed. "
                            f"Success: {success_count}, Failed: {failed_count}")
                break
            except Exception as e:
                try:
                    db.rollback()
                except Exception:
                    pass  # ignore rollback failures
                logger.warning(f"Batch import attempt {attempt + 1} failed: {str(e)}")
                if attempt == 1:  # final retry exhausted
                    logger.error("Batch import original data failed after retries")
                    return {
                        'success': False,
                        'message': f'批量导入失败: {str(e)}',
                        'total_count': total_count,
                        'success_count': 0,
                        'failed_count': total_count,
                        'failed_items': failed_items
                    }

        return {
            'success': True,
            'message': '批量导入完成' if failed_count == 0 else '部分导入失败',
            'total_count': total_count,
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }

    def _execute_import(self, db: Session, table_name: str, data: List, account_id: int) -> int:
        """Do the actual import work for :meth:`batch_import_original_data`.

        Validates that every referenced NYID exists in the settlement table,
        filters out ``(NYID, sort)`` duplicates already present in the shard,
        then inserts the remainder in batches.

        Returns:
            Number of rows inserted.

        Raises:
            Exception: if any NYID has no settlement record.
        """
        logger.info(f"Starting batch import into {table_name}, total records: {len(data)}")

        # Validate settlement existence for all periods with one IN query.
        # NYID is VARCHAR in the database, so compare as strings throughout.
        nyid_list = list(set(str(item.get('NYID')) for item in data if item.get('NYID')))
        settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(nyid_list)).all()
        settlement_map = {s.NYID: s for s in settlements}
        missing_nyids = set(nyid_list) - set(settlement_map.keys())
        if missing_nyids:
            raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')

        # Load existing (NYID, sort) pairs once so duplicates can be skipped
        # with O(1) set lookups instead of per-row queries.
        existing_rows = db.execute(
            text(f"SELECT NYID, sort FROM `{table_name}` WHERE account_id = :account_id"),
            {"account_id": account_id},
        ).fetchall()
        existing_keys = {f"{row.NYID}_{row.sort}" for row in existing_rows}
        logger.info(f"Found {len(existing_rows)} existing records in {table_name}")

        # Partition the payload into new rows vs. duplicates.
        to_insert = []
        skipped_count = 0
        for item_data in data:
            nyid = str(item_data.get('NYID'))
            sort = item_data.get('sort')
            key = f"{nyid}_{sort}"
            if key in existing_keys:
                skipped_count += 1
            else:
                to_insert.append(item_data)

        logger.info(f"Filtered {skipped_count} duplicate records, {len(to_insert)} new records to insert")

        if not to_insert:
            logger.info("No new records to insert, all data already exists")
            return 0

        # Insert in chunks of 1000 rows (safe for MySQL defaults).
        batch_size = 1000
        total_inserted = 0
        for i in range(0, len(to_insert), batch_size):
            batch_data = to_insert[i:i + batch_size]

            values_list = []
            params = {}
            for idx, item_data in enumerate(batch_data):
                values_list.append(
                    f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
                    f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
                )
                params.update({
                    f"account_id_{idx}": account_id,
                    f"bfpcode_{idx}": item_data.get('bfpcode'),
                    f"mtime_{idx}": item_data.get('mtime'),
                    f"bffb_{idx}": item_data.get('bffb'),
                    f"bfpl_{idx}": item_data.get('bfpl'),
                    f"bfpvalue_{idx}": item_data.get('bfpvalue'),
                    f"NYID_{idx}": item_data.get('NYID'),
                    f"sort_{idx}": item_data.get('sort')
                })

            insert_sql = f"""
            INSERT INTO `{table_name}`
            (account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
            VALUES {", ".join(values_list)}
            """
            db.execute(text(insert_sql), params)
            total_inserted += len(batch_data)
            logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")

        logger.info(f"Batch import completed: {total_inserted} records inserted, {skipped_count} duplicates skipped")
        return total_inserted

    def batch_import_original_data_new(self, db: Session, data: List[List[Dict[str, Any]]]) -> Dict[str, Any]:
        """Grouped bulk import of original data.

        ``data`` is ``[[{}, {}, ...], [{}, ...]]``: each inner list is one
        group whose records must all share the same ``account_id`` and
        ``NYID`` (different groups may differ). Per group, any existing rows
        with the same NYID in the shard are deleted and replaced.

        Args:
            db: database session.
            data: grouped record lists.

        Returns:
            Result dict with success flag, message and per-record counters.
        """
        total_count = 0
        success_count = 0
        failed_count = 0
        failed_items = []

        if not data or len(data) == 0:
            return {
                'success': False,
                'message': '导入数据不能为空',
                'total_count': 0,
                'success_count': 0,
                'failed_count': 0,
                'failed_items': []
            }

        # Validate group homogeneity and bucket groups by account_id.
        group_validation_results = []
        account_id_to_groups = {}  # {account_id: [group_indices]}

        for group_idx, group in enumerate(data):
            if not group or len(group) == 0:
                failed_items.append({
                    'group_index': group_idx,
                    'message': f'第{group_idx + 1}组数据为空'
                })
                continue

            # BUGFIX: check the raw values for None BEFORE str() conversion —
            # str(None) == "None" is truthy and used to slip past validation.
            first_item = group[0]
            raw_account_id = first_item.get('account_id')
            raw_nyid = first_item.get('NYID')
            if raw_account_id is None or raw_nyid is None:
                failed_items.append({
                    'group_index': group_idx,
                    'message': f'第{group_idx + 1}组数据缺少account_id或NYID字段'
                })
                continue
            group_account_id = str(raw_account_id)
            group_nyid = str(raw_nyid)

            # Every record in the group must match the group's key fields.
            group_valid = True
            for item_idx, item in enumerate(group):
                item_account_id = str(item.get('account_id'))
                item_nyid = str(item.get('NYID'))
                if item_account_id != group_account_id:
                    failed_items.append({
                        'group_index': group_idx,
                        'item_index': item_idx,
                        'message': f'第{group_idx + 1}组第{item_idx + 1}条数据account_id不一致,期望:{group_account_id},实际:{item_account_id}'
                    })
                    group_valid = False
                if item_nyid != group_nyid:
                    failed_items.append({
                        'group_index': group_idx,
                        'item_index': item_idx,
                        'message': f'第{group_idx + 1}组第{item_idx + 1}条数据NYID不一致,期望:{group_nyid},实际:{item_nyid}'
                    })
                    group_valid = False

            group_validation_results.append({
                'group_index': group_idx,
                'account_id': group_account_id,
                'nyid': group_nyid,
                'valid': group_valid,
                'data': group
            })

            if group_account_id not in account_id_to_groups:
                account_id_to_groups[group_account_id] = []
            account_id_to_groups[group_account_id].append(group_idx)

        # Any validation error aborts the whole import.
        if failed_items:
            return {
                'success': False,
                'message': f'数据验证失败,发现{len(failed_items)}个错误',
                'total_count': sum(len(group) for group in data if group),
                'success_count': 0,
                'failed_count': len(failed_items),
                'failed_items': failed_items
            }

        total_count = sum(len(group['data']) for group in group_validation_results if group['valid'])
        logger.info(f"Total valid groups: {len(group_validation_results)}, Total records: {total_count}")

        # NOTE: an internal transaction is always started here so the data
        # is committed even when the caller never commits its own session.
        for attempt in range(2):
            try:
                logger.info(f"Starting internal transaction (attempt {attempt + 1})")
                db.begin()
                success_count = 0
                failed_count = 0
                failed_items = []

                # Process one account (one shard table) at a time.
                for account_id_str, group_indices in account_id_to_groups.items():
                    account_id = int(account_id_str)
                    logger.info(f"Processing account_id: {account_id}, groups: {group_indices}")

                    account = db.query(Account).filter(Account.id == account_id).first()
                    if not account:
                        error_msg = f'账号ID {account_id} 不存在'
                        logger.error(error_msg)
                        for group_idx in group_indices:
                            failed_count += len(group_validation_results[group_idx]['data'])
                            failed_items.append({
                                'group_index': group_idx,
                                'message': error_msg
                            })
                        continue

                    table_created = self._ensure_table_exists(db, account_id)
                    if not table_created:
                        error_msg = f'创建原始数据表失败 (account_id: {account_id})'
                        logger.error(error_msg)
                        for group_idx in group_indices:
                            failed_count += len(group_validation_results[group_idx]['data'])
                            failed_items.append({
                                'group_index': group_idx,
                                'message': error_msg
                            })
                        continue

                    account_groups_data = [
                        group_validation_results[group_idx]['data'] for group_idx in group_indices
                    ]

                    table_name = self._get_table_name(account_id)
                    logger.info(f"Processing {len(account_groups_data)} groups for table: {table_name}")
                    group_results = self._execute_import_new(db, table_name, account_groups_data, account_id)
                    success_count += group_results['success_count']
                    failed_count += group_results['failed_count']
                    failed_items.extend(group_results['failed_items'])
                    logger.info(f"Account {account_id} completed: Success={group_results['success_count']}, Failed={group_results['failed_count']}")

                logger.info(f"Before commit: Success={success_count}, Failed={failed_count}")
                db.commit()
                logger.info(f"Transaction committed successfully! Success: {success_count}, Failed: {failed_count}")
                break
            except Exception as e:
                logger.error(f"Transaction rollback due to: {str(e)}")
                try:
                    db.rollback()
                except Exception:
                    pass  # ignore rollback failures
                logger.warning(f"Batch import attempt {attempt + 1} failed: {str(e)}")
                if attempt == 1:
                    logger.error("Batch import original data failed after retries")
                    return {
                        'success': False,
                        'message': f'批量导入失败: {str(e)}',
                        'total_count': total_count,
                        'success_count': 0,
                        'failed_count': total_count,
                        'failed_items': []
                    }

        return {
            'success': True if failed_count == 0 else False,
            'message': '批量导入完成' if failed_count == 0 else '部分导入失败',
            'total_count': total_count,
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }

    def _execute_import_new(self, db: Session, table_name: str, data: List[List[Dict[str, Any]]], account_id: int) -> Dict[str, Any]:
        """Replace-by-NYID import used by :meth:`batch_import_original_data_new`.

        For each group: delete any existing rows with the group's NYID, then
        insert the group's records. Retries on "table definition has changed".

        Args:
            db: database session.
            table_name: shard table name.
            data: list of record groups, each homogeneous in NYID.
            account_id: account the shard belongs to.

        Returns:
            Dict with ``success_count``, ``failed_count`` and ``failed_items``.
        """
        logger.info(f"Starting batch import (new) into {table_name}, total groups: {len(data)}")

        # Warm the table before touching it, to stabilize its definition.
        self._warmup_table(db, table_name)

        success_count = 0
        failed_count = 0
        failed_items = []

        # Collect all NYIDs so settlement existence can be checked in bulk.
        all_nyids = set()
        for group in data:
            if group and len(group) > 0:
                all_nyids.add(str(group[0].get('NYID')))

        if all_nyids:
            settlements = db.query(SettlementData).filter(SettlementData.NYID.in_(list(all_nyids))).all()
            settlement_map = {s.NYID: s for s in settlements}
            missing_nyids = all_nyids - set(settlement_map.keys())
            if missing_nyids:
                raise Exception(f'以下期数在沉降表中不存在: {list(missing_nyids)}')

        logger.info(f"Found {len(all_nyids)} unique NYIDs to process: {list(all_nyids)}")

        # enumerate keeps group_index aligned with the caller's input order,
        # including empty groups (which are skipped but still counted).
        processed_nyids = set()
        for group_index, group in enumerate(data):
            if not group or len(group) == 0:
                continue

            group_nyid = str(group[0].get('NYID'))
            logger.info(f"Processing group {group_index}: NYID={group_nyid}, {len(group)} records")

            # A NYID may only be handled once; later groups with the same
            # NYID would silently undo earlier ones.
            if group_nyid in processed_nyids:
                logger.warning(f"NYID {group_nyid} appears in multiple groups (group {group_index}), skipping duplicate")
                failed_count += len(group)
                failed_items.append({
                    'group_index': group_index,
                    'group_nyid': group_nyid,
                    'message': f'NYID {group_nyid} 已在之前组中处理,跳过重复组'
                })
                continue
            processed_nyids.add(group_nyid)

            # Retry loop to survive "table definition has changed" errors.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    existing_query = text(f"SELECT id FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                    existing_result = db.execute(existing_query, {"nyid": group_nyid, "account_id": account_id}).fetchall()
                    logger.debug(f"Found {len(existing_result)} existing records for NYID {group_nyid}")

                    # Replace semantics: drop the old rows for this NYID.
                    if existing_result:
                        delete_query = text(f"DELETE FROM `{table_name}` WHERE NYID = :nyid AND account_id = :account_id")
                        db.execute(delete_query, {"nyid": group_nyid, "account_id": account_id})
                        logger.info(f"Deleted {len(existing_result)} existing records for NYID {group_nyid}")

                    # Normalize key fields to strings before insert.
                    to_insert = []
                    for item in group:
                        item_copy = item.copy()
                        item_copy['NYID'] = str(item_copy.get('NYID'))
                        item_copy['account_id'] = str(account_id)
                        to_insert.append(item_copy)

                    if to_insert:
                        logger.info(f"Inserting {len(to_insert)} records for NYID {group_nyid} into {table_name}")
                        inserted_count = self._batch_insert_group(db, table_name, to_insert, account_id)
                        logger.info(f"Successfully inserted {inserted_count} records for NYID {group_nyid}")
                        success_count += inserted_count

                    break  # group handled, leave the retry loop
                except Exception as e:
                    error_msg = str(e)
                    if "Table definition has changed" in error_msg:
                        if attempt < max_retries - 1:
                            # Back off and re-warm the table before retrying.
                            time.sleep(0.1 * (attempt + 1))
                            self._warmup_table(db, table_name)
                            logger.warning(f"Table definition changed for {group_nyid}, retrying (attempt {attempt + 2})...")
                            continue
                        failed_count += len(group)
                        failed_items.append({
                            'group_index': group_index,
                            'group_nyid': group_nyid,
                            'message': f'处理NYID {group_nyid} 失败: 表定义变更,重试{attempt + 1}次后仍失败'
                        })
                        logger.error(f"Failed to process group with NYID {group_nyid} after {max_retries} retries: {str(e)}")
                        break
                    failed_count += len(group)
                    failed_items.append({
                        'group_index': group_index,
                        'group_nyid': group_nyid,
                        'message': f'处理NYID {group_nyid} 失败: {str(e)}'
                    })
                    logger.error(f"Failed to process group with NYID {group_nyid}: {str(e)}")
                    break

        logger.info(f"Batch import (new) completed: {success_count} records inserted, {failed_count} failed")
        return {
            'success_count': success_count,
            'failed_count': failed_count,
            'failed_items': failed_items
        }

    def _batch_insert_group(self, db: Session, table_name: str, data: List[Dict[str, Any]], account_id: int) -> int:
        """Insert one group of records with chunked multi-row INSERTs.

        Args:
            db: database session.
            table_name: shard table name.
            data: records to insert.
            account_id: account id bound into every row.

        Returns:
            Number of rows inserted.
        """
        if not data:
            return 0

        # Insert in chunks of 1000 rows (safe for MySQL defaults).
        batch_size = 1000
        total_inserted = 0
        for i in range(0, len(data), batch_size):
            batch_data = data[i:i + batch_size]

            values_list = []
            params = {}
            logger.info(f"Preparing to insert batch of {len(batch_data)} records into {table_name}")
            for idx, item_data in enumerate(batch_data):
                values_list.append(
                    f"(:account_id_{idx}, :bfpcode_{idx}, :mtime_{idx}, :bffb_{idx}, "
                    f":bfpl_{idx}, :bfpvalue_{idx}, :NYID_{idx}, :sort_{idx})"
                )
                params.update({
                    f"account_id_{idx}": str(account_id),
                    f"bfpcode_{idx}": item_data.get('bfpcode'),
                    f"mtime_{idx}": item_data.get('mtime'),
                    f"bffb_{idx}": item_data.get('bffb'),
                    f"bfpl_{idx}": item_data.get('bfpl'),
                    f"bfpvalue_{idx}": item_data.get('bfpvalue'),
                    f"NYID_{idx}": str(item_data.get('NYID')),
                    f"sort_{idx}": item_data.get('sort')
                })

            insert_sql = f"""
            INSERT INTO `{table_name}`
            (account_id, bfpcode, mtime, bffb, bfpl, bfpvalue, NYID, sort)
            VALUES {", ".join(values_list)}
            """
            final_sql = text(insert_sql)
            logger.debug(f"Final SQL preview: {insert_sql[:200]}...")  # first 200 chars only

            db.execute(final_sql, params)
            logger.info(f"Batch insert successful: {len(batch_data)} records affected")
            total_inserted += len(batch_data)
            logger.debug(f"Inserted batch {i//batch_size + 1}: {len(batch_data)} records")

        return total_inserted