批量导入优化
This commit is contained in:
@@ -61,7 +61,8 @@ class LevelDataService(BaseService[LevelData]):
|
||||
"""
|
||||
批量导入水准数据 - 性能优化版
|
||||
使用批量查询和批量操作,大幅提升导入速度
|
||||
根据期数ID和线路编码判断是否重复,重复数据改为更新操作
|
||||
1.根据期数ID和线路编码判断是否重复,跳过重复数据,不进行更新
|
||||
2.判断沉降数据是否存在,不存在则记录并跳过插入操作
|
||||
支持事务回滚,失败时重试一次
|
||||
"""
|
||||
import logging
|
||||
@@ -96,101 +97,99 @@ class LevelDataService(BaseService[LevelData]):
|
||||
settlement_map = {s.NYID: s for s in settlements}
|
||||
missing_nyids = set(nyid_list) - set(settlement_map.keys())
|
||||
|
||||
if missing_nyids:
|
||||
# 记录缺失的NYID
|
||||
for item_data in data:
|
||||
nyid = str(item_data.get('NYID')) # 统一转换为字符串
|
||||
if nyid in missing_nyids:
|
||||
failed_count += 1
|
||||
failed_items.append({
|
||||
'data': item_data,
|
||||
'error': '期数ID在沉降表中不存在,跳过插入操作'
|
||||
})
|
||||
|
||||
# 如果所有数据都失败,直接返回
|
||||
if failed_count == total_count:
|
||||
db.rollback()
|
||||
return {
|
||||
'success': False,
|
||||
'message': f'以下期数在沉降表中不存在: {list(missing_nyids)}',
|
||||
'message': '所有期数ID在沉降表中都不存在',
|
||||
'total_count': total_count,
|
||||
'success_count': 0,
|
||||
'failed_count': total_count,
|
||||
'failed_items': []
|
||||
'failed_items': failed_items
|
||||
}
|
||||
|
||||
# ===== 性能优化2:批量查询现有水准数据(IN查询) =====
|
||||
# 构建 (NYID, linecode) 组合键来查找重复数据
|
||||
existing_data = db.query(LevelData).filter(
|
||||
LevelData.NYID.in_(nyid_list)
|
||||
).all()
|
||||
# 只查询有效的NYID数据
|
||||
valid_items = [item for item in data if str(item.get('NYID')) not in missing_nyids]
|
||||
if valid_items:
|
||||
# 构建 (NYID, linecode) 组合键来查找重复数据
|
||||
existing_data = db.query(LevelData).filter(
|
||||
LevelData.NYID.in_(nyid_list)
|
||||
).all()
|
||||
|
||||
# 使用组合键创建查找表:key = f"{NYID}_{linecode}"
|
||||
existing_map = {
|
||||
f"{item.NYID}_{item.linecode}": item
|
||||
for item in existing_data
|
||||
}
|
||||
logger.info(f"Found {len(existing_data)} existing level records")
|
||||
# 使用组合键创建查找表:key = f"{NYID}_{linecode}"
|
||||
existing_map = {
|
||||
f"{item.NYID}_{item.linecode}": item
|
||||
for item in existing_data
|
||||
}
|
||||
logger.info(f"Found {len(existing_data)} existing level records")
|
||||
|
||||
# ===== 性能优化3:批量处理插入和更新 =====
|
||||
to_update = []
|
||||
to_insert = []
|
||||
# ===== 性能优化3:批量处理插入和跳过 =====
|
||||
to_insert = []
|
||||
|
||||
for item_data in data:
|
||||
nyid = str(item_data.get('NYID'))
|
||||
linecode = item_data.get('linecode')
|
||||
for item_data in valid_items:
|
||||
nyid = str(item_data.get('NYID')) # 统一转换为字符串
|
||||
linecode = item_data.get('linecode')
|
||||
|
||||
# 构建组合键
|
||||
key = f"{nyid}_{linecode}"
|
||||
# 构建组合键
|
||||
key = f"{nyid}_{linecode}"
|
||||
|
||||
if key in existing_map:
|
||||
# 记录需要更新的数据
|
||||
existing_item = existing_map[key]
|
||||
to_update.append((existing_item, item_data))
|
||||
else:
|
||||
# 记录需要插入的数据
|
||||
to_insert.append(item_data)
|
||||
|
||||
# ===== 执行批量更新 =====
|
||||
if to_update:
|
||||
logger.info(f"Updating {len(to_update)} existing records")
|
||||
for existing_item, item_data in to_update:
|
||||
try:
|
||||
existing_item.benchmarkids = item_data.get('benchmarkids')
|
||||
existing_item.wsphigh = item_data.get('wsphigh')
|
||||
existing_item.mtype = item_data.get('mtype')
|
||||
existing_item.createDate = item_data.get('createDate')
|
||||
success_count += 1
|
||||
except Exception as e:
|
||||
if key in existing_map:
|
||||
# 数据已存在,跳过
|
||||
logger.info(f"Continue level data: {nyid}-{linecode}")
|
||||
failed_count += 1
|
||||
failed_items.append({
|
||||
'data': item_data,
|
||||
'error': f'更新失败: {str(e)}'
|
||||
'error': '数据已存在,跳过插入操作'
|
||||
})
|
||||
logger.error(f"Failed to update level data: {str(e)}")
|
||||
raise e
|
||||
else:
|
||||
# 记录需要插入的数据
|
||||
to_insert.append(item_data)
|
||||
|
||||
# ===== 执行批量插入 =====
|
||||
if to_insert:
|
||||
logger.info(f"Inserting {len(to_insert)} new records")
|
||||
# 分批插入,每批500条(避免SQL过长)
|
||||
batch_size = 500
|
||||
for i in range(0, len(to_insert), batch_size):
|
||||
batch = to_insert[i:i + batch_size]
|
||||
try:
|
||||
level_data_list = [
|
||||
LevelData(
|
||||
linecode=str(item.get('linecode')), # 统一转换为字符串
|
||||
benchmarkids=item.get('benchmarkids'),
|
||||
wsphigh=item.get('wsphigh'),
|
||||
mtype=item.get('mtype'),
|
||||
NYID=str(item.get('NYID')),
|
||||
createDate=item.get('createDate')
|
||||
)
|
||||
for item in batch
|
||||
]
|
||||
db.add_all(level_data_list)
|
||||
success_count += len(batch)
|
||||
logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch)} records")
|
||||
except Exception as e:
|
||||
failed_count += len(batch)
|
||||
failed_items.extend([
|
||||
{
|
||||
'data': item,
|
||||
'error': f'插入失败: {str(e)}'
|
||||
}
|
||||
for item in batch
|
||||
])
|
||||
logger.error(f"Failed to insert batch: {str(e)}")
|
||||
raise e
|
||||
# ===== 执行批量插入 =====
|
||||
if to_insert:
|
||||
logger.info(f"Inserting {len(to_insert)} new records")
|
||||
# 分批插入,每批500条(避免SQL过长)
|
||||
batch_size = 500
|
||||
for i in range(0, len(to_insert), batch_size):
|
||||
batch = to_insert[i:i + batch_size]
|
||||
try:
|
||||
level_data_list = [
|
||||
LevelData(
|
||||
linecode=str(item.get('linecode')), # 统一转换为字符串
|
||||
benchmarkids=item.get('benchmarkids'),
|
||||
wsphigh=item.get('wsphigh'),
|
||||
mtype=item.get('mtype'),
|
||||
NYID=str(item.get('NYID')),
|
||||
createDate=item.get('createDate')
|
||||
)
|
||||
for item in batch
|
||||
]
|
||||
db.add_all(level_data_list)
|
||||
success_count += len(batch)
|
||||
logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch)} records")
|
||||
except Exception as e:
|
||||
failed_count += len(batch)
|
||||
failed_items.extend([
|
||||
{
|
||||
'data': item,
|
||||
'error': f'插入失败: {str(e)}'
|
||||
}
|
||||
for item in batch
|
||||
])
|
||||
logger.error(f"Failed to insert batch: {str(e)}")
|
||||
raise e
|
||||
|
||||
# 如果有失败记录,不提交事务
|
||||
if failed_items:
|
||||
|
||||
Reference in New Issue
Block a user