批量导入优化
This commit is contained in:
@@ -37,8 +37,10 @@ class CheckpointService(BaseService[Checkpoint]):
|
|||||||
|
|
||||||
def batch_import_checkpoints(self, db: Session, data: List) -> Dict[str, Any]:
|
def batch_import_checkpoints(self, db: Session, data: List) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
批量导入观测点数据,根据观测点ID判断是否重复,重复数据改为更新操作
|
批量导入观测点数据 - 性能优化版
|
||||||
判断断面id是否存在,不存在则全部不导入
|
使用批量查询和批量操作,大幅提升导入速度
|
||||||
|
1.判断断面id是否存在,不存在则跳过该条数据
|
||||||
|
2.根据观测点ID判断是否重复,重复数据跳过,不进行更新操作
|
||||||
支持事务回滚,失败时重试一次
|
支持事务回滚,失败时重试一次
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
@@ -49,6 +51,16 @@ class CheckpointService(BaseService[Checkpoint]):
|
|||||||
failed_count = 0
|
failed_count = 0
|
||||||
failed_items = []
|
failed_items = []
|
||||||
|
|
||||||
|
if total_count == 0:
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': '导入数据不能为空',
|
||||||
|
'total_count': 0,
|
||||||
|
'success_count': 0,
|
||||||
|
'failed_count': 0,
|
||||||
|
'failed_items': []
|
||||||
|
}
|
||||||
|
|
||||||
for attempt in range(2): # 最多重试1次
|
for attempt in range(2): # 最多重试1次
|
||||||
try:
|
try:
|
||||||
db.begin()
|
db.begin()
|
||||||
@@ -56,41 +68,113 @@ class CheckpointService(BaseService[Checkpoint]):
|
|||||||
failed_count = 0
|
failed_count = 0
|
||||||
failed_items = []
|
failed_items = []
|
||||||
|
|
||||||
|
# ===== 性能优化1:批量查询断面数据(IN查询) =====
|
||||||
|
# 统一转换为字符串处理(数据库section_id字段是VARCHAR类型)
|
||||||
|
section_id_list = list(set(str(item.get('section_id')) for item in data if item.get('section_id')))
|
||||||
|
logger.info(f"Checking {len(section_id_list)} unique section_ids in section data")
|
||||||
|
sections = db.query(SectionData).filter(SectionData.section_id.in_(section_id_list)).all()
|
||||||
|
section_map = {s.section_id: s for s in sections}
|
||||||
|
missing_section_ids = set(section_id_list) - set(section_map.keys())
|
||||||
|
|
||||||
|
# 记录缺失的断面
|
||||||
for item_data in data:
|
for item_data in data:
|
||||||
try:
|
section_id = str(item_data.get('section_id')) # 统一转换为字符串
|
||||||
# 判断断面id是否存在
|
if section_id in missing_section_ids:
|
||||||
if not self._check_section_exists(db, item_data.get('section_id')):
|
|
||||||
logger.error(f"Section {item_data.get('section_id')} not found")
|
|
||||||
raise Exception(f"Section {item_data.get('section_id')} not found")
|
|
||||||
|
|
||||||
checkpoint = self.get_by_point_id(db, item_data.get('point_id'))
|
|
||||||
if checkpoint:
|
|
||||||
# 更新操作
|
|
||||||
checkpoint.aname = item_data.get('aname')
|
|
||||||
checkpoint.section_id = item_data.get('section_id')
|
|
||||||
checkpoint.burial_date = item_data.get('burial_date')
|
|
||||||
logger.info(f"Updated checkpoint: {item_data.get('point_id')}")
|
|
||||||
else:
|
|
||||||
# 新增操作
|
|
||||||
checkpoint = Checkpoint(
|
|
||||||
point_id=item_data.get('point_id'),
|
|
||||||
aname=item_data.get('aname'),
|
|
||||||
section_id=item_data.get('section_id'),
|
|
||||||
burial_date=item_data.get('burial_date'),
|
|
||||||
)
|
|
||||||
db.add(checkpoint)
|
|
||||||
logger.info(f"Created checkpoint: {item_data.get('point_id')}")
|
|
||||||
|
|
||||||
success_count += 1
|
|
||||||
except Exception as e:
|
|
||||||
failed_count += 1
|
failed_count += 1
|
||||||
failed_items.append({
|
failed_items.append({
|
||||||
'data': item_data,
|
'data': item_data,
|
||||||
'error': str(e)
|
'error': '断面ID不存在,跳过插入操作'
|
||||||
})
|
})
|
||||||
logger.error(f"Failed to process checkpoint {item_data.get('point_id')}: {str(e)}")
|
|
||||||
|
# 如果所有数据都失败,直接返回
|
||||||
|
if failed_count == total_count:
|
||||||
|
db.rollback()
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': '所有断面ID都不存在',
|
||||||
|
'total_count': total_count,
|
||||||
|
'success_count': 0,
|
||||||
|
'failed_count': total_count,
|
||||||
|
'failed_items': failed_items
|
||||||
|
}
|
||||||
|
|
||||||
|
# ===== 性能优化2:批量查询现有观测点数据(IN查询) =====
|
||||||
|
# 只查询有效的断面数据
|
||||||
|
valid_items = [item for item in data if str(item.get('section_id')) not in missing_section_ids]
|
||||||
|
if valid_items:
|
||||||
|
# 统一转换为字符串处理(数据库point_id字段是VARCHAR类型)
|
||||||
|
point_id_list = list(set(str(item.get('point_id')) for item in valid_items if item.get('point_id')))
|
||||||
|
existing_checkpoints = db.query(Checkpoint).filter(Checkpoint.point_id.in_(point_id_list)).all()
|
||||||
|
|
||||||
|
# 使用point_id创建查找表
|
||||||
|
existing_map = {
|
||||||
|
checkpoint.point_id: checkpoint
|
||||||
|
for checkpoint in existing_checkpoints
|
||||||
|
}
|
||||||
|
logger.info(f"Found {len(existing_checkpoints)} existing checkpoints")
|
||||||
|
|
||||||
|
# ===== 性能优化3:批量处理插入和跳过 =====
|
||||||
|
to_insert = []
|
||||||
|
|
||||||
|
for item_data in valid_items:
|
||||||
|
point_id = str(item_data.get('point_id')) # 统一转换为字符串
|
||||||
|
|
||||||
|
if point_id in existing_map:
|
||||||
|
# 数据已存在,跳过
|
||||||
|
logger.info(f"Continue checkpoint data: {point_id}")
|
||||||
|
failed_count += 1
|
||||||
|
failed_items.append({
|
||||||
|
'data': item_data,
|
||||||
|
'error': '数据已存在,跳过插入操作'
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# 记录需要插入的数据
|
||||||
|
to_insert.append(item_data)
|
||||||
|
|
||||||
|
# ===== 执行批量插入 =====
|
||||||
|
if to_insert:
|
||||||
|
logger.info(f"Inserting {len(to_insert)} new records")
|
||||||
|
# 分批插入,每批500条(避免SQL过长)
|
||||||
|
batch_size = 500
|
||||||
|
for i in range(0, len(to_insert), batch_size):
|
||||||
|
batch = to_insert[i:i + batch_size]
|
||||||
|
try:
|
||||||
|
checkpoint_list = [
|
||||||
|
Checkpoint(
|
||||||
|
point_id=str(item.get('point_id')), # 统一转换为字符串
|
||||||
|
aname=item.get('aname'),
|
||||||
|
section_id=str(item.get('section_id')), # 统一转换为字符串
|
||||||
|
burial_date=item.get('burial_date')
|
||||||
|
)
|
||||||
|
for item in batch
|
||||||
|
]
|
||||||
|
db.add_all(checkpoint_list)
|
||||||
|
success_count += len(batch)
|
||||||
|
logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch)} records")
|
||||||
|
except Exception as e:
|
||||||
|
failed_count += len(batch)
|
||||||
|
failed_items.extend([
|
||||||
|
{
|
||||||
|
'data': item,
|
||||||
|
'error': f'插入失败: {str(e)}'
|
||||||
|
}
|
||||||
|
for item in batch
|
||||||
|
])
|
||||||
|
logger.error(f"Failed to insert batch: {str(e)}")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
# 如果有失败记录,不提交事务
|
||||||
|
if failed_items:
|
||||||
|
db.rollback()
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': f'批量导入失败: {len(failed_items)}条记录处理失败',
|
||||||
|
'total_count': total_count,
|
||||||
|
'success_count': success_count,
|
||||||
|
'failed_count': failed_count,
|
||||||
|
'failed_items': failed_items
|
||||||
|
}
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
logger.info(f"Batch import checkpoints completed. Success: {success_count}, Failed: {failed_count}")
|
logger.info(f"Batch import checkpoints completed. Success: {success_count}, Failed: {failed_count}")
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -61,7 +61,8 @@ class LevelDataService(BaseService[LevelData]):
|
|||||||
"""
|
"""
|
||||||
批量导入水准数据 - 性能优化版
|
批量导入水准数据 - 性能优化版
|
||||||
使用批量查询和批量操作,大幅提升导入速度
|
使用批量查询和批量操作,大幅提升导入速度
|
||||||
根据期数ID和线路编码判断是否重复,重复数据改为更新操作
|
1.根据期数ID和线路编码判断是否重复,跳过重复数据,不进行更新
|
||||||
|
2.判断沉降数据是否存在,不存在则记录并跳过插入操作
|
||||||
支持事务回滚,失败时重试一次
|
支持事务回滚,失败时重试一次
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
@@ -96,18 +97,32 @@ class LevelDataService(BaseService[LevelData]):
|
|||||||
settlement_map = {s.NYID: s for s in settlements}
|
settlement_map = {s.NYID: s for s in settlements}
|
||||||
missing_nyids = set(nyid_list) - set(settlement_map.keys())
|
missing_nyids = set(nyid_list) - set(settlement_map.keys())
|
||||||
|
|
||||||
if missing_nyids:
|
# 记录缺失的NYID
|
||||||
|
for item_data in data:
|
||||||
|
nyid = str(item_data.get('NYID')) # 统一转换为字符串
|
||||||
|
if nyid in missing_nyids:
|
||||||
|
failed_count += 1
|
||||||
|
failed_items.append({
|
||||||
|
'data': item_data,
|
||||||
|
'error': '期数ID在沉降表中不存在,跳过插入操作'
|
||||||
|
})
|
||||||
|
|
||||||
|
# 如果所有数据都失败,直接返回
|
||||||
|
if failed_count == total_count:
|
||||||
db.rollback()
|
db.rollback()
|
||||||
return {
|
return {
|
||||||
'success': False,
|
'success': False,
|
||||||
'message': f'以下期数在沉降表中不存在: {list(missing_nyids)}',
|
'message': '所有期数ID在沉降表中都不存在',
|
||||||
'total_count': total_count,
|
'total_count': total_count,
|
||||||
'success_count': 0,
|
'success_count': 0,
|
||||||
'failed_count': total_count,
|
'failed_count': total_count,
|
||||||
'failed_items': []
|
'failed_items': failed_items
|
||||||
}
|
}
|
||||||
|
|
||||||
# ===== 性能优化2:批量查询现有水准数据(IN查询) =====
|
# ===== 性能优化2:批量查询现有水准数据(IN查询) =====
|
||||||
|
# 只查询有效的NYID数据
|
||||||
|
valid_items = [item for item in data if str(item.get('NYID')) not in missing_nyids]
|
||||||
|
if valid_items:
|
||||||
# 构建 (NYID, linecode) 组合键来查找重复数据
|
# 构建 (NYID, linecode) 组合键来查找重复数据
|
||||||
existing_data = db.query(LevelData).filter(
|
existing_data = db.query(LevelData).filter(
|
||||||
LevelData.NYID.in_(nyid_list)
|
LevelData.NYID.in_(nyid_list)
|
||||||
@@ -120,43 +135,27 @@ class LevelDataService(BaseService[LevelData]):
|
|||||||
}
|
}
|
||||||
logger.info(f"Found {len(existing_data)} existing level records")
|
logger.info(f"Found {len(existing_data)} existing level records")
|
||||||
|
|
||||||
# ===== 性能优化3:批量处理插入和更新 =====
|
# ===== 性能优化3:批量处理插入和跳过 =====
|
||||||
to_update = []
|
|
||||||
to_insert = []
|
to_insert = []
|
||||||
|
|
||||||
for item_data in data:
|
for item_data in valid_items:
|
||||||
nyid = str(item_data.get('NYID'))
|
nyid = str(item_data.get('NYID')) # 统一转换为字符串
|
||||||
linecode = item_data.get('linecode')
|
linecode = item_data.get('linecode')
|
||||||
|
|
||||||
# 构建组合键
|
# 构建组合键
|
||||||
key = f"{nyid}_{linecode}"
|
key = f"{nyid}_{linecode}"
|
||||||
|
|
||||||
if key in existing_map:
|
if key in existing_map:
|
||||||
# 记录需要更新的数据
|
# 数据已存在,跳过
|
||||||
existing_item = existing_map[key]
|
logger.info(f"Continue level data: {nyid}-{linecode}")
|
||||||
to_update.append((existing_item, item_data))
|
|
||||||
else:
|
|
||||||
# 记录需要插入的数据
|
|
||||||
to_insert.append(item_data)
|
|
||||||
|
|
||||||
# ===== 执行批量更新 =====
|
|
||||||
if to_update:
|
|
||||||
logger.info(f"Updating {len(to_update)} existing records")
|
|
||||||
for existing_item, item_data in to_update:
|
|
||||||
try:
|
|
||||||
existing_item.benchmarkids = item_data.get('benchmarkids')
|
|
||||||
existing_item.wsphigh = item_data.get('wsphigh')
|
|
||||||
existing_item.mtype = item_data.get('mtype')
|
|
||||||
existing_item.createDate = item_data.get('createDate')
|
|
||||||
success_count += 1
|
|
||||||
except Exception as e:
|
|
||||||
failed_count += 1
|
failed_count += 1
|
||||||
failed_items.append({
|
failed_items.append({
|
||||||
'data': item_data,
|
'data': item_data,
|
||||||
'error': f'更新失败: {str(e)}'
|
'error': '数据已存在,跳过插入操作'
|
||||||
})
|
})
|
||||||
logger.error(f"Failed to update level data: {str(e)}")
|
else:
|
||||||
raise e
|
# 记录需要插入的数据
|
||||||
|
to_insert.append(item_data)
|
||||||
|
|
||||||
# ===== 执行批量插入 =====
|
# ===== 执行批量插入 =====
|
||||||
if to_insert:
|
if to_insert:
|
||||||
|
|||||||
@@ -251,7 +251,9 @@ class SectionDataService(BaseService[SectionData]):
|
|||||||
|
|
||||||
def batch_import_sections(self, db: Session, data: List) -> Dict[str, Any]:
|
def batch_import_sections(self, db: Session, data: List) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
批量导入断面数据,根据断面id判断是否重复,重复数据改为更新操作
|
批量导入断面数据 - 性能优化版
|
||||||
|
使用批量查询和批量操作,大幅提升导入速度
|
||||||
|
根据断面ID判断是否重复,重复数据跳过,不进行更新操作
|
||||||
支持事务回滚,失败时重试一次
|
支持事务回滚,失败时重试一次
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
@@ -262,6 +264,16 @@ class SectionDataService(BaseService[SectionData]):
|
|||||||
failed_count = 0
|
failed_count = 0
|
||||||
failed_items = []
|
failed_items = []
|
||||||
|
|
||||||
|
if total_count == 0:
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': '导入数据不能为空',
|
||||||
|
'total_count': 0,
|
||||||
|
'success_count': 0,
|
||||||
|
'failed_count': 0,
|
||||||
|
'failed_items': []
|
||||||
|
}
|
||||||
|
|
||||||
for attempt in range(2): # 最多重试1次
|
for attempt in range(2): # 最多重试1次
|
||||||
try:
|
try:
|
||||||
db.begin()
|
db.begin()
|
||||||
@@ -269,57 +281,91 @@ class SectionDataService(BaseService[SectionData]):
|
|||||||
failed_count = 0
|
failed_count = 0
|
||||||
failed_items = []
|
failed_items = []
|
||||||
|
|
||||||
for item_data in data:
|
# ===== 性能优化1:批量查询现有断面数据(IN查询) =====
|
||||||
try:
|
# 统一转换为字符串处理(数据库section_id字段是VARCHAR类型)
|
||||||
section = self.get_by_section_id(db, item_data.get('section_id'))
|
section_id_list = list(set(str(item.get('section_id')) for item in data if item.get('section_id')))
|
||||||
if section:
|
logger.info(f"Checking {len(section_id_list)} unique section_ids")
|
||||||
# 更新操作
|
existing_sections = db.query(SectionData).filter(SectionData.section_id.in_(section_id_list)).all()
|
||||||
section.mileage = item_data.get('mileage')
|
|
||||||
section.work_site = item_data.get('work_site')
|
|
||||||
section.basic_types = item_data.get('basic_types')
|
|
||||||
section.height = item_data.get('height')
|
|
||||||
section.status = item_data.get('status')
|
|
||||||
section.number = item_data.get('number')
|
|
||||||
section.transition_paragraph = item_data.get('transition_paragraph')
|
|
||||||
section.design_fill_height = item_data.get('design_fill_height')
|
|
||||||
section.compression_layer_thickness = item_data.get('compression_layer_thickness')
|
|
||||||
section.treatment_depth = item_data.get('treatment_depth')
|
|
||||||
section.foundation_treatment_method = item_data.get('foundation_treatment_method')
|
|
||||||
section.rock_mass_classification = item_data.get('rock_mass_classification')
|
|
||||||
section.account_id = item_data.get('account_id')
|
|
||||||
logger.info(f"Updated section: {item_data.get('section_id')}")
|
|
||||||
else:
|
|
||||||
# 新增操作
|
|
||||||
from ..models.section_data import SectionData
|
|
||||||
section = SectionData(
|
|
||||||
section_id=item_data.get('section_id'),
|
|
||||||
mileage=item_data.get('mileage'),
|
|
||||||
work_site=item_data.get('work_site'),
|
|
||||||
basic_types=item_data.get('basic_types'),
|
|
||||||
height=item_data.get('height'),
|
|
||||||
status=item_data.get('status'),
|
|
||||||
number=item_data.get('number'),
|
|
||||||
transition_paragraph=item_data.get('transition_paragraph'),
|
|
||||||
design_fill_height=item_data.get('design_fill_height'),
|
|
||||||
compression_layer_thickness=item_data.get('compression_layer_thickness'),
|
|
||||||
treatment_depth=item_data.get('treatment_depth'),
|
|
||||||
foundation_treatment_method=item_data.get('foundation_treatment_method'),
|
|
||||||
rock_mass_classification=item_data.get('rock_mass_classification'),
|
|
||||||
account_id=item_data.get('account_id')
|
|
||||||
)
|
|
||||||
db.add(section)
|
|
||||||
logger.info(f"Created section: {item_data.get('section_id')}")
|
|
||||||
|
|
||||||
success_count += 1
|
# 使用section_id创建查找表
|
||||||
except Exception as e:
|
existing_map = {
|
||||||
|
section.section_id: section
|
||||||
|
for section in existing_sections
|
||||||
|
}
|
||||||
|
logger.info(f"Found {len(existing_sections)} existing sections")
|
||||||
|
|
||||||
|
# ===== 性能优化2:批量处理插入和跳过 =====
|
||||||
|
to_insert = []
|
||||||
|
|
||||||
|
for item_data in data:
|
||||||
|
section_id = str(item_data.get('section_id')) # 统一转换为字符串
|
||||||
|
|
||||||
|
if section_id in existing_map:
|
||||||
|
# 数据已存在,跳过
|
||||||
|
logger.info(f"Continue section data: {section_id}")
|
||||||
failed_count += 1
|
failed_count += 1
|
||||||
failed_items.append({
|
failed_items.append({
|
||||||
'data': item_data,
|
'data': item_data,
|
||||||
'error': str(e)
|
'error': '数据已存在,跳过插入操作'
|
||||||
})
|
})
|
||||||
logger.error(f"Failed to process section {item_data.get('section_id')}: {str(e)}")
|
else:
|
||||||
|
# 记录需要插入的数据
|
||||||
|
to_insert.append(item_data)
|
||||||
|
|
||||||
|
# ===== 执行批量插入 =====
|
||||||
|
if to_insert:
|
||||||
|
logger.info(f"Inserting {len(to_insert)} new records")
|
||||||
|
# 分批插入,每批500条(避免SQL过长)
|
||||||
|
batch_size = 500
|
||||||
|
for i in range(0, len(to_insert), batch_size):
|
||||||
|
batch = to_insert[i:i + batch_size]
|
||||||
|
try:
|
||||||
|
section_data_list = [
|
||||||
|
SectionData(
|
||||||
|
section_id=str(item.get('section_id')), # 统一转换为字符串
|
||||||
|
mileage=item.get('mileage'),
|
||||||
|
work_site=item.get('work_site'),
|
||||||
|
basic_types=item.get('basic_types'),
|
||||||
|
height=item.get('height'),
|
||||||
|
status=item.get('status'),
|
||||||
|
number=str(item.get('number')) if item.get('number') else None, # 统一转换为字符串
|
||||||
|
transition_paragraph=item.get('transition_paragraph'),
|
||||||
|
design_fill_height=item.get('design_fill_height'),
|
||||||
|
compression_layer_thickness=item.get('compression_layer_thickness'),
|
||||||
|
treatment_depth=item.get('treatment_depth'),
|
||||||
|
foundation_treatment_method=item.get('foundation_treatment_method'),
|
||||||
|
rock_mass_classification=item.get('rock_mass_classification'),
|
||||||
|
account_id=str(item.get('account_id')) if item.get('account_id') else None # 统一转换为字符串
|
||||||
|
)
|
||||||
|
for item in batch
|
||||||
|
]
|
||||||
|
db.add_all(section_data_list)
|
||||||
|
success_count += len(batch)
|
||||||
|
logger.info(f"Inserted batch {i//batch_size + 1}: {len(batch)} records")
|
||||||
|
except Exception as e:
|
||||||
|
failed_count += len(batch)
|
||||||
|
failed_items.extend([
|
||||||
|
{
|
||||||
|
'data': item,
|
||||||
|
'error': f'插入失败: {str(e)}'
|
||||||
|
}
|
||||||
|
for item in batch
|
||||||
|
])
|
||||||
|
logger.error(f"Failed to insert batch: {str(e)}")
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
# 如果有失败记录,不提交事务
|
||||||
|
if failed_items:
|
||||||
|
db.rollback()
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': f'批量导入失败: {len(failed_items)}条记录处理失败',
|
||||||
|
'total_count': total_count,
|
||||||
|
'success_count': success_count,
|
||||||
|
'failed_count': failed_count,
|
||||||
|
'failed_items': failed_items
|
||||||
|
}
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
logger.info(f"Batch import sections completed. Success: {success_count}, Failed: {failed_count}")
|
logger.info(f"Batch import sections completed. Success: {success_count}, Failed: {failed_count}")
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user