导出优化,上传端代码
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,3 +10,5 @@ temp/
|
|||||||
|
|
||||||
!README.md
|
!README.md
|
||||||
|
|
||||||
|
upload_app/data/
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from sqlalchemy.orm import Session
|
|||||||
from typing import Optional, Dict, Any
|
from typing import Optional, Dict, Any
|
||||||
from ..core.database import get_db
|
from ..core.database import get_db
|
||||||
from ..core.response_code import ResponseCode, ResponseMessage
|
from ..core.response_code import ResponseCode, ResponseMessage
|
||||||
|
from ..core.exceptions import BusinessException, DataNotFoundException, AccountNotFoundException
|
||||||
from ..schemas.export_excel import ExportExcelRequest, ExportSettlementRequest
|
from ..schemas.export_excel import ExportExcelRequest, ExportSettlementRequest
|
||||||
from ..services.section_data import SectionDataService
|
from ..services.section_data import SectionDataService
|
||||||
from ..services.export_excel import ExportExcelService
|
from ..services.export_excel import ExportExcelService
|
||||||
@@ -133,18 +134,44 @@ def export_settlement_data(
|
|||||||
media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||||
)
|
)
|
||||||
|
|
||||||
except ValueError as ve:
|
except AccountNotFoundException as e:
|
||||||
logger.warning(f"导出沉降数据失败: {str(ve)}")
|
logger.warning(f"账号不存在: {str(e)}")
|
||||||
return {
|
raise HTTPException(
|
||||||
"code": ResponseCode.PARAM_ERROR,
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
"message": str(ve),
|
detail={
|
||||||
"data": None
|
"code": e.code,
|
||||||
}
|
"message": e.message,
|
||||||
|
"data": None
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except DataNotFoundException as e:
|
||||||
|
logger.warning(f"数据不存在: {str(e)}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
|
detail={
|
||||||
|
"code": e.code,
|
||||||
|
"message": e.message,
|
||||||
|
"data": None
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except BusinessException as e:
|
||||||
|
logger.warning(f"业务异常: {str(e)}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
detail={
|
||||||
|
"code": e.code,
|
||||||
|
"message": e.message,
|
||||||
|
"data": None
|
||||||
|
}
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"导出沉降数据失败: {str(e)}", exc_info=True)
|
logger.error(f"导出沉降数据失败: {str(e)}", exc_info=True)
|
||||||
return {
|
raise HTTPException(
|
||||||
"code": ResponseCode.EXPORT_FAILED,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
"message": f"{ResponseMessage.EXPORT_FAILED}: {str(e)}",
|
detail={
|
||||||
"data": None
|
"code": ResponseCode.EXPORT_FAILED,
|
||||||
}
|
"message": f"{ResponseMessage.EXPORT_FAILED}: {str(e)}",
|
||||||
|
"data": None
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|||||||
59
app/core/exceptions.py
Normal file
59
app/core/exceptions.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"""
|
||||||
|
自定义业务异常类
|
||||||
|
用于区分业务逻辑错误和系统错误
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .response_code import ResponseCode, ResponseMessage
|
||||||
|
|
||||||
|
|
||||||
|
class BusinessException(Exception):
|
||||||
|
"""业务异常基类"""
|
||||||
|
def __init__(self, message: str, code: int = None):
|
||||||
|
self.message = message
|
||||||
|
self.code = code or ResponseCode.INTERNAL_ERROR
|
||||||
|
super().__init__(self.message)
|
||||||
|
|
||||||
|
|
||||||
|
class DataNotFoundException(BusinessException):
|
||||||
|
"""数据不存在异常"""
|
||||||
|
def __init__(self, message: str = None):
|
||||||
|
super().__init__(
|
||||||
|
message or ResponseMessage.DATA_NOT_FOUND,
|
||||||
|
ResponseCode.DATA_NOT_FOUND
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AccountNotFoundException(BusinessException):
|
||||||
|
"""账号不存在异常"""
|
||||||
|
def __init__(self, message: str = None):
|
||||||
|
super().__init__(
|
||||||
|
message or ResponseMessage.ACCOUNT_NOT_FOUND,
|
||||||
|
ResponseCode.ACCOUNT_NOT_FOUND
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DataExistsException(BusinessException):
|
||||||
|
"""数据已存在异常"""
|
||||||
|
def __init__(self, message: str = None):
|
||||||
|
super().__init__(
|
||||||
|
message or ResponseMessage.DATA_EXISTS,
|
||||||
|
ResponseCode.DATA_EXISTS
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ValidationException(BusinessException):
|
||||||
|
"""数据验证异常"""
|
||||||
|
def __init__(self, message: str = None):
|
||||||
|
super().__init__(
|
||||||
|
message or ResponseMessage.BAD_REQUEST,
|
||||||
|
ResponseCode.VALIDATION_ERROR
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExportException(BusinessException):
|
||||||
|
"""导出异常"""
|
||||||
|
def __init__(self, message: str = None):
|
||||||
|
super().__init__(
|
||||||
|
message or ResponseMessage.EXPORT_FAILED,
|
||||||
|
ResponseCode.EXPORT_FAILED
|
||||||
|
)
|
||||||
@@ -10,6 +10,7 @@ from ..services.checkpoint import CheckpointService
|
|||||||
from ..services.settlement_data import SettlementDataService
|
from ..services.settlement_data import SettlementDataService
|
||||||
from ..services.level_data import LevelDataService
|
from ..services.level_data import LevelDataService
|
||||||
from ..services.account import AccountService
|
from ..services.account import AccountService
|
||||||
|
from ..core.exceptions import DataNotFoundException, AccountNotFoundException
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -37,7 +38,7 @@ class ExportExcelService:
|
|||||||
settlement_data: SettlementData,
|
settlement_data: SettlementData,
|
||||||
section_data: SectionData,
|
section_data: SectionData,
|
||||||
checkpoint_data: Checkpoint,
|
checkpoint_data: Checkpoint,
|
||||||
level_data: LevelData) -> Dict[str, Any]:
|
level_data: Optional[LevelData]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
合并沉降数据与关联数据,去除重复和id字段
|
合并沉降数据与关联数据,去除重复和id字段
|
||||||
"""
|
"""
|
||||||
@@ -75,20 +76,21 @@ class ExportExcelService:
|
|||||||
result[f"观测点_{key}"] = value
|
result[f"观测点_{key}"] = value
|
||||||
|
|
||||||
# 水准数据字段映射(添加前缀)
|
# 水准数据字段映射(添加前缀)
|
||||||
level_comments = self.get_field_comments(LevelData)
|
if level_data is not None:
|
||||||
level_dict = level_data.to_dict()
|
level_comments = self.get_field_comments(LevelData)
|
||||||
for field_name, value in level_dict.items():
|
level_dict = level_data.to_dict()
|
||||||
# 跳过id和NYID字段(NYID可能重复)
|
for field_name, value in level_dict.items():
|
||||||
if field_name in ['id', 'NYID']:
|
# 跳过id和NYID字段(NYID可能重复)
|
||||||
continue
|
if field_name in ['id', 'NYID']:
|
||||||
key = level_comments.get(field_name, field_name)
|
continue
|
||||||
result[f"水准_{key}"] = value
|
key = level_comments.get(field_name, field_name)
|
||||||
|
result[f"水准_{key}"] = value
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def export_settlement_data_to_file(self, db: Session, project_name: str, file_path: str):
|
def export_settlement_data_to_file(self, db: Session, project_name: str, file_path: str):
|
||||||
"""
|
"""
|
||||||
根据项目名称导出沉降数据Excel文件到指定路径
|
根据项目名称导出沉降数据Excel文件到指定路径(批量查询优化版本)
|
||||||
"""
|
"""
|
||||||
logger.info(f"开始导出项目 '{project_name}' 的沉降数据到文件: {file_path}")
|
logger.info(f"开始导出项目 '{project_name}' 的沉降数据到文件: {file_path}")
|
||||||
|
|
||||||
@@ -96,63 +98,92 @@ class ExportExcelService:
|
|||||||
account_responses = self.account_service.search_accounts(db, project_name=project_name)
|
account_responses = self.account_service.search_accounts(db, project_name=project_name)
|
||||||
if not account_responses:
|
if not account_responses:
|
||||||
logger.warning(f"未找到项目名称为 '{project_name}' 的账号")
|
logger.warning(f"未找到项目名称为 '{project_name}' 的账号")
|
||||||
raise ValueError(f"未找到项目名称为 '{project_name}' 的账号")
|
raise AccountNotFoundException(f"未找到项目名称为 '{project_name}' 的账号")
|
||||||
|
|
||||||
account_response = account_responses[0]
|
account_response = account_responses[0]
|
||||||
account_id = str(account_response.id)
|
account_id = str(account_response.account_id)
|
||||||
logger.info(f"找到账号 ID: {account_id}")
|
logger.info(f"找到账号 ID: {account_id}")
|
||||||
|
|
||||||
# 2. 通过 account_id 查询断面数据
|
# 2. 通过 account_id 查询断面数据
|
||||||
sections = self.section_service.search_section_data(db, account_id=account_id, limit=10000)
|
sections = self.section_service.search_section_data(db, account_id=account_id, limit=10000)
|
||||||
if not sections:
|
if not sections:
|
||||||
logger.warning(f"账号 {account_id} 下未找到断面数据")
|
logger.warning(f"账号 {account_id} 下未找到断面数据")
|
||||||
raise ValueError(f"账号 {account_id} 下未找到断面数据")
|
raise DataNotFoundException(f"账号 {account_id} 下未找到断面数据")
|
||||||
|
|
||||||
logger.info(f"找到 {len(sections)} 个断面")
|
logger.info(f"找到 {len(sections)} 个断面")
|
||||||
|
|
||||||
all_settlement_records = []
|
# 3. 收集所有观测点数据,建立断面->观测点映射
|
||||||
|
section_dict = {section.section_id: section for section in sections}
|
||||||
|
section_checkpoint_map = {} # section_id -> [checkpoints]
|
||||||
|
all_checkpoints = []
|
||||||
|
|
||||||
# 3-6. 遍历断面数据,查询关联数据
|
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_id = section.section_id
|
checkpoints = self.checkpoint_service.get_by_section_id(db, section.section_id)
|
||||||
logger.debug(f"处理断面: {section_id}")
|
if checkpoints:
|
||||||
|
section_checkpoint_map[section.section_id] = checkpoints
|
||||||
|
all_checkpoints.extend(checkpoints)
|
||||||
|
|
||||||
# 3. 通过断面数据的section_id查询观测点数据
|
if not all_checkpoints:
|
||||||
checkpoints = self.checkpoint_service.get_by_section_id(db, section_id)
|
logger.warning("未找到任何观测点数据")
|
||||||
if not checkpoints:
|
raise DataNotFoundException("未找到任何观测点数据")
|
||||||
logger.debug(f"断面 {section_id} 下未找到观测点数据")
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
logger.info(f"找到 {len(all_checkpoints)} 个观测点")
|
||||||
|
|
||||||
|
# 4. 批量查询沉降数据(关键优化点)
|
||||||
|
point_ids = [cp.point_id for cp in all_checkpoints]
|
||||||
|
logger.info(f"开始批量查询 {len(point_ids)} 个观测点的沉降数据")
|
||||||
|
all_settlements = self.settlement_service.get_by_point_ids(db, point_ids)
|
||||||
|
|
||||||
|
if not all_settlements:
|
||||||
|
logger.warning("未找到任何沉降数据")
|
||||||
|
logger.info(f"观测点id集合{point_ids}")
|
||||||
|
raise DataNotFoundException("未找到任何沉降数据")
|
||||||
|
|
||||||
|
logger.info(f"批量查询到 {len(all_settlements)} 条沉降数据")
|
||||||
|
|
||||||
|
# 5. 建立观测点->沉降数据映射
|
||||||
|
checkpoint_dict = {cp.point_id: cp for cp in all_checkpoints}
|
||||||
|
point_settlement_map = {} # point_id -> [settlements]
|
||||||
|
nyid_set = set()
|
||||||
|
|
||||||
|
for settlement in all_settlements:
|
||||||
|
if settlement.point_id not in point_settlement_map:
|
||||||
|
point_settlement_map[settlement.point_id] = []
|
||||||
|
point_settlement_map[settlement.point_id].append(settlement)
|
||||||
|
if settlement.NYID:
|
||||||
|
nyid_set.add(settlement.NYID)
|
||||||
|
|
||||||
|
# 6. 批量查询水准数据(关键优化点)
|
||||||
|
nyid_list = list(nyid_set)
|
||||||
|
logger.info(f"开始批量查询 {len(nyid_list)} 个期数的水准数据")
|
||||||
|
all_level_data = self.level_service.get_by_nyids(db, nyid_list)
|
||||||
|
logger.info(f"批量查询到 {len(all_level_data)} 条水准数据")
|
||||||
|
|
||||||
|
# 建立NYID->水准数据映射
|
||||||
|
nyid_level_map = {}
|
||||||
|
for level_data in all_level_data:
|
||||||
|
if level_data.NYID not in nyid_level_map:
|
||||||
|
nyid_level_map[level_data.NYID] = level_data
|
||||||
|
|
||||||
|
# 7. 合并数据
|
||||||
|
all_settlement_records = []
|
||||||
|
for section in sections:
|
||||||
|
checkpoints = section_checkpoint_map.get(section.section_id, [])
|
||||||
for checkpoint in checkpoints:
|
for checkpoint in checkpoints:
|
||||||
point_id = checkpoint.point_id
|
settlements = point_settlement_map.get(checkpoint.point_id, [])
|
||||||
|
|
||||||
# 4. 通过观测点数据的point_id查询沉降数据集
|
|
||||||
settlements = self.settlement_service.get_by_point_id(db, point_id)
|
|
||||||
if not settlements:
|
|
||||||
logger.debug(f"观测点 {point_id} 下未找到沉降数据")
|
|
||||||
continue
|
|
||||||
|
|
||||||
for settlement in settlements:
|
for settlement in settlements:
|
||||||
nyid = settlement.NYID
|
# 从映射中获取水准数据
|
||||||
|
level_data = nyid_level_map.get(settlement.NYID)
|
||||||
|
|
||||||
# 5. 通过沉降数据的NYID查询水准数据
|
# 合并数据
|
||||||
level_data_list = self.level_service.get_by_nyid(db, nyid)
|
|
||||||
if not level_data_list:
|
|
||||||
logger.warning(f"期数 {nyid} 下未找到水准数据")
|
|
||||||
# 即使没有水准数据,也继续处理
|
|
||||||
level_data = None
|
|
||||||
else:
|
|
||||||
level_data = level_data_list[0]
|
|
||||||
|
|
||||||
# 6. 合并数据
|
|
||||||
merged_record = self.merge_settlement_with_related_data(
|
merged_record = self.merge_settlement_with_related_data(
|
||||||
db, settlement, section, checkpoint, level_data
|
db, settlement, section, checkpoint, level_data
|
||||||
)
|
)
|
||||||
all_settlement_records.append(merged_record)
|
all_settlement_records.append(merged_record)
|
||||||
|
|
||||||
if not all_settlement_records:
|
if not all_settlement_records:
|
||||||
logger.warning("未找到任何沉降数据")
|
logger.warning("未能合并任何数据记录")
|
||||||
raise ValueError("未找到任何沉降数据")
|
raise DataNotFoundException("未能合并任何数据记录")
|
||||||
|
|
||||||
logger.info(f"共找到 {len(all_settlement_records)} 条沉降数据记录")
|
logger.info(f"共找到 {len(all_settlement_records)} 条沉降数据记录")
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,12 @@ class SettlementDataService(BaseService[SettlementData]):
|
|||||||
"""根据观测点ID获取沉降数据"""
|
"""根据观测点ID获取沉降数据"""
|
||||||
return self.get_by_field(db, "point_id", point_id)
|
return self.get_by_field(db, "point_id", point_id)
|
||||||
|
|
||||||
|
def get_by_point_ids(self, db: Session, point_ids: List[str]) -> List[SettlementData]:
|
||||||
|
"""根据观测点ID列表批量获取沉降数据"""
|
||||||
|
if not point_ids:
|
||||||
|
return []
|
||||||
|
return db.query(SettlementData).filter(SettlementData.point_id.in_(point_ids)).all()
|
||||||
|
|
||||||
def get_by_nyid(self, db: Session, nyid: str) -> List[SettlementData]:
|
def get_by_nyid(self, db: Session, nyid: str) -> List[SettlementData]:
|
||||||
"""根据期数ID获取沉降数据"""
|
"""根据期数ID获取沉降数据"""
|
||||||
return self.get_by_field(db, "NYID", nyid)
|
return self.get_by_field(db, "NYID", nyid)
|
||||||
|
|||||||
448
upload_app/insert_all.py
Normal file
448
upload_app/insert_all.py
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
from insert_data_online import (
|
||||||
|
batch_import_sections,
|
||||||
|
batch_import_checkpoints,
|
||||||
|
batch_import_settlement_data,
|
||||||
|
batch_import_level_data,
|
||||||
|
batch_import_original_data
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------------------------------ 核心配置 ------------------------------
|
||||||
|
DATA_TYPE_MAPPING = {
|
||||||
|
"section": (
|
||||||
|
"断面数据表",
|
||||||
|
"section_",
|
||||||
|
batch_import_sections,
|
||||||
|
["account_id"]
|
||||||
|
),
|
||||||
|
"checkpoint": (
|
||||||
|
"观测点数据表",
|
||||||
|
"point_",
|
||||||
|
batch_import_checkpoints,
|
||||||
|
[]
|
||||||
|
),
|
||||||
|
"settlement": (
|
||||||
|
"沉降数据表",
|
||||||
|
"settlement_",
|
||||||
|
batch_import_settlement_data,
|
||||||
|
[]
|
||||||
|
),
|
||||||
|
"level": (
|
||||||
|
"水准数据表",
|
||||||
|
"level_",
|
||||||
|
batch_import_level_data,
|
||||||
|
[]
|
||||||
|
),
|
||||||
|
"original": (
|
||||||
|
"原始数据表",
|
||||||
|
"original_",
|
||||||
|
batch_import_original_data,
|
||||||
|
[]
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# 全局配置(根据实际情况修改)
|
||||||
|
DATA_ROOT = "./data"
|
||||||
|
BATCH_SIZE = 50 # 批次大小
|
||||||
|
MAX_RETRY = 5 # 最大重试次数
|
||||||
|
RETRY_DELAY = 3 # 基础重试延迟(秒)
|
||||||
|
DEFAULT_ACCOUNT_ID = 1 # 替换为实际业务的account_id
|
||||||
|
SUCCESS_CODE = 0 # 接口成功标识(code:0 为成功)
|
||||||
|
|
||||||
|
# 断点续传配置
|
||||||
|
RESUME_PROGRESS_FILE = "./data_import_progress.json" # 进度记录文件路径
|
||||||
|
RESUME_ENABLE = True # 是否开启断点续传(True=开启,False=关闭)
|
||||||
|
|
||||||
|
USER_AGENTS = [
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.67"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------ 工具函数 ------------------------------
|
||||||
|
def get_random_ua():
|
||||||
|
"""获取随机User-Agent"""
|
||||||
|
return random.choice(USER_AGENTS)
|
||||||
|
|
||||||
|
def scan_all_parquet(root_dir):
|
||||||
|
"""递归扫描并分类Parquet文件,过滤空文件"""
|
||||||
|
classified_files = {data_type: [] for data_type in DATA_TYPE_MAPPING.keys()}
|
||||||
|
|
||||||
|
print("递归扫描并分类Parquet文件,过滤空文件", root_dir)
|
||||||
|
for root, dirs, files in os.walk(root_dir):
|
||||||
|
# 匹配目录关键词
|
||||||
|
matched_data_type = None
|
||||||
|
for data_type, (dir_keyword, _, _, _) in DATA_TYPE_MAPPING.items():
|
||||||
|
if dir_keyword in root:
|
||||||
|
matched_data_type = data_type
|
||||||
|
print(f"[扫描] 目录匹配:{root} → 类型:{data_type}")
|
||||||
|
break
|
||||||
|
if not matched_data_type:
|
||||||
|
print("跳过", root)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 匹配文件关键词并过滤空文件
|
||||||
|
print("匹配文件关键词并过滤空文件",matched_data_type)
|
||||||
|
_, file_keyword, _, _ = DATA_TYPE_MAPPING[matched_data_type]
|
||||||
|
print(file_keyword)
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
print("检查文件", file)
|
||||||
|
if file.endswith(".parquet") and file_keyword in file:
|
||||||
|
print("匹配文件", file)
|
||||||
|
file_path = os.path.abspath(os.path.join(root, file))
|
||||||
|
if os.path.getsize(file_path) > 1024: # 过滤<1KB的空文件
|
||||||
|
classified_files[matched_data_type].append(file_path)
|
||||||
|
print(f"[扫描] 有效文件:{file_path}")
|
||||||
|
else:
|
||||||
|
print(f"[扫描] 跳过空文件:{file_path}")
|
||||||
|
|
||||||
|
# 打印完整扫描结果
|
||||||
|
print(f"\n=== 扫描完成(完整统计)===")
|
||||||
|
for data_type, paths in classified_files.items():
|
||||||
|
print(f" {data_type} 数据:{len(paths)} 个文件")
|
||||||
|
return classified_files
|
||||||
|
|
||||||
|
def read_parquet_by_type(file_paths, data_type):
|
||||||
|
"""读取Parquet文件,处理空值和字段补充"""
|
||||||
|
data_list = []
|
||||||
|
_, _, _, required_supplement = DATA_TYPE_MAPPING[data_type]
|
||||||
|
# 核心字段校验配置
|
||||||
|
critical_fields = {
|
||||||
|
"section": ["section_id", "account_id", "mileage", "work_site"],
|
||||||
|
"checkpoint": ["point_id", "section_id", "aname", "burial_date"],
|
||||||
|
"settlement": ["NYID", "point_id", "sjName"],
|
||||||
|
"level": ["NYID", "linecode", "wsphigh", "createDate"],
|
||||||
|
"original": ["NYID", "bfpcode", "mtime", "bfpvalue", "sort"]
|
||||||
|
}.get(data_type, [])
|
||||||
|
|
||||||
|
for file_path in file_paths:
|
||||||
|
try:
|
||||||
|
# 读取并处理空值
|
||||||
|
df = pd.read_parquet(file_path)
|
||||||
|
df = df.fillna("")
|
||||||
|
file_basename = os.path.basename(file_path)
|
||||||
|
|
||||||
|
# 1. 打印文件实际列名(方便核对字段)
|
||||||
|
actual_columns = df.columns.tolist()
|
||||||
|
print(f"[读取] {file_basename} 实际列名:{actual_columns}")
|
||||||
|
|
||||||
|
# 2. 校验核心字段是否存在
|
||||||
|
missing_fields = [f for f in critical_fields if f not in actual_columns]
|
||||||
|
if missing_fields:
|
||||||
|
core_relation_fields = {
|
||||||
|
"settlement": ["NYID", "point_id"],
|
||||||
|
"section": ["section_id", "account_id"],
|
||||||
|
"checkpoint": ["point_id", "section_id"],
|
||||||
|
"level": ["NYID"],
|
||||||
|
"original": ["NYID", "bfpcode"]
|
||||||
|
}.get(data_type, [])
|
||||||
|
missing_core = [f for f in core_relation_fields if f not in actual_columns]
|
||||||
|
|
||||||
|
if missing_core:
|
||||||
|
print(f"[读取] {file_basename} 缺失核心关联字段:{missing_core} → 跳过")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print(f"[读取] {file_basename} 缺失普通字段:{missing_fields} → 继续处理")
|
||||||
|
|
||||||
|
# 3. 转换为字典列表并过滤空记录
|
||||||
|
records = df.to_dict("records")
|
||||||
|
valid_records = [r for r in records if any(r.values())] # 过滤全空记录
|
||||||
|
if not valid_records:
|
||||||
|
print(f"[读取] {file_basename} 无有效记录 → 跳过")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 4. 字段格式化(仅处理存在的字段)
|
||||||
|
for record in valid_records:
|
||||||
|
# 补充必填字段(如account_id)
|
||||||
|
if "account_id" in required_supplement and "account_id" not in record:
|
||||||
|
record["account_id"] = DEFAULT_ACCOUNT_ID
|
||||||
|
print(f"[读取] {file_basename} 补充 account_id={DEFAULT_ACCOUNT_ID}")
|
||||||
|
|
||||||
|
# 数值型字段强制转换
|
||||||
|
if data_type == "section" and "section_id" in record:
|
||||||
|
record["section_id"] = int(record["section_id"]) if str(record["section_id"]).isdigit() else 0
|
||||||
|
if data_type == "checkpoint" and "point_id" in record:
|
||||||
|
record["point_id"] = int(record["point_id"]) if str(record["point_id"]).isdigit() else 0
|
||||||
|
if data_type == "settlement" and "NYID" in record:
|
||||||
|
record["NYID"] = str(record["NYID"]) # 沉降NYID转为字符串
|
||||||
|
|
||||||
|
# 5. 累加数据并打印日志
|
||||||
|
data_list.extend(valid_records)
|
||||||
|
print(f"[读取] {file_basename} 处理完成 → 有效记录:{len(valid_records)}条,累计:{len(data_list)}条")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[读取] {os.path.basename(file_path)} 读取失败:{str(e)} → 跳过")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 沉降数据为空时的提示
|
||||||
|
if data_type == "settlement" and not data_list:
|
||||||
|
print(f"\n⚠️ 【沉降数据读取异常】未读取到有效数据,请检查文件字段和内容")
|
||||||
|
|
||||||
|
print(f"\n=== {data_type} 数据读取总结 ===")
|
||||||
|
print(f" 总文件数:{len(file_paths)} 个")
|
||||||
|
print(f" 有效记录数:{len(data_list)} 条")
|
||||||
|
return data_list
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------ 断点续传工具函数 ------------------------------
|
||||||
|
def init_progress():
|
||||||
|
"""初始化进度结构"""
|
||||||
|
return {
|
||||||
|
"last_update_time": "", # 最后更新时间
|
||||||
|
"processed_files": {data_type: [] for data_type in DATA_TYPE_MAPPING.keys()}, # 各类型已处理文件路径
|
||||||
|
"processed_batches": {data_type: [] for data_type in DATA_TYPE_MAPPING.keys()}, # 各类型已处理批次号
|
||||||
|
"settlement_nyids": [] # 已成功入库的沉降NYID
|
||||||
|
}
|
||||||
|
|
||||||
|
def load_progress():
|
||||||
|
"""加载进度记录(无文件则初始化)"""
|
||||||
|
if not RESUME_ENABLE:
|
||||||
|
return init_progress()
|
||||||
|
try:
|
||||||
|
if os.path.exists(RESUME_PROGRESS_FILE):
|
||||||
|
with open(RESUME_PROGRESS_FILE, "r", encoding="utf-8") as f:
|
||||||
|
progress = json.load(f)
|
||||||
|
# 兼容旧进度结构(补全缺失字段)
|
||||||
|
default_progress = init_progress()
|
||||||
|
for key in default_progress.keys():
|
||||||
|
if key not in progress:
|
||||||
|
progress[key] = default_progress[key]
|
||||||
|
print(f"[断点续传] 成功加载进度记录:{RESUME_PROGRESS_FILE}")
|
||||||
|
return progress
|
||||||
|
else:
|
||||||
|
print(f"[断点续传] 未找到进度文件,将创建新记录:{RESUME_PROGRESS_FILE}")
|
||||||
|
return init_progress()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[断点续传] 加载进度失败,重新初始化:{str(e)}")
|
||||||
|
return init_progress()
|
||||||
|
|
||||||
|
def save_progress(progress):
|
||||||
|
"""保存进度记录到本地文件"""
|
||||||
|
if not RESUME_ENABLE:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
progress["last_update_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
with open(RESUME_PROGRESS_FILE, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(progress, f, ensure_ascii=False, indent=2)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[断点续传] 保存进度失败:{str(e)}")
|
||||||
|
|
||||||
|
def filter_unprocessed_files(file_paths, data_type, progress):
|
||||||
|
"""过滤已处理的文件,仅保留未处理文件"""
|
||||||
|
processed_files = progress["processed_files"][data_type]
|
||||||
|
unprocessed = [path for path in file_paths if path not in processed_files]
|
||||||
|
if processed_files:
|
||||||
|
print(f"[断点续传] {data_type} 已处理文件:{len(processed_files)} 个 → 跳过")
|
||||||
|
print(f"[断点续传] {data_type} 待处理文件:{len(unprocessed)} 个")
|
||||||
|
return unprocessed
|
||||||
|
|
||||||
|
def filter_unprocessed_batches(total_batches, data_type, progress):
|
||||||
|
"""过滤已处理的批次,返回未处理的批次索引范围"""
|
||||||
|
processed_batches = progress["processed_batches"][data_type]
|
||||||
|
all_batch_nums = set(range(1, total_batches + 1)) # 批次号从1开始
|
||||||
|
unprocessed_batch_nums = all_batch_nums - set(processed_batches)
|
||||||
|
if processed_batches:
|
||||||
|
print(f"[断点续传] {data_type} 已处理批次:{sorted(processed_batches)} → 跳过")
|
||||||
|
print(f"[断点续传] {data_type} 待处理批次:{sorted(unprocessed_batch_nums)}")
|
||||||
|
# 转换为批次索引范围(start_idx, end_idx)
|
||||||
|
unprocessed_ranges = []
|
||||||
|
for batch_num in sorted(unprocessed_batch_nums):
|
||||||
|
start_idx = (batch_num - 1) * BATCH_SIZE
|
||||||
|
end_idx = start_idx + BATCH_SIZE
|
||||||
|
unprocessed_ranges.append((start_idx, end_idx))
|
||||||
|
return unprocessed_ranges
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------ 批量入库函数 ------------------------------
|
||||||
|
def batch_import(data_list, data_type, settlement_nyids=None, progress=None):
|
||||||
|
"""批量入库,支持断点续传"""
|
||||||
|
if not data_list:
|
||||||
|
print(f"[入库] 无 {data_type} 数据 → 跳过")
|
||||||
|
return True, []
|
||||||
|
|
||||||
|
_, _, import_func, _ = DATA_TYPE_MAPPING[data_type]
|
||||||
|
total = len(data_list)
|
||||||
|
success_flag = True
|
||||||
|
success_nyids = []
|
||||||
|
total_batches = (total + BATCH_SIZE - 1) // BATCH_SIZE # 总批次数
|
||||||
|
|
||||||
|
# 获取未处理批次范围
|
||||||
|
unprocessed_ranges = filter_unprocessed_batches(total_batches, data_type, progress)
|
||||||
|
if not unprocessed_ranges:
|
||||||
|
print(f"[入库] {data_type} 无待处理批次 → 跳过")
|
||||||
|
return True, success_nyids
|
||||||
|
|
||||||
|
# 处理未完成批次
|
||||||
|
for (batch_start, batch_end) in unprocessed_ranges:
|
||||||
|
batch_data = data_list[batch_start:batch_end]
|
||||||
|
batch_num = (batch_start // BATCH_SIZE) + 1 # 当前批次号
|
||||||
|
batch_len = len(batch_data)
|
||||||
|
print(f"\n=== [入库] {data_type} 第 {batch_num} 批(共{total}条,当前{batch_len}条)===")
|
||||||
|
|
||||||
|
# 水准数据过滤:仅保留沉降已存在的NYID
|
||||||
|
# if data_type == "level" and settlement_nyids is not None:
|
||||||
|
# valid_batch = [
|
||||||
|
# item for item in batch_data
|
||||||
|
# if str(item.get("NYID", "")) in settlement_nyids
|
||||||
|
# ]
|
||||||
|
# invalid_count = batch_len - len(valid_batch)
|
||||||
|
# if invalid_count > 0:
|
||||||
|
# print(f"[入库] 过滤 {invalid_count} 条无效水准数据(NYID不在沉降列表中)")
|
||||||
|
# batch_data = valid_batch
|
||||||
|
# batch_len = len(batch_data)
|
||||||
|
# if batch_len == 0:
|
||||||
|
# print(f"[入库] 第 {batch_num} 批无有效数据 → 跳过")
|
||||||
|
# # 标记为空批次已处理
|
||||||
|
# progress["processed_batches"][data_type].append(batch_num)
|
||||||
|
# save_progress(progress)
|
||||||
|
# continue
|
||||||
|
|
||||||
|
# 重试机制
|
||||||
|
retry_count = 0
|
||||||
|
while retry_count < MAX_RETRY:
|
||||||
|
try:
|
||||||
|
result = import_func(batch_data)
|
||||||
|
print(f"[入库] 第 {batch_num} 批接口返回:{json.dumps(result, ensure_ascii=False, indent=2)}")
|
||||||
|
|
||||||
|
# 解析返回结果
|
||||||
|
success = True
|
||||||
|
if isinstance(result, tuple):
|
||||||
|
# 处理 (status, msg) 格式
|
||||||
|
status, msg = result
|
||||||
|
if status:
|
||||||
|
success = True
|
||||||
|
elif isinstance(result, dict):
|
||||||
|
# 处理字典格式(code=0或特定消息为成功)
|
||||||
|
if result.get("code") == SUCCESS_CODE or result.get("message") == "批量导入完成":
|
||||||
|
success = True
|
||||||
|
|
||||||
|
if success:
|
||||||
|
print(f"[入库] 第 {batch_num} 批成功({retry_count+1}/{MAX_RETRY})")
|
||||||
|
# 标记批次为已处理
|
||||||
|
progress["processed_batches"][data_type].append(batch_num)
|
||||||
|
save_progress(progress)
|
||||||
|
# 记录沉降NYID
|
||||||
|
if data_type == "settlement":
|
||||||
|
success_nyids.extend([str(item["NYID"]) for item in batch_data])
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
print(f"[入库] 第 {batch_num} 批失败({retry_count+1}/{MAX_RETRY})")
|
||||||
|
|
||||||
|
# 指数退避重试
|
||||||
|
delay = RETRY_DELAY * (retry_count + 1)
|
||||||
|
print(f"[入库] 重试延迟 {delay} 秒...")
|
||||||
|
time.sleep(delay)
|
||||||
|
retry_count += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[入库] 第 {batch_num} 批异常({retry_count+1}/{MAX_RETRY}):{str(e)}")
|
||||||
|
delay = RETRY_DELAY * (retry_count + 1)
|
||||||
|
print(f"[入库] 重试延迟 {delay} 秒...")
|
||||||
|
time.sleep(delay)
|
||||||
|
retry_count += 1
|
||||||
|
|
||||||
|
# 多次重试失败处理
|
||||||
|
if retry_count >= MAX_RETRY:
|
||||||
|
print(f"\n[入库] 第 {batch_num} 批经 {MAX_RETRY} 次重试仍失败 → 终止该类型入库")
|
||||||
|
success_flag = False
|
||||||
|
break
|
||||||
|
|
||||||
|
return success_flag, success_nyids
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------ 主逻辑 ------------------------------
|
||||||
|
def main():
|
||||||
|
print(f"=== 【Parquet数据批量入库程序】启动 ===")
|
||||||
|
print(f"启动时间:{time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
print(f"关键配置:")
|
||||||
|
print(f" 数据根目录:{os.path.abspath(DATA_ROOT)}")
|
||||||
|
print(f" 断点续传:{'开启' if RESUME_ENABLE else '关闭'}(进度文件:{RESUME_PROGRESS_FILE})")
|
||||||
|
print(f" 接口成功标识:code={SUCCESS_CODE}")
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
# 加载断点续传进度
|
||||||
|
progress = load_progress()
|
||||||
|
# 恢复已入库的沉降NYID
|
||||||
|
settlement_nyids = set(progress.get("settlement_nyids", []))
|
||||||
|
if settlement_nyids:
|
||||||
|
print(f"[断点续传] 恢复已入库沉降NYID:{len(settlement_nyids)} 个")
|
||||||
|
|
||||||
|
# 1. 扫描所有Parquet文件
|
||||||
|
print(f"\n=== 第一步:扫描数据文件 ===")
|
||||||
|
classified_files = scan_all_parquet(DATA_ROOT)
|
||||||
|
if not any(classified_files.values()):
|
||||||
|
print(f"\n❌ 未找到任何有效Parquet文件 → 终止程序")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 2. 按依赖顺序入库(断面→测点→沉降→水准→原始)
|
||||||
|
print(f"\n=== 第二步:按依赖顺序入库 ===")
|
||||||
|
data_type_order = [
|
||||||
|
("section", "断面数据"),
|
||||||
|
("checkpoint", "测点数据"),
|
||||||
|
("settlement", "沉降数据"),
|
||||||
|
("level", "水准数据"),
|
||||||
|
("original", "原始数据")
|
||||||
|
]
|
||||||
|
|
||||||
|
for data_type, data_name in data_type_order:
|
||||||
|
print(f"\n=====================================")
|
||||||
|
print(f"处理【{data_name}】(类型:{data_type})")
|
||||||
|
print(f"=====================================")
|
||||||
|
|
||||||
|
# 获取文件路径并过滤已处理文件
|
||||||
|
file_paths = classified_files.get(data_type, [])
|
||||||
|
unprocessed_files = filter_unprocessed_files(file_paths, data_type, progress)
|
||||||
|
if not unprocessed_files:
|
||||||
|
print(f"[主逻辑] 【{data_name}】无待处理文件 → 跳过")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 读取未处理文件的数据
|
||||||
|
data_list = read_parquet_by_type(unprocessed_files, data_type)
|
||||||
|
if not data_list:
|
||||||
|
print(f"\n❌ 【{data_name}】无有效数据 → 终止程序(后续数据依赖该类型)")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 批量入库
|
||||||
|
print(f"\n[主逻辑] 开始入库:{len(data_list)} 条数据,分 {len(unprocessed_files)} 个文件")
|
||||||
|
if data_type == "level":
|
||||||
|
success, _ = batch_import(data_list, data_type, settlement_nyids, progress)
|
||||||
|
else:
|
||||||
|
success, nyids = batch_import(data_list, data_type, None, progress)
|
||||||
|
# 更新沉降NYID到进度
|
||||||
|
if data_type == "settlement":
|
||||||
|
settlement_nyids.update(nyids)
|
||||||
|
progress["settlement_nyids"] = list(settlement_nyids)
|
||||||
|
save_progress(progress)
|
||||||
|
print(f"\n[主逻辑] 沉降数据入库结果:成功 {len(settlement_nyids)} 个NYID(已保存到进度)")
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
print(f"\n❌ 【{data_name}】入库失败 → 终止后续流程(进度已保存)")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 标记当前类型所有文件为已处理
|
||||||
|
progress["processed_files"][data_type].extend(unprocessed_files)
|
||||||
|
save_progress(progress)
|
||||||
|
|
||||||
|
# 最终统计
|
||||||
|
end_time = time.time()
|
||||||
|
elapsed = (end_time - start_time) / 60
|
||||||
|
print(f"\n=== 【所有任务完成】===")
|
||||||
|
print(f"总耗时:{elapsed:.2f} 分钟")
|
||||||
|
print(f"核心成果:")
|
||||||
|
print(f" - 沉降数据:成功入库 {len(settlement_nyids)} 个NYID")
|
||||||
|
print(f" - 所有数据按依赖顺序入库完成,建议后台核对数据完整性")
|
||||||
|
|
||||||
|
# 任务完成后删除进度文件(避免下次误读)
|
||||||
|
# if RESUME_ENABLE and os.path.exists(RESUME_PROGRESS_FILE):
|
||||||
|
# os.remove(RESUME_PROGRESS_FILE)
|
||||||
|
# print(f"[断点续传] 任务全量完成,已删除进度文件:{RESUME_PROGRESS_FILE}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
237
upload_app/insert_data_online.py
Normal file
237
upload_app/insert_data_online.py
Normal file
@@ -0,0 +1,237 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
import time
|
||||||
|
import random # 新增:用于随机选择User-Agent
|
||||||
|
|
||||||
|
# Pool of common desktop User-Agent strings (Chrome, Firefox, Edge, etc.).
# One entry is chosen at random per request to vary the client fingerprint.
USER_AGENTS = [
    # Chrome
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    # Firefox
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
    # Edge
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.67",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.58",
    # Safari (Windows build)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
    # IE compatibility mode (kept sparingly)
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
]
|
||||||
|
|
||||||
|
def save_point_times(point_id, point_times):
    """Append the period list for a working base point to a text file.

    Writes the values of *point_times* — deduplicated and sorted in
    descending order — one per line, to ``./point_times/<point_id>.txt``.

    Fixes vs. original:
      * the docstring claimed a JSON file, but a plain ``.txt`` is written;
      * the target directory is now created if missing (the original
        raised FileNotFoundError on a fresh checkout).

    NOTE(review): duplicates are only removed *within this call*; values
    already present in the file from earlier calls are not deduplicated
    because the file is opened in append mode.
    """
    import os  # local import keeps the module's top-level imports untouched
    os.makedirs('./point_times', exist_ok=True)
    # Dedup + descending sort in one idiomatic step.
    unique_times = sorted(set(point_times), reverse=True)
    with open(f'./point_times/{point_id}.txt', 'a', encoding='utf-8') as f:
        f.writelines(f"{t}\n" for t in unique_times)
|
||||||
|
|
||||||
|
|
||||||
|
# 批量导入断面数据
|
||||||
|
def batch_import_sections(data_list):
    """Batch-import section (断面) records via the remote API.

    Every record is validated locally first; a validation failure returns
    ``(False, reason)`` without any network traffic.

    Returns:
        (True, response_text) on success;
        (False, reason) on invalid input or an HTTP >= 400 response.

    Fix vs. original: network errors were retried by the function calling
    itself recursively, which grows the stack without bound during a
    sustained outage (eventually RecursionError).  Retries are now a loop
    with a short backoff; the per-attempt User-Agent rotation is kept.
    """
    url = "http://www.yuxindazhineng.com:3002/api/comprehensive_data/batch_import_sections"

    # --- local schema validation (no network) ---
    required_fields = ["account_id", "section_id", "mileage", "work_site", "status"]
    for index, item in enumerate(data_list):
        for field in required_fields:
            if field not in item:
                return False, f"第{index+1}条数据缺失必填字段:{field}"
        if not isinstance(item["section_id"], int):
            return False, f"第{index+1}条数据的section_id必须为整数,实际为:{type(item['section_id']).__name__}"
        if not isinstance(item["account_id"], int):
            return False, f"第{index+1}条数据的account_id必须为整数,实际为:{type(item['account_id']).__name__}"
        for str_field in ["mileage", "work_site", "status"]:
            if not isinstance(item[str_field], str) or not item[str_field].strip():
                return False, f"第{index+1}条数据的{str_field}必须为非空字符串"

    payload = json.dumps({"data": data_list})

    # --- upload with retry-on-error (replaces unbounded recursion) ---
    while True:
        headers = {
            'User-Agent': random.choice(USER_AGENTS),  # rotate UA each attempt
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': 'www.yuxindazhineng.com:3002',
            'Connection': 'keep-alive'
        }
        print(f'headers:{time.time()}')
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            # HTTP-level failures are reported, not retried (original behavior).
            if response.status_code >= 400:
                return False, f"HTTP错误 {response.status_code}:{response.text}"
            return True, response.text
        except Exception as e:
            # Connection/timeout/other transport errors: retry indefinitely,
            # as the original did, but via the loop and with a 1 s backoff.
            print(f'error:{e}{time.time()}')
            time.sleep(1)
|
||||||
|
|
||||||
|
# 批量导入测点数据
|
||||||
|
def batch_import_checkpoints(data_list):
    """Batch-import checkpoint (测点) records via the remote API.

    Returns the decoded JSON response body on success.

    Fix vs. original: every error path (HTTPError, ConnectionError,
    Timeout, JSONDecodeError, anything else) retried by recursing into
    this function, which can exhaust the stack under a sustained outage.
    The retry is now an explicit loop with a 1 s backoff; a fresh random
    User-Agent is still drawn on every attempt.

    NOTE(review): like the original, this retries forever — including on
    4xx responses, which will never succeed; consider surfacing those.
    """
    url = "http://www.yuxindazhineng.com:3002/api/comprehensive_data/batch_import_checkpoints"
    payload = json.dumps({"data": data_list})

    while True:
        headers = {
            'User-Agent': random.choice(USER_AGENTS),  # rotate UA each attempt
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': 'www.yuxindazhineng.com:3002',
            'Connection': 'keep-alive'
        }
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Matches the original's catch-all retry semantics (HTTP, transport
            # and JSON-decode errors alike), minus the unbounded recursion.
            print(f'checkpoints retry:{e}{time.time()}')
            time.sleep(1)
|
||||||
|
|
||||||
|
# 导入沉降数据
|
||||||
|
def batch_import_settlement_data(settlement_data_list):
    """Batch-import settlement (沉降) records via the remote API.

    Returns the decoded JSON response body on success.

    NOTE(review): this importer is currently DISABLED — the original code
    placed a bare ``return`` as the first statement, silently making the
    entire body unreachable.  That behavior is preserved here (the function
    is a no-op returning None); delete the guard below to re-enable.  The
    retained implementation also replaces the original's unbounded
    retry-by-recursion with a loop, so re-enabling it is safe.
    """
    return  # temporarily disabled (kept from original) — delete to re-enable

    api_url = "http://www.yuxindazhineng.com:3002/api/comprehensive_data/batch_import_settlement_data"
    request_payload = json.dumps({"data": settlement_data_list})

    while True:
        request_headers = {
            'User-Agent': random.choice(USER_AGENTS),  # rotate UA each attempt
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': 'www.yuxindazhineng.com:3002',
            'Connection': 'keep-alive'
        }
        try:
            response = requests.post(
                url=api_url,
                headers=request_headers,
                data=request_payload,
                timeout=60
            )
            response.raise_for_status()
            return response.json()
        except Exception as err:
            # Catch-all retry (HTTP, transport, JSON-decode), matching the
            # original semantics without recursive stack growth.
            print(f'settlement retry:{err}{time.time()}')
            time.sleep(1)
|
||||||
|
|
||||||
|
# 导入水准数据
|
||||||
|
def batch_import_level_data(data_list):
    """Batch-import level-survey (水准) records via the remote API.

    Returns:
        (True, response_text) on success.

    Fix vs. original: all error paths retried via unbounded self-recursion
    (RecursionError risk during a long outage).  Retries are now an
    explicit loop with a 1 s backoff, keeping the per-attempt random
    User-Agent rotation and the catch-all retry semantics.
    """
    url = "http://www.yuxindazhineng.com:3002/api/comprehensive_data/batch_import_level_data"
    payload = json.dumps({"data": data_list})

    while True:
        headers = {
            'User-Agent': random.choice(USER_AGENTS),  # rotate UA each attempt
            'Content-Type': 'application/json',
            'Accept': '*/*',
            'Host': 'www.yuxindazhineng.com:3002',
            'Connection': 'keep-alive'
        }
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            response.raise_for_status()
            return True, response.text
        except Exception as e:
            # HTTP / transport / unexpected errors: retry without recursion.
            print(f'level retry:{e}{time.time()}')
            time.sleep(1)
|
||||||
|
|
||||||
|
# 插入原始数据
|
||||||
|
def batch_import_original_data(data_list):
    """Batch-import original (原始) measurement records via the remote API.

    Every record is validated locally first; a validation failure returns
    ``(False, reason)`` without any network traffic.

    Returns:
        (True, response_text) on success, (False, reason) on invalid input.

    Fixes vs. original:
      * retry was implemented as unbounded self-recursion (stack-overflow
        risk during a sustained outage) — now a loop with a 1 s backoff;
      * the ``Host`` header was ``127.0.0.1:8000`` (a leftover from local
        testing) while the request targets ``www.yuxindazhineng.com:3002``;
        a mismatched Host header breaks name-based virtual hosting, so it
        now matches the URL.
    """
    url = "http://www.yuxindazhineng.com:3002/api/comprehensive_data/batch_import_original_data"

    # --- local schema validation (no network) ---
    required_fields = ["bfpcode", "mtime", "bffb", "bfpl", "bfpvalue", "NYID", "sort"]
    for i, item in enumerate(data_list):
        for field in required_fields:
            if field not in item:
                return False, f"第{i+1}条数据缺少必填字段: {field}"

        # mtime must parse as 'YYYY-MM-DD HH:MM:SS'.
        mtime = item["mtime"]
        try:
            datetime.strptime(mtime, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            return False, f"第{i+1}条数据的mtime格式错误,应为'YYYY-MM-DD HH:MM:SS',实际值: {mtime}"

        if not isinstance(item["sort"], int):
            return False, f"第{i+1}条数据的sort必须为整数,实际值: {item['sort']}"

    payload = json.dumps({"data": data_list})

    # --- upload with retry-on-error (replaces unbounded recursion) ---
    while True:
        headers = {
            'User-Agent': random.choice(USER_AGENTS),  # rotate UA each attempt
            'Content-Type': 'application/json',
            'Accept': '*/*',
            # FIX: was '127.0.0.1:8000' — must match the target host above.
            'Host': 'www.yuxindazhineng.com:3002',
            'Connection': 'keep-alive'
        }
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=60)
            response.raise_for_status()
            return True, response.text
        except Exception as e:
            # HTTP / transport / unexpected errors: retry without recursion.
            print(f'original retry:{e}{time.time()}')
            time.sleep(1)
|
||||||
Reference in New Issue
Block a user