导出接口

This commit is contained in:
lhx
2025-11-10 09:56:50 +08:00
parent 4ecc770d20
commit 3bd5885dce
6 changed files with 400 additions and 6 deletions

View File

@@ -11,7 +11,7 @@ Parquet数据处理与Excel导出脚本
- 数据关联链:断面→观测点→沉降→水准→原始
3. 以水准数据为主体整理数据
- 拆分的benchmarkids(起始点/终止点)
- 收集测点(同一水准线路的所有观测点)
- 收集测点(同一水准线路的所有观测点名称aname)
- 计算时间范围(原始数据mtime范围)
- 格式化日期(YYYY-MM-DD)
4. 导出为Excel文件
@@ -28,7 +28,14 @@ pip install pandas numpy openpyxl
作者: Claude Code
日期: 2025-11-08
版本: 1.0
版本: 1.2.1
更新日志:
- v1.2.1: 测点列优化,从point_id改为aname(观测点名称)
- v1.2.0: 彻底修复numpy array布尔值判断错误
- v1.2: 新增NYID(期数ID)重复检查功能
- v1.1: 新增数据质量检验机制
- v1.0: 初始版本
"""
import os
@@ -338,7 +345,7 @@ def process_folder_data(folder_name, folder_path, files):
else:
work_site = ""
# 6. 收集同一水准线路编码的所有水准数据对应的沉降数据,进而获取观测点
# 6. 收集同一水准线路编码的所有水准数据对应的沉降数据,进而获取观测点名称
# 找到所有具有相同linecode的水准数据
same_line_levels = level_df[level_df["linecode"] == linecode]
same_line_nyids = same_line_levels["NYID"].unique()
@@ -348,7 +355,18 @@ def process_folder_data(folder_name, folder_path, files):
# 获取这些沉降数据对应的观测点point_id
all_point_ids = all_settlements_same_line["point_id"].unique()
point_ids_str = ",".join(map(str, sorted(all_point_ids)))
# 从观测点数据中获取对应的aname(观测点名称)
if not checkpoint_df.empty:
related_checkpoints_for_names = checkpoint_df[checkpoint_df["point_id"].isin(all_point_ids)]
if not related_checkpoints_for_names.empty:
# 获取所有的aname
all_anames = related_checkpoints_for_names["aname"].dropna().unique()
anames_str = ",".join(sorted(map(str, all_anames)))
else:
anames_str = ""
else:
anames_str = ""
# 7. 计算时间范围(通过同一水准线路编码的所有NYID)
if has_original:
@@ -364,7 +382,7 @@ def process_folder_data(folder_name, folder_path, files):
"水准线路": linecode,
"起始点": start_point,
"终止点": end_point,
"测点": point_ids_str,
"测点": anames_str, # 修改为观测点名称
"起始时间": min_mtime,
"终止时间": max_mtime,
"类型": work_site