"""
Excel data reading module.

Provides helpers to read 8760-hour load and generation curves from an Excel
workbook, to create template workbooks, and to summarise a workbook's data.

Author: iFlow CLI
Created: 2025-12-25
"""

import os
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd


def validate_excel_data(df: pd.DataFrame, data_type: str = "8760") -> bool:
    """
    Check that a DataFrame has the layout expected by this module.

    A diagnostic message is printed for the first problem found.

    Args:
        df: The DataFrame to validate.
        data_type: "8760" to require 8760 rows; any other value requires 24.

    Returns:
        True when the row count matches, all four required columns are
        present, and every required column is numeric, free of missing
        values and non-negative; False otherwise.
    """
    expected_length = 8760 if data_type == "8760" else 24

    # Row count must match the declared resolution exactly.
    if len(df) != expected_length:
        print(f"错误:数据行数应为{expected_length},实际为{len(df)}")
        return False

    # All four series must be present under their exact header names.
    required_columns = ['光伏出力(MW)', '风电出力(MW)', '火电出力(MW)', '负荷需求(MW)']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"错误:缺少必需的列:{missing_columns}")
        return False

    for col in required_columns:
        series = df[col]
        if not pd.api.types.is_numeric_dtype(series):
            print(f"错误:列'{col}'必须为数值类型")
            return False
        # NaN compares False against 0, so blank cells would otherwise slip
        # through the negativity check and turn later sums/maxima into NaN.
        if series.isna().any():
            print(f"错误:列'{col}'包含缺失值")
            return False
        if (series < 0).any():
            print(f"错误:列'{col}'包含负值")
            return False

    return True


def read_excel_data(file_path: str, sheet_name: Union[str, int] = 0) -> Dict[str, Any]:
    """
    Read hourly generation and load data from an Excel file.

    Sheets with at least 8760 rows are validated as "8760" data; shorter
    sheets are validated as "24" data and, when valid, repeated 365 times so
    the returned series always hold 8760 values.

    Args:
        file_path: Path to the Excel file.
        sheet_name: Sheet name or zero-based sheet index; defaults to the
            first sheet.

    Returns:
        A dict with the keys 'solar_output', 'wind_output', 'thermal_output'
        and 'load_demand' (lists of values), 'data_type' ("8760" or "24")
        and 'original_length' (row count of the sheet as read).

    Raises:
        FileNotFoundError: If file_path does not exist.
        ValueError: If the file cannot be read or fails validation.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"文件不存在:{file_path}")

    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)

        # Infer the resolution from the row count.
        data_type = "8760" if len(df) >= 8760 else "24"

        if not validate_excel_data(df, data_type):
            raise ValueError("Excel数据格式验证失败")

        solar_output = df['光伏出力(MW)'].tolist()
        wind_output = df['风电出力(MW)'].tolist()
        thermal_output = df['火电出力(MW)'].tolist()
        load_demand = df['负荷需求(MW)'].tolist()

        # A typical-day profile is tiled over 365 days to cover a full year.
        if data_type == "24" and len(df) == 24:
            print("检测到24小时数据,自动扩展到8760小时(重复365天)")
            solar_output = solar_output * 365
            wind_output = wind_output * 365
            thermal_output = thermal_output * 365
            load_demand = load_demand * 365

        return {
            'solar_output': solar_output,
            'wind_output': wind_output,
            'thermal_output': thermal_output,
            'load_demand': load_demand,
            'data_type': data_type,
            'original_length': len(df),
        }
    except Exception as e:
        # Every failure is surfaced as ValueError; keep the original
        # exception attached as the explicit cause for debugging.
        raise ValueError(f"读取Excel文件失败:{e}") from e


def _build_template_profiles(
    data_type: str,
) -> Tuple[int, List[float], List[float], List[float], List[float], str]:
    """Return (hours, solar, wind, thermal, load, description) for a template."""
    daily_solar = (
        [0.0] * 6
        + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
        + [0.0] * 6
    )
    daily_wind = [2.0, 3.0, 4.0, 3.0, 2.0, 1.0] * 4
    daily_thermal = [5.0] * 24
    daily_load = [
        3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 18.0,
        16.0, 14.0, 12.0, 10.0, 8.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0,
    ]

    if data_type == "24":
        return 24, daily_solar, daily_wind, daily_thermal, daily_load, "24小时典型日数据模板"

    solar: List[float] = []
    wind: List[float] = []
    thermal: List[float] = []
    load: List[float] = []

    # A private generator seeded with 42 yields the same draws as seeding the
    # global NumPy RNG, without disturbing the caller's random state.
    rng = np.random.RandomState(42)

    for day in range(365):
        # Sinusoidal seasonal factor, ranging over [0.7, 1.3] across the year.
        season_factor = 1.0 + 0.3 * np.sin(2 * np.pi * day / 365)
        for hour in range(24):
            # Draw order (solar, wind, load) determines the generated values.
            solar_variation = 1.0 + 0.2 * (rng.random_sample() - 0.5)
            wind_variation = 1.0 + 0.3 * (rng.random_sample() - 0.5)
            load_variation = 1.0 + 0.1 * (rng.random_sample() - 0.5)

            solar.append(daily_solar[hour] * season_factor * solar_variation)
            wind.append(daily_wind[hour] * wind_variation)
            thermal.append(daily_thermal[hour])
            # Load uses the mirrored seasonal factor (2.0 - season_factor).
            load.append(daily_load[hour] * (2.0 - season_factor) * load_variation)

    return 8760, solar, wind, thermal, load, "8760小时全年数据模板"


def create_excel_template(file_path: str, data_type: str = "8760"):
    """
    Write an Excel template workbook filled with sample data.

    The workbook has a data sheet with an hour index plus the four required
    series, and a second sheet describing the data.

    Args:
        file_path: Destination path of the workbook.
        data_type: "24" for a 24-row typical-day template; any other value
            produces an 8760-row template.
    """
    hours, solar, wind, thermal, load, description = _build_template_profiles(data_type)

    df = pd.DataFrame({
        '小时': range(1, hours + 1),
        '光伏出力(MW)': solar,
        '风电出力(MW)': wind,
        '火电出力(MW)': thermal,
        '负荷需求(MW)': load,
    })

    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='数据', index=False)

        # Second sheet: human-readable description of the data sheet.
        description_df = pd.DataFrame({
            '项目': ['数据说明', '数据类型', '时间范围', '单位', '注意事项'],
            '内容': [
                description,
                f'{data_type}小时电力数据',
                f'1-{hours}小时',
                'MW (兆瓦)',
                '所有数值必须为非负数',
            ],
        })
        description_df.to_excel(writer, sheet_name='说明', index=False)

    print(f"Excel模板已创建:{file_path}")


def analyze_excel_data(file_path: str) -> Dict[str, float]:
    """
    Compute basic statistics for the data in an Excel file.

    Args:
        file_path: Path to the Excel file.

    Returns:
        A dict with the series length plus the total, maximum and mean of
        each series and the total generation. An empty dict is returned
        (after printing the error) if reading or analysis fails.
    """
    try:
        data = read_excel_data(file_path)

        solar = data['solar_output']
        wind = data['wind_output']
        thermal = data['thermal_output']
        load = data['load_demand']

        total_solar = sum(solar)
        total_wind = sum(wind)
        total_thermal = sum(thermal)

        return {
            'data_length': len(solar),
            'total_solar': total_solar,
            'total_wind': total_wind,
            'total_thermal': total_thermal,
            'total_generation': total_solar + total_wind + total_thermal,
            'total_load': sum(load),
            'max_solar': max(solar),
            'max_wind': max(wind),
            'max_thermal': max(thermal),
            'max_load': max(load),
            'avg_solar': np.mean(solar),
            'avg_wind': np.mean(wind),
            'avg_thermal': np.mean(thermal),
            'avg_load': np.mean(load),
        }
    except Exception as e:
        # Best-effort helper: report the problem and return an empty result.
        print(f"分析数据失败:{e}")
        return {}


def main():
    """Demonstrate template creation, analysis and reading."""
    print("=== Excel数据读取模块演示 ===")

    template_8760 = "data_template_8760.xlsx"
    template_24 = "data_template_24.xlsx"

    print("\n1. 创建Excel模板文件...")
    create_excel_template(template_8760, "8760")
    create_excel_template(template_24, "24")

    print(f"\n2. 分析{template_8760}数据...")
    stats = analyze_excel_data(template_8760)
    if stats:
        print("数据统计信息:")
        for key, value in stats.items():
            print(f" {key}: {value:.2f}")

    print(f"\n3. 演示读取{template_24}数据...")
    try:
        data = read_excel_data(template_24)
        print(f"成功读取数据,类型:{data['data_type']}")
        print(f"光伏出力前10小时:{data['solar_output'][:10]}")
        print(f"风电出力前10小时:{data['wind_output'][:10]}")
        print(f"负荷需求前10小时:{data['load_demand'][:10]}")
    except Exception as e:
        print(f"读取失败:{e}")

    print("\n=== 演示完成 ===")
    print("模板文件已创建,您可以根据实际数据修改Excel文件。")


if __name__ == "__main__":
    main()