"""
Excel数据读取模块

该模块提供从Excel文件中读取8760小时负荷和发电曲线数据的功能。

作者: iFlow CLI
创建日期: 2025-12-25
"""

import os
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd


def validate_excel_data(df: pd.DataFrame, data_type: str = "8760") -> bool:
    """
    Check that a DataFrame has the layout expected of a power-data sheet.

    The frame passes when it has exactly the expected number of rows, contains
    all four required columns, and each of those columns is numeric, free of
    missing values and non-negative. The first problem found is reported with
    ``print`` and ends the check.

    Args:
        df: Data read from the Excel sheet.
        data_type: "8760" expects 8760 rows; any other value expects 24 rows.

    Returns:
        bool: True if every check passes, otherwise False.
    """
    expected_length = 8760 if data_type == "8760" else 24

    # Row count must match the declared series length exactly.
    if len(df) != expected_length:
        print(f"错误：数据行数应为{expected_length}，实际为{len(df)}")
        return False

    # All four generation/load columns must be present.
    required_columns = ['光伏出力(MW)', '风电出力(MW)', '火电出力(MW)', '负荷需求(MW)']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        print(f"错误：缺少必需的列：{missing_columns}")
        return False

    for col in required_columns:
        series = df[col]

        if not pd.api.types.is_numeric_dtype(series):
            print(f"错误：列'{col}'必须为数值类型")
            return False

        # NaN compares False against 0, so without this check a column with
        # empty cells would slip through the non-negative test below.
        if series.isna().any():
            print(f"错误：列'{col}'包含缺失值")
            return False

        if (series < 0).any():
            print(f"错误：列'{col}'包含负值")
            return False

    return True


def read_excel_data(file_path: str, sheet_name: Union[str, int] = 0) -> Dict[str, Any]:
    """
    Read hourly generation and load curves from an Excel sheet.

    The sheet is treated as a full-year series when it has at least 8760 rows
    and as a 24-hour typical day otherwise; it must then pass
    ``validate_excel_data`` for that type. A 24-hour series is repeated 365
    times so the returned curves always hold 8760 values.

    Args:
        file_path: Path to the Excel file.
        sheet_name: Sheet name or index passed to ``pandas.read_excel``;
            defaults to the first sheet.

    Returns:
        Dict with the lists 'solar_output', 'wind_output', 'thermal_output'
        and 'load_demand', plus 'data_type' ("8760" or "24") and
        'original_length' (row count of the sheet before any expansion).

    Raises:
        FileNotFoundError: ``file_path`` does not exist.
        ValueError: The file cannot be read, ``sheet_name`` does not select a
            single sheet, or the data fails validation.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"文件不存在：{file_path}")

    # Only the read itself is guarded, so validation errors raised below are
    # not re-wrapped and the original cause stays attached to the exception.
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
    except Exception as e:
        raise ValueError(f"读取Excel文件失败：{str(e)}") from e

    # read_excel returns a dict of frames when sheet_name is None or a list.
    if not isinstance(df, pd.DataFrame):
        raise ValueError("读取Excel文件失败：sheet_name必须指定单个工作表")

    # Infer the series type from the row count.
    data_type = "8760" if len(df) >= 8760 else "24"

    if not validate_excel_data(df, data_type):
        raise ValueError("Excel数据格式验证失败")

    solar_output = df['光伏出力(MW)'].tolist()
    wind_output = df['风电出力(MW)'].tolist()
    thermal_output = df['火电出力(MW)'].tolist()
    load_demand = df['负荷需求(MW)'].tolist()

    # Validation guarantees exactly 24 rows here; tile the day across a year.
    if data_type == "24":
        print("检测到24小时数据，自动扩展到8760小时（重复365天）")
        solar_output = solar_output * 365
        wind_output = wind_output * 365
        thermal_output = thermal_output * 365
        load_demand = load_demand * 365

    return {
        'solar_output': solar_output,
        'wind_output': wind_output,
        'thermal_output': thermal_output,
        'load_demand': load_demand,
        'data_type': data_type,
        'original_length': len(df)
    }


def create_excel_template(file_path: str, data_type: str = "8760"):
    """
    Write an Excel workbook pre-filled with sample power data.

    The workbook gets a '数据' sheet holding an hour index and the four
    generation/load columns, and a '说明' sheet describing the data. A
    confirmation line is printed once the file has been written.

    Args:
        file_path: Where to save the workbook.
        data_type: "24" writes a single typical day (24 rows); any other
            value writes a simulated year (8760 rows).
    """
    # Hour-by-hour profile of one typical day; both template kinds use it.
    day_solar = [0.0] * 6 + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0] + [0.0] * 6
    day_wind = [2.0, 3.0, 4.0, 3.0, 2.0, 1.0] * 4
    day_thermal = [5.0] * 24
    day_load = [3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 18.0,
                16.0, 14.0, 12.0, 10.0, 8.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0]

    if data_type == "24":
        hours = 24
        solar, wind, thermal, load = day_solar, day_wind, day_thermal, day_load
        description = "24小时典型日数据模板"
    else:
        hours = 8760
        solar, wind, thermal, load = [], [], [], []

        # Fixed seed so the generated template is reproducible.
        np.random.seed(42)

        for day in range(365):
            # Sinusoidal seasonal swing applied to solar; load swings the
            # opposite way around 1.0.
            season_factor = 1.0 + 0.3 * np.sin(2 * np.pi * day / 365)
            load_season = 2.0 - season_factor

            for pv, wd, th, ld in zip(day_solar, day_wind, day_thermal, day_load):
                # Draw order (solar, wind, load) fixes the generated values.
                pv_noise = 1.0 + 0.2 * (np.random.random() - 0.5)
                wd_noise = 1.0 + 0.3 * (np.random.random() - 0.5)
                ld_noise = 1.0 + 0.1 * (np.random.random() - 0.5)

                solar.append(pv * season_factor * pv_noise)
                wind.append(wd * wd_noise)
                thermal.append(th)
                load.append(ld * load_season * ld_noise)

        description = "8760小时全年数据模板"

    data_frame = pd.DataFrame({
        '小时': range(1, hours + 1),
        '光伏出力(MW)': solar,
        '风电出力(MW)': wind,
        '火电出力(MW)': thermal,
        '负荷需求(MW)': load
    })

    # Rows of the explanatory sheet, as (item, content) pairs.
    notes = [
        ('数据说明', description),
        ('数据类型', f'{data_type}小时电力数据'),
        ('时间范围', f'1-{hours}小时'),
        ('单位', 'MW (兆瓦)'),
        ('注意事项', '所有数值必须为非负数'),
    ]
    notes_frame = pd.DataFrame({
        '项目': [item for item, _ in notes],
        '内容': [content for _, content in notes]
    })

    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        data_frame.to_excel(writer, sheet_name='数据', index=False)
        notes_frame.to_excel(writer, sheet_name='说明', index=False)

    print(f"Excel模板已创建：{file_path}")


def analyze_excel_data(file_path: str) -> Dict[str, float]:
    """
    Summarise the curves stored in an Excel data file.

    The file is loaded with ``read_excel_data``; for each of the solar, wind,
    thermal and load curves the total, maximum and mean are computed, along
    with the series length and the combined generation total.

    Args:
        file_path: Path to the Excel file.

    Returns:
        Dict of statistics, or an empty dict if loading or computing failed
        (the failure is reported with ``print``).
    """
    try:
        curves = read_excel_data(file_path)

        pv = curves['solar_output']
        wind = curves['wind_output']
        thermal = curves['thermal_output']
        demand = curves['load_demand']

        # Sum each generation curve once and reuse it for the combined total.
        pv_total = sum(pv)
        wind_total = sum(wind)
        thermal_total = sum(thermal)

        summary = {
            'data_length': len(pv),
            'total_solar': pv_total,
            'total_wind': wind_total,
            'total_thermal': thermal_total,
            'total_generation': pv_total + wind_total + thermal_total,
            'total_load': sum(demand),
            'max_solar': max(pv),
            'max_wind': max(wind),
            'max_thermal': max(thermal),
            'max_load': max(demand),
            'avg_solar': np.mean(pv),
            'avg_wind': np.mean(wind),
            'avg_thermal': np.mean(thermal),
            'avg_load': np.mean(demand)
        }
        return summary
    except Exception as e:
        # Best effort: report the problem and hand back an empty result.
        print(f"分析数据失败：{str(e)}")
        return {}


def main():
    """
    Demonstrate the module end to end.

    Creates the 8760-hour and 24-hour template workbooks in the current
    directory, prints statistics for the 8760-hour one, then reads the
    24-hour one back and prints the first ten values of three curves.
    """
    yearly_path = "data_template_8760.xlsx"
    daily_path = "data_template_24.xlsx"

    print("=== Excel数据读取模块演示 ===")

    print("\n1. 创建Excel模板文件...")
    for path, kind in ((yearly_path, "8760"), (daily_path, "24")):
        create_excel_template(path, kind)

    print(f"\n2. 分析{yearly_path}数据...")
    summary = analyze_excel_data(yearly_path)
    if summary:
        print("数据统计信息：")
        for name in summary:
            print(f"  {name}: {summary[name]:.2f}")

    print(f"\n3. 演示读取{daily_path}数据...")
    try:
        loaded = read_excel_data(daily_path)
        print(f"成功读取数据，类型：{loaded['data_type']}")
        print(f"光伏出力前10小时：{loaded['solar_output'][:10]}")
        print(f"风电出力前10小时：{loaded['wind_output'][:10]}")
        print(f"负荷需求前10小时：{loaded['load_demand'][:10]}")
    except Exception as e:
        # Keep the demo running to its closing message even if the read fails.
        print(f"读取失败：{str(e)}")

    print("\n=== 演示完成 ===")
    print("模板文件已创建，您可以根据实际数据修改Excel文件。")


# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    main()