import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
# 1. Load the measurement data from the Excel workbook (replace the path
#    with the location of your own file).
data = pd.read_excel(
    'C:\\Users\\Lenovo\\Desktop\\4#机组脱硝分析1.xlsx',
    usecols="B:H, N:Z",  # only worksheet columns B-H and N-Z are read
    skiprows=[0],  # skip the first row of the sheet
    # nrows=5,  # uncomment to read only the first 5 rows
)
# LocalOutlierFactor refuses input that contains NaN, so rows with any
# missing cell are removed (and reported) before the array is built.
rows_before = len(data)
data = data.dropna()
rows_dropped = rows_before - len(data)
if rows_dropped:
    print(f"已删除含缺失值的行数:{rows_dropped}")
# Plain NumPy matrix of the remaining samples (one row per sample).
X = data.values
# 2. Standardize the features: LOF is distance based, so every column
#    should be on a comparable scale before neighbours are searched.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
# 3. Configure the LOF model.
lof_settings = {
    "n_neighbors": 20,  # size of the neighbourhood used per sample
    "contamination": 0.1,  # expected share of outliers (10%)
    "novelty": False,  # default mode: score the fitted data itself
}
lof = LocalOutlierFactor(**lof_settings)
# 4. Fit and label every sample: -1 marks an outlier, 1 a normal point.
y_pred = lof.fit_predict(X_scaled)
# 5. Split the original (unscaled) rows according to the predicted label.
outlier_mask = y_pred == -1
normal_mask = y_pred == 1
outliers = X[outlier_mask]
normal_data = X[normal_mask]
print(f"检测到异常值数量:{len(outliers)}")
print(f"剩余正常值数量:{len(normal_data)}")
# 6. Anomaly score per sample. scikit-learn stores the *negated* LOF, so
#    the sign is flipped back here: the resulting value is positive and a
#    larger value means a more anomalous sample (despite the variable name).
negative_outlier_factor = -lof.negative_outlier_factor_
# 7. Assemble a labelled DataFrame from the raw (unscaled) samples.
#    Generic names col1..colN are generated from the actual width of X, so
#    the script keeps working if the set of columns read from Excel changes.
columns = [f'col{i}' for i in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=columns)
# Human-readable label derived from the LOF prediction (1 -> Normal).
df['IsNormal'] = ['Normal' if label == 1 else 'Outlier' for label in y_pred]
# Positive LOF score of each row; larger means more anomalous.
df['AnomalyScore'] = negative_outlier_factor
# 8-9. Keep only the rows labelled normal, ordered from the highest to the
#      lowest anomaly score. The selection is computed once so the table
#      printed here is exactly the frame kept in results_df for later use.
results_df = df[df['IsNormal'] == 'Normal'].sort_values('AnomalyScore', ascending=False)
print("\n正常值列表:")
print(results_df)
# 10. Write the retained (normal) rows to an Excel workbook: a single sheet
#     named 'normal', without the DataFrame index, using openpyxl.
export_options = dict(sheet_name='normal', index=False, engine='openpyxl')
results_df.to_excel('C:\\Users\\Lenovo\\Desktop\\normaldata.xlsx', **export_options)

