# -*- coding: utf-8 -*-
"""
Created on Tue Aug 23 13:55:48 2022
@author: cnliutz
"""
# Draw a histogram of normally distributed random samples.
import matplotlib.pyplot as plt
import numpy as np

# 500 draws from a normal distribution with mean 1 and standard deviation 10.
data = np.random.normal(loc=1, scale=10, size=500)
# NOTE(review): there is no plt.show() here, so the figure presumably only
# appears where the backend renders automatically (e.g. an inline console).
plt.hist(data)
# Random sampling: print 10 randomly chosen rows of the iris dataset.
import pandas as pd
# This import was commented out, which made `datasets.load_iris()` below
# raise NameError and stop the script before the PCA section ran.
from sklearn import datasets

iris = datasets.load_iris()
# Build the frame with the feature names as column labels in one step.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
print(iris_df.sample(10))
# Principal component analysis (PCA) on the California housing features.
from sklearn.datasets import fetch_california_housing
from sklearn.decomposition import PCA

housing = fetch_california_housing()  # load the dataset
y = pd.DataFrame(housing.data)  # feature matrix as a DataFrame

# Keep 5 principal components; fit() returns the fitted estimator itself.
pca = PCA(n_components=5).fit(y)

# Share of the total variance explained by each retained component.
print(pca.explained_variance_ratio_)
# Sum of those shares, i.e. how much of the data's information the retained
# components capture. The original author reported 0.9999974631075885 for
# this example, i.e. more than 99%.
print(pca.explained_variance_ratio_.sum())