# Histogram plotting, random sampling, and principal component analysis (PCA)

# -*- coding: utf-8 -*-
"""
Created on Tue Aug 23 13:55:48 2022

@author: cnliutz
"""
# Draw a histogram of normally distributed random samples.
import matplotlib.pyplot as plt
import numpy as np

# 500 draws from a normal distribution with mean 1 and standard deviation 10.
data = np.random.normal(loc=1, scale=10, size=500)
plt.hist(data)
# Without an explicit show() the figure is created but never displayed when
# this file is run as a plain script rather than in an inline-plotting console.
plt.show()

# Random sampling: print 10 randomly chosen rows of the iris dataset.
import pandas as pd
# This import used to be commented out, which made `datasets.load_iris()`
# below fail with NameError; it must be active for this section to run.
from sklearn import datasets

iris = datasets.load_iris()
# Build the frame with the dataset's own feature names as column labels.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
print(iris_df.sample(10))

# Principal component analysis (PCA) on the California housing dataset.
from sklearn.datasets import fetch_california_housing
from sklearn.decomposition import PCA

housing = fetch_california_housing()  # load the dataset
y = pd.DataFrame(housing.data)  # wrap the feature matrix in a DataFrame

# NOTE(review): the features are passed to PCA without standardization, so
# columns with large raw variance presumably dominate - confirm this is intended.
pca = PCA(n_components=5).fit(y)  # fit a model that keeps 5 components

explained_ratio = pca.explained_variance_ratio_
# Share of the total variance explained by each retained component.
print(explained_ratio)
# Combined share explained by all 5 components; the original author recorded
# 0.9999974631075885 (about 99%) for this example.
print(explained_ratio.sum())