Principal components analysis using pandas dataframe
Most sklearn objects work with pandas
DataFrames just fine. Would something like this work for you?
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
# Example input: a 20-row, 10-column frame of draws from a normal
# distribution with mean 0 and standard deviation 1.
df = pd.DataFrame(np.random.normal(loc=0, scale=1, size=(20, 10)))
# Keep the first five principal components; fit() hands back the fitted
# estimator, so construction and fitting can be chained.
pca = PCA(n_components=5).fit(df)
You can access the components themselves with:
# Attribute holding the components found by the fit. As a bare expression
# this only displays in an interactive session; in a script, print it.
pca.components_
import pandas
from sklearn.decomposition import PCA
import numpy
import matplotlib.pyplot as plot
# Example input: a 20-row, 10-column frame of normally distributed noise.
df = pandas.DataFrame(data=numpy.random.normal(0, 1, (20, 10)))

# Standardize every column (subtract its mean, divide by its standard
# deviation). PCA centers the input on its own but does not rescale it, so
# this step matters when the columns are on different scales; without it the
# columns with the largest variance dominate the components.
df_normalized = (df - df.mean()) / df.std()

# Request as many components as there are columns.
pca = PCA(n_components=df.shape[1])
pca.fit(df_normalized)

# Reformat and view results: one row per original column, one column per
# component. The labels are counted from the components array itself so they
# always match the table being labelled (numbering starts at PC0).
loadings = pandas.DataFrame(
    pca.components_.T,
    columns=['PC%s' % i for i in range(pca.components_.shape[0])],
    index=df.columns,
)
print(loadings)

# Plot the share of total variance attributed to each component, in the
# order the components are stored (x-axis positions start at 0).
plot.plot(pca.explained_variance_ratio_)
plot.ylabel('Explained Variance')
plot.xlabel('Components')
plot.show()