image.png
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans #导入kmeans
from sklearn.utils import shuffle
import numpy as np
from skimage import io
import warnings
warnings.filterwarnings('ignore')
# Load the source photo as an array. `plt.imread` is used because this file
# imports `matplotlib.pyplot as plt` but never binds the name `mpl`, so the
# previous `mpl.image.imread(...)` call failed with NameError.
original = plt.imread('Yosemite 5.jpg')
# NOTE: image arrays are laid out as (rows, columns, channels), so `width`
# actually receives the row count and `height` the column count. The names are
# kept because the rest of the script passes them along in this same order.
width, height, depth = original.shape
# Flatten to one row per pixel: each pixel becomes a sample with `depth` features.
temp = original.reshape(width * height, depth)
# Scale to floats; assumes 8-bit channel values (0-255), giving a [0, 1] range.
temp = np.array(temp, dtype=np.float64) / 255
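图像读取完后,我们得到的其实是一个形状为 (width, height, depth) 的三维数组(width、height 对应图片的分辨率,depth 是颜色通道数,这里是 RGB 三个通道)。为了便于聚类,我们把它展平成 width*height 行、depth 列的二维数组,并把数值除以 255 归一化到 0~1 之间。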
# Shuffle the pixel rows reproducibly, then keep the first 1000 RGB values
# as the training set.
original_sample = shuffle(temp, random_state=0)
original_sample = original_sample[:1000]
def cluster(k):
    """Fit a K-Means model with ``k`` clusters on the sampled pixels.

    The model is trained on the module-level ``original_sample`` array.

    Args:
        k: Number of clusters, i.e. the number of colours in the palette.

    Returns:
        The fitted ``KMeans`` estimator; its ``cluster_centers_`` are the
        palette colours.
    """
    # `n_jobs` is deliberately not passed: the parameter was removed from
    # KMeans in scikit-learn 1.0 and passing it raises TypeError there.
    # `n_init=10` pins the number of restarts explicitly so the result does
    # not depend on the installed version's default.
    estimator = KMeans(n_clusters=k, n_init=10, random_state=0)
    return estimator.fit(original_sample)
我们只随机取了1000组RGB值作为训练集;k 表示聚类成 k 个簇,对于本文来说就是 k 种颜色。
def recreate_image(codebook, labels, w, h):
    """Rebuild a ``(w, h, d)`` image by looking up each pixel's palette colour.

    Args:
        codebook: Array-like of shape ``(n_colors, d)``; row ``i`` is the
            colour assigned to label ``i``.
        labels: 1-D sequence of integer indices into ``codebook``, one per
            pixel in row-major order. Only the first ``w * h`` entries are
            used.
        w: Size of the first image axis.
        h: Size of the second image axis.

    Returns:
        A float64 array of shape ``(w, h, d)``.

    Raises:
        IndexError: If ``labels`` has fewer than ``w * h`` entries, or holds
            an index outside ``codebook``.
    """
    codebook = np.asarray(codebook, dtype=np.float64)
    d = codebook.shape[1]
    n_pixels = w * h
    if n_pixels == 0:
        # Nothing to look up; avoids indexing with an empty, untyped array.
        return np.zeros((w, h, d))
    if len(labels) < n_pixels:
        raise IndexError(
            f"need {n_pixels} labels for a {w}x{h} image, got {len(labels)}"
        )
    # One vectorized lookup replaces the per-pixel Python loop: indexing with
    # an integer array selects one codebook row per label.
    pixel_labels = np.asarray(labels[:n_pixels])
    return codebook[pixel_labels].reshape(w, h, d)
我们选取了32,64,128三个K值来做比较:
# Quantize the full image with palettes of 32, 64 and 128 colours.
# `kmeans` and `labels` are rebound on every pass, so once the loop ends they
# refer to the 128-colour run.
quantized = {}
for n_colors in (32, 64, 128):
    kmeans = cluster(n_colors)
    labels = kmeans.predict(temp)
    quantized[n_colors] = recreate_image(
        kmeans.cluster_centers_, labels, width, height
    )
kmeans_32 = quantized[32]
kmeans_64 = quantized[64]
kmeans_128 = quantized[128]
# Show the original next to the three quantized versions in a 2x2 grid, and
# write each quantized image to a PNG right after it is drawn.
plt.figure(figsize=(15, 10))
# (subplot position, title, image, output file or None when nothing is saved)
panels = (
    (1, 'Original image', original.reshape(width, height, depth), None),
    (2, 'Quantized image (128 colors, K-Means)', kmeans_128, 'kmeans_128.png'),
    (3, 'Quantized image (64 colors, K-Means)', kmeans_64, 'kmeans_64.png'),
    (4, 'Quantized image (32 colors, K-Means)', kmeans_32, 'kmeans_32.png'),
)
for position, title, picture, out_path in panels:
    plt.subplot(2, 2, position)
    plt.axis('off')
    plt.title(title)
    plt.imshow(picture)
    if out_path is not None:
        io.imsave(out_path, picture)
plt.show()
结果如下:
差别还是比较明显的,随着颜色变少,图片也越来越马赛克了。
其实对于图片压缩这块,各大互联网公司都投入了人力进行优化:在保证图片清晰的情况下减小文件大小,这样一来能为公司节省一大笔带宽费用,二来也能让用户更快地加载出图片,提升用户体验。这篇文章只是我在学 k-means 时看到的一个案例,只涉及图片压缩中很小的一部分。写这篇文章的时候我也查了下相关的知识,真要下功夫研究,可是一门大学问。最后:peace~