Python is a programming language that lets you work quicklyand integrate systems more effectively.
本文大部分内容来自网络转载,仅供个人记录用。

终于迈向编程的第一步了,对于大数据课上的东西,基本就是会调包就行,其他都是语法和算法层面的东西,下面是这学期所做的三个实验用到的代码,基本都是从网络上抄来的,略有修改。

梯度下降算法

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D

mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'SimHei'
mpl.rcParams['axes.unicode_minus'] = False


# 二维原始图像
def f2(x, y):
return (x - 10) ** 2 + (y - 3) ** 2 + 8


# 导函数、偏导
def hx2(x, y):
return 2 * x - 20


def hy2(x, y):
return 2 * y - 6


# 使用梯度下降法进行求解
GD_X1 = []
GD_X2 = []
GD_Y = []
x1 = 2
x2 = 2
alpha = 0.2 # 学习步长,也叫阿尔法
f_change = f2(x1, x2) # 得到y0的值
f_current = f_change # y0当前值
GD_X1.append(x1)
GD_X2.append(x2)
GD_Y.append(f_current)
iter_number = 0

while f_change > 1e-10 and iter_number < 100:
iter_number += 1
prex1 = x1
prex2 = x2
x1 = x1 - alpha * hx2(prex1, prex2)
x2 = x2 - alpha * hy2(prex1, prex2)
tmp = f2(x1, x2)
# 判断y值的变化,不能太小
f_change = np.abs(f_current - tmp)
f_current = tmp
GD_X1.append(x1)
GD_X2.append(x2)
GD_Y.append(f_current)

print(u'最终的结果:(%.5f,%.5f,%.5f)' % (x1, x2, f_current))
print(u'迭代过程中X的取值,迭代次数: %d' % iter_number)
print(GD_X1)

# 构建数据
X1 = np.arange(-10, 10, 0.2)
X2 = np.arange(-10, 10, 0.2)

X1, X2 = np.meshgrid(X1, X2)

Y = np.array(list(map(lambda t: f2(t[0], t[1]), zip(X1.flatten(), X2.flatten()))))
Y.shape = X1.shape

# 画图
fig = plt.figure(facecolor='w')
ax = Axes3D(fig)
ax.plot_surface(X1, X2, Y, rstride=1, cstride=1, cmap=plt.cm.jet)
ax.plot(GD_X1, GD_X2, GD_Y, 'bo--', linewidth=1)
plt.show()

K-MEANS算法

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# 读取数据
data = np.array([[0, 0], [1, 2], [3, 1], [8, 8], [9, 10], [10, 7]])
# 聚类数量
k = 4
# 训练模型
model = KMeans(n_clusters=k)
model.fit(data)
# 分类中心点坐标
centers = model.cluster_centers_
# 预测结果
result = model.predict(data)
# 用不同的颜色绘制数据点
mark = ['or', 'og', 'ob', 'ok']
for i, d in enumerate(data):
plt.plot(d[0], d[1], mark[result[i]])
# 画出各个分类的中心点
mark = ['*r', '*g', '*b', '*k']
for i, center in enumerate(centers):
plt.plot(center[0], center[1], mark[i], markersize=20)

# 绘制簇的作用域
# 获取数据值所在的范围
x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1

# 生成网格矩阵
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))
z = model.predict(np.c_[xx.ravel(), yy.ravel()])
z = z.reshape(xx.shape)
cs = plt.contourf(xx, yy, z)
plt.show()

DBSCAN算法

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.cluster import DBSCAN
iris = datasets.load_iris()
X = iris.data[:, :4] # #表示我们只取特征空间中的4个维度
print(X.shape)
# 绘制数据分布图
plt.scatter(X[:, 0], X[:, 1], c="red", marker='o', label='see')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()
dbscan = DBSCAN(eps=0.4, min_samples=9)
dbscan.fit(X)
label_pred = dbscan.labels_
# 绘制k-means结果
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
x2 = X[label_pred == 2]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label2')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()