# Only the most basic version is implemented
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def sigmoid(z):
    # logistic (sigmoid) function
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y):
    # cost function
    first = (-y) * np.log(sigmoid(X @ theta))  # @ multiplies arrays as matrices, equivalent to np.dot()
    second = (1 - y) * np.log(1 - sigmoid(X @ theta))
    return np.mean(first - second)
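For reference, the vectorized code above computes the standard cross-entropy cost for logistic regression with hypothesis $h_\theta(x) = \sigma(\theta^T x)$:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[\,y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]$$

`np.mean(first - second)` is exactly this average over the m training examples.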
data = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])

# add a ones column - this makes the matrix multiplication work out easier
if 'Ones' not in data.columns:  # check the column labels first
    data.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
X = data.iloc[:, :-1].values  # convert the frame to its NumPy-array representation
y = data.iloc[:, -1].values   # the result is a NumPy array, not a NumPy matrix

theta = np.zeros(X.shape[1])  # parameters to be learned, initialized to zeros
print(cost(theta, X, y))      # print the initial cost
# 0.6931471805599453
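The initial value is no accident: with $\theta = \mathbf{0}$ every prediction is $\sigma(0) = 0.5$, so each example contributes $-\log 0.5$ and

$$J(\mathbf{0}) = -\log\tfrac{1}{2} = \ln 2 \approx 0.6931471805599453,$$

which makes it a handy sanity check for the cost implementation.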
# Tried updating the parameters with the most basic gradient descent, and it worked.
# This is parameter fitting in the true sense, though it took 120000 iterations at a
# learning rate of 0.001; a library optimizer is worth trying instead (a sketch follows
# the training run below). Only the most basic version is implemented.
# Open question: how is automatic differentiation of the cost carried out and implemented?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def sigmoid(z):
    # logistic (sigmoid) function
    return 1 / (1 + np.exp(-z))

def computeCost(X, y, theta):
    # cost function
    first = (-y) * np.log(sigmoid(X @ theta))  # @ multiplies arrays as matrices, equivalent to np.dot()
    second = (1 - y) * np.log(1 - sigmoid(X @ theta))
    return np.mean(first - second)

def gradientDescent(X, y, theta, alpha, epoch):
    """Return (theta, cost)."""
    cost = np.zeros(epoch)  # ndarray holding the cost after each epoch
    m = X.shape[0]          # number of samples m
    for i in range(epoch):
        # fully vectorized: one matrix product updates every parameter at once
        theta = theta - (alpha / m) * (X.T @ (sigmoid(X @ theta) - y))
        cost[i] = computeCost(X, y, theta)
    return theta, cost
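The single-line update inside the loop is the batch gradient-descent rule; differentiating $J(\theta)$ gives the gradient in vectorized form, so each step is

$$\theta := \theta - \frac{\alpha}{m}\, X^{T}\big(\sigma(X\theta) - y\big).$$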
data = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])
# data = (data - data.mean()) / data.std()
# Standardizing the whole frame here turns out to be wrong, because it would also
# rescale y, which is a 0/1 variable.

# add a ones column - this makes the matrix multiplication work out easier
if 'Ones' not in data.columns:  # check the column labels first
    data.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
X = data.iloc[:, :-1].values  # convert the frame to its NumPy-array representation
y = data.iloc[:, -1].values   # the result is a NumPy array, not a NumPy matrix

theta = np.zeros(X.shape[1])  # initial parameters; already an (n+1)-dimensional vector
print(computeCost(X, y, theta))  # print the initial cost
# 0.6931471805599453
epoch = 120000
alpha = 0.001
final_theta, cost = gradientDescent(X, y, theta, alpha, epoch)
# print(cost)
print(final_theta)
# The cost curve below decreases almost linearly, which is a bit odd.
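As the header comment suggests, a library optimizer reaches a good optimum in far fewer iterations than 120000 hand-rolled steps. A minimal sketch using scipy.optimize.minimize (an assumption on my part: scipy is not imported in the original, and the `gradient` helper is added here for illustration):

import scipy.optimize as opt

def gradient(theta, X, y):
    # analytic gradient of the cost: (1/m) * X^T (sigmoid(X @ theta) - y)
    return (X.T @ (sigmoid(X @ theta) - y)) / len(X)

# computeCost takes (X, y, theta), but minimize expects theta first, hence the lambdas
res = opt.minimize(fun=lambda t: computeCost(X, y, t),
                   x0=np.zeros(X.shape[1]),
                   jac=lambda t: gradient(t, X, y),
                   method='TNC')
print(res.x, res.fun)  # fitted theta and final cost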
fig, ax = plt.subplots(figsize=(8,4))
ax.plot(np.arange(epoch), cost, 'r')  # np.arange() returns an evenly spaced array
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()

# plot the decision boundary
positive = data[data.admitted.isin([1])]   # admitted (the column holds ints, so match on 1, not '1')
negative = data[data.admitted.isin([0])]   # not admitted
x1 = np.arange(130, step=0.1)
x2 = -(final_theta[0] + x1 * final_theta[1]) / final_theta[2]  # rearranged from theta^T x = 0
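The rearrangement in the line above solves the boundary condition for $x_2$: the model predicts $y = 1$ when $\sigma(\theta^T x) \ge 0.5$, i.e. when $\theta^T x \ge 0$, so the boundary is the line

$$\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2}.$$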
fig, ax = plt.subplots(figsize=(8,5))
ax.scatter(positive['exam1'], positive['exam2'], c='b', label='Admitted')
ax.scatter(negative['exam1'], negative['exam2'], s=50, c='r', marker='x', label='Not Admitted')
ax.plot(x1, x2)
ax.set_xlim(0,130)
ax.set_ylim(0,130)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title('Decision Boundary')
ax.legend()  # the scatter labels only render with an explicit legend
plt.show()
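To check numerically that the boundary matches the data, one can threshold the fitted probabilities at 0.5 and compare against the labels. A minimal sketch (the `predictions` and `accuracy` names are mine, not from the original):

# predict 1 when sigmoid(theta^T x) >= 0.5, else 0
predictions = (sigmoid(X @ final_theta) >= 0.5).astype(int)
accuracy = np.mean(predictions == y)
print(f'training accuracy: {accuracy:.2%}')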