线性回归(公式推导+numpy实现)

  • Post author:
  • Post category:其他




公式推导

含义 公式 维度
输入(矩阵形式)

$$\mathbf X= \begin{bmatrix} -{\mathbf x^{(1)}}^T- \\ -{\mathbf x^{(2)}}^T- \\ \cdots \\ -{\mathbf x^{(i)}}^T- \\ \cdots \\ -{\mathbf x^{(m)}}^T- \end{bmatrix}$$

维度:$m\times n$
输入

$$\mathbf x^{(i)}=\begin{bmatrix} x_{1}^{(i)} & x_{2}^{(i)} & \cdots & x_{j}^{(i)} & \cdots & x_{n}^{(i)}\end{bmatrix}^T$$

维度:$n\times 1$
标签

$$\mathbf y={\begin{bmatrix} y^{(1)} & y^{(2)} & \cdots & y^{(i)} & \cdots & y^{(m)}\end{bmatrix}}^T$$

维度:$m\times 1$
参数

$$\mathbf w={\begin{bmatrix}w_{1} & w_{2} & \cdots & w_{j} & \cdots & w_{n}\end{bmatrix}}^T$$

维度:$n\times 1$
输出

$$f_{w,b}(\mathbf x^{(i)}) = {\mathbf w}^T{\mathbf x}^{(i)} + b$$

维度:标量
输出(矩阵形式)

$$f_{w,b}(\mathbf X) = \mathbf X \mathbf w + b$$

维度:$m\times 1$
损失函数

$$cost^{(i)} = (f_{w,b}(\mathbf x^{(i)}) - y^{(i)})^2$$

维度:标量
代价函数

$$\begin{aligned}J(\mathbf w,b) &= \frac{1}{2m} \sum\limits_{i = 1}^{m} cost^{(i)}+\frac{\lambda}{2m}\sum\limits_{j = 1}^{n} w_{j}^2\\&=\frac{1}{2m}(f_{w,b}(\mathbf X)-\mathbf y)^T(f_{w,b}(\mathbf X)-\mathbf y)+\frac{\lambda}{2m}\mathbf w^T\mathbf w\end{aligned}$$

维度:标量
梯度下降

$$\begin{aligned}w_j :&= w_j - \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j}\\ b :&= b - \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}\\\frac{\partial J(\mathbf{w},b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 1}^{m} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} + \frac{\lambda}{m} w_j \\ \frac{\partial J(\mathbf{w},b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 1}^{m} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \end{aligned}$$

维度:标量
梯度下降(矩阵形式)

$$\begin{aligned}\mathbf w:&=\mathbf w-\alpha\frac{\partial J(\mathbf{w},b)}{\partial \mathbf{w}}\\ \frac{\partial J(\mathbf{w},b)}{\partial \mathbf{w}}&=\frac{1}{m}\mathbf X^T(f_{w,b}(\mathbf X) -\mathbf y)+\frac{\lambda}{m} \mathbf w\end{aligned}$$

维度:$n\times 1$
numpy实现

def zscore_normalize_features(X):
    """Z-score normalize each feature (column) of X.

    Each column is shifted by its mean and scaled by its standard
    deviation so every feature has mean 0 and std 1, which lets a
    single learning rate work for all features in gradient descent.

    Args:
        X: array of shape (m, n), one sample per row.

    Returns:
        Array of shape (m, n) with per-column zero mean and unit std.
    """
    column_means = np.mean(X, axis=0)
    column_stds = np.std(X, axis=0)
    return (X - column_means) / column_stds
# f_wb
def compute_f_wb(X, w, b):
    """Linear model output f(X) = X w + b.

    Args:
        X: feature matrix of shape (m, n).
        w: weight vector of shape (n, 1).
        b: scalar bias.

    Returns:
        Predictions of shape (m, 1).
    """
    return np.dot(X, w) + b
# j_wb
def compute_cost(X, y, w, b, lambda_, f_wb_function):
    """Regularized mean-squared-error cost.

    J(w,b) = 1/(2m) * ||f(X) - y||^2 + lambda/(2m) * w^T w

    Args:
        X: feature matrix of shape (m, n).
        y: labels of shape (m, 1).
        w: weights of shape (n, 1).
        b: scalar bias.
        lambda_: L2 regularization strength.
        f_wb_function: callable (X, w, b) -> predictions of shape (m, 1).

    Returns:
        Scalar cost value.
    """
    m, n = X.shape
    f_wb = f_wb_function(X, w, b)  # (m, 1)
    err = f_wb - y
    # BUG FIX: the original wrote (lambda_/2*m), which evaluates as
    # lambda_ * m / 2 — the regularization term must be lambda_/(2*m)
    # to match the cost formula J = ... + (lambda/(2m)) * w^T w.
    j_wb = (1 / (2 * m)) * np.dot(err.T, err) + (lambda_ / (2 * m)) * np.dot(w.T, w)
    return j_wb[0, 0]  # unwrap the 1x1 matrix to a scalar
# dj_dw,dj_db
def compute_gradient(X, y, w, b, lambda_, f_wb_function):
    """Gradients of the regularized MSE cost w.r.t. w and b.

    dJ/dw = 1/m * X^T (f(X) - y) + lambda/m * w      (shape (n, 1))
    dJ/db = 1/m * sum(f(X) - y)                       (scalar)

    Args:
        X: feature matrix of shape (m, n).
        y: labels of shape (m, 1).
        w: weights of shape (n, 1).
        b: scalar bias.
        lambda_: L2 regularization strength.
        f_wb_function: callable (X, w, b) -> predictions of shape (m, 1).

    Returns:
        Tuple (dj_dw, dj_db): gradient array of shape (n, 1) and scalar.
    """
    m, n = X.shape
    f_wb = f_wb_function(X, w, b)  # (m, 1)
    err = f_wb - y
    # BUG FIX: the regularization gradient is (lambda_/m)*w per the
    # derivation above; the original divided by n (number of features)
    # instead of m (number of samples).
    dj_dw = (1 / m) * np.dot(X.T, err) + (lambda_ / m) * w  # (n, 1)
    dj_db = (1 / m) * np.sum(err)  # scalar
    return dj_dw, dj_db
# w,b,j_history,w_history
def gradient_descent(X, y, w, b, cost_function, gradient_function, f_wb_function, alpha, num_iters, lambda_):
    """Run batch gradient descent for num_iters iterations.

    Args:
        X: feature matrix of shape (m, n).
        y: labels of shape (m, 1).
        w: initial weights of shape (n, 1) (not mutated).
        b: initial scalar bias.
        cost_function: callable (X, y, w, b, lambda_, f_wb_function) -> scalar.
        gradient_function: callable with the same signature -> (dj_dw, dj_db).
        f_wb_function: model callable (X, w, b) -> predictions.
        alpha: learning rate.
        num_iters: number of update steps.
        lambda_: L2 regularization strength.

    Returns:
        Tuple (w, b, J_history, w_history): final parameters, the cost
        after each iteration, and a snapshot of w after each iteration.
    """
    J_history = []
    w_history = []
    w_temp = copy.deepcopy(w)  # avoid mutating the caller's initial weights
    b_temp = b
    for i in range(num_iters):
        dj_dw, dj_db = gradient_function(X, y, w_temp, b_temp, lambda_, f_wb_function)
        w_temp = w_temp - alpha * dj_dw
        b_temp = b_temp - alpha * dj_db
        J_history.append(cost_function(X, y, w_temp, b_temp, lambda_, f_wb_function))
        # BUG FIX: w_history was declared and returned but never filled;
        # record a copy of the weights after every update so callers can
        # inspect the optimization trajectory.
        w_history.append(w_temp.copy())
    return w_temp, b_temp, J_history, w_history
    
	

	



样本点

# Generate training samples: integer x in [0, 20), labels from the cubic
# y = 1 + x + 2x^2 + 3x^3 (the curve we will fit with polynomial features).
x = np.arange(0, 20, 1)
y = 1+x+2*pow(x,2)+3*pow(x,3)
# Scatter-plot the raw samples with plotly (1000x618 canvas).
fig=go.Figure()
fig.update_layout(width=1000,height=618)
fig.add_trace(
    go.Scatter(
        x=x,
        y=y,
        name="样本点",
        mode="markers"
    )
)
fig.show()

在这里插入图片描述



特征

# Build polynomial features x, x^2, x^3 so a linear model can fit the cubic.
feature_1=x # (m,)
feature_2=pow(x,2) # (m,)
feature_3=pow(x,3) # (m,)
# Stack the three feature vectors into an (m, 3) matrix, then z-score
# normalize each column (x^3 spans a much larger range than x, so
# normalization is required for one shared learning rate to converge).
x_=zscore_normalize_features(np.transpose(np.array([
    feature_1,
    feature_2,
    feature_3
    ])))
# Reshape labels to a column vector (m, 1) to match the model's output shape.
y_=y.reshape(-1,1)
x_.shape,y_.shape



梯度下降

# Initialize parameters at zero and run gradient descent
# (lambda_=0 means no regularization here).
m,n=x_.shape
initial_w = np.zeros((n,1))
initial_b = 0
iterations = 150
alpha = 0.5
lambda_=0
w,b,J_history,w_history = gradient_descent(x_ ,y_, initial_w, initial_b, compute_cost, compute_gradient, compute_f_wb,alpha, iterations,lambda_)
# Plot the learning curve: cost J after each iteration should decrease.
fig=go.Figure()
fig.update_layout(width=1000,height=618)
fig.add_trace(
    go.Scatter(
        x=np.arange(1,iterations+1),
        y=J_history,
        name="学习曲线",
        mode="markers+lines"
    )
)
fig.update_layout(
    xaxis_title="迭代次数",
    yaxis_title="J_wb"
)
fig.show()

在这里插入图片描述



预测

# Predict with the fitted parameters on the (normalized) training features.
y_hat=compute_f_wb(x_,w,b) # m*1
# Overlay the original samples (markers) and the fitted curve (line).
fig=go.Figure()
fig.update_layout(width=1000,height=618)
fig.add_trace(
    go.Scatter(
        x=x,
        y=y,
        name="y",
        mode="markers"
    )
)
fig.add_trace(
    go.Scatter(
        x=x,
        # ravel() flattens the (m, 1) predictions to (m,) for plotting
        y=y_hat.ravel(),
        name="y_hat"
        
    )
)
fig.update_layout(
    xaxis_title="x",
    yaxis_title="y"
)
fig.show()

在这里插入图片描述



版权声明:本文为qq_42800926原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。