文章目录
一、背景
开发过程中,我们遇到了堆外内存泄漏的问题——服务会出现
假死现象
(看似服务进程还跑着,但是没有业务响应)。
查看logback的error日志,发现其中记录了下面的信息:
[location-center:192.168.5.14:8017] [,] 2021-07-27 09:38:39.640 ERROR 53950 [lettuce-nioEventLoop-8-1] io.netty.util.ResourceLeakDetector LEAK: ByteBuf.release() was not called before it's garbage-collected. See https://netty.io/wiki/reference-counted-objects.html for more information.
Recent access records:
Created at:
io.netty.buffer.PooledByteBufAllocator.newDirectBuffer(PooledByteBufAllocator.java:349)
io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:187)
io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:178)
io.netty.buffer.AbstractByteBufAllocator.ioBuffer(AbstractByteBufAllocator.java:139)
io.netty.channel.DefaultMaxMessagesRecvByteBufAllocator$MaxMessageHandle.allocate(DefaultMaxMessagesRecvByteBufAllocator.java:114)
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:147)
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700)
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:635)
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:552)
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514)
io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050)
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
java.lang.Thread.run(Thread.java:748)
[location-center:192.168.5.14:8017] [,] 2021-07-27 09:38:39.642 ERROR 53950 [lettuce-nioEventLoop-8-1] io.netty.util.ResourceLeakDetector LEAK: ByteBuf.release() was not called before it's garbage-collected. See https://netty.io/wiki/reference-counted-objects.html for more information.
Recent access records:
Created at:
io.netty.buffer.PooledByteBufAllocator.newDirectBuffer(PooledByteBufAllocator.java:349)
io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:187)
io.netty.buffer.AbstractByteBufAllocator.directBuffer(AbstractByteBufAllocator.java:178)
io.netty.buffer.AbstractByteBufAllocator.buffer(AbstractByteBufAllocator.java:115)
io.netty.handler.codec.ByteToMessageDecoder.expandCumulation(ByteToMessageDecoder.java:532)
io.netty.handler.codec.ByteToMessageDecoder$1.cumulate(ByteToMessageDecoder.java:92)
io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:279)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1422)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:931)
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163)
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700)
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:635)
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:552)
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514)
io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050)
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
java.lang.Thread.run(Thread.java:748)
[location-center:192.168.5.14:8017] [,] 2021-07-27 10:23:56.884 ERROR 53950 [lettuce-nioEventLoop-8-1] io.netty.util.ResourceLeakDetector LEAK: ByteBuf.release() was not called before it's garbage-collected. See https://netty.io/wiki/reference-counted-objects.html for more information.
Recent access records:
Created at:
io.netty.buffer.SimpleLeakAwareByteBuf.unwrappedDerived(SimpleLeakAwareByteBuf.java:143)
io.netty.buffer.SimpleLeakAwareByteBuf.readRetainedSlice(SimpleLeakAwareByteBuf.java:67)
io.netty.handler.codec.DelimiterBasedFrameDecoder.decode(DelimiterBasedFrameDecoder.java:276)
io.netty.handler.codec.DelimiterBasedFrameDecoder.decode(DelimiterBasedFrameDecoder.java:218)
io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:503)
io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:442)
io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:281)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1422)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:931)
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163)
io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700)
io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:635)
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:552)
io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514)
io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050)
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
java.lang.Thread.run(Thread.java:748)
心急的小伙伴,已经在开始百度的路上了,对不对?此时选择了百度,你就选择了“远路”……
🎃原来,在日志中Netty已经给我们提供了官方文档地址,官方文档对该问题进行了详细的说明。
[danger] 你是否看到了日志中的这个提示?
“See https://netty.io/wiki/reference-counted-objects.html for more information.”
二、Netty堆外内存回收机制
什么是堆外内存?
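堆外内存也称为直接内存(Direct Memory)。Netty在I/O读写时分配的ByteBuf(如上面日志中由PooledByteBufAllocator.newDirectBuffer创建的缓冲区)占用的就是堆外内存;这部分内存不在JVM堆上,Netty通过引用计数来管理它的回收,而不是依赖JVM的垃圾回收机制。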
Netty堆外内存回收机制具体如何?谁来负责堆外内存的回收?
If a [sending] component is supposed to pass a reference-counted object to another [receiving] component, the sending component usually does not need to destroy it but defers that decision to the receiving component.
If a component consumes a reference-counted object and knows nothing else will access it anymore (i.e., does not pass along a reference to yet another component), the component should destroy it.
我们可以从
官方文档
中获取更加清晰的认识,总结如下:
- Netty堆外内存回收算法是基于引用计数的;
- 如果一个发送组件(sending component)要把一个引用计数对象传递给另一个接收组件(receiving component),通常发送组件不需要释放该对象,而是把释放的决定权交给接收组件。
- 如果一个组件(component)消费了一个引用计数对象,并且确定之后不会再有其他组件访问它(即不再把引用继续传递下去),该组件应负责引用释放——引用计数会-1。如:ChannelHandler 就是典型的引用计数对象接收组件。
官方提供了判断是否需要引用释放的案例:
/**
 * Writes the single byte 42 into {@code input} and returns that same buffer.
 * This method does not call {@code release()}: the buffer is handed back to the
 * caller unchanged in ownership.
 */
public ByteBuf a(ByteBuf input) {
input.writeByte(42);
return input;
}
/**
 * Copies {@code input} into a newly allocated direct buffer, appends the byte 42,
 * and returns the copy.
 *
 * <p>{@code input} is consumed here: it is released in {@code finally}, so it is
 * released whether or not the copy succeeds. The returned buffer is a new
 * allocation that this method never releases; releasing it is left to whoever
 * receives the return value.
 *
 * @param input the buffer to copy; released by this method
 * @return a new direct buffer holding the readable bytes of {@code input} followed by 42
 */
public ByteBuf b(ByteBuf input) {
    try {
        // One extra byte of capacity for the trailing 42.
        ByteBuf output = input.alloc().directBuffer(input.readableBytes() + 1);
        output.writeBytes(input);
        output.writeByte(42);
        return output;
    } finally {
        input.release();
    }
}
/**
 * Prints {@code input} to standard output and then releases it.
 * Nothing is returned, so the reference is not passed on to any other code.
 */
public void c(ByteBuf input) {
System.out.println(input);
input.release();
}
/**
 * Walks one buffer through a(), b() and c() in turn and then checks that the
 * original buffer's reference count has dropped to 0.
 * The {@code ...} lines are placeholders for code the sample leaves out.
 */
public void main() {
...
ByteBuf buf = ...;
// This will print buf to System.out and destroy it.
c(b(a(buf)));
// Verifies that buf itself has been fully released by the call chain above.
assert buf.refCnt() == 0;
}
事件说明 | 什么对象应该释放? | 什么对象释放成功了? |
---|---|---|
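1. `main()` creates `buf` | `buf` → `main()` | |
2. `main()` calls `a()` with `buf` | `buf` → `a()` | |
3. `a()` returns `buf` merely. | `buf` → `main()` | |
4. `main()` calls `b()` with `buf` | `buf` → `b()` | |
5. `b()` returns the copy of `buf` | `buf` → `b()`, `copy` → `main()` | `b()` releases `buf` |
6. `main()` calls `c()` with `copy` | `copy` → `c()` | |
7. `c()` swallows `copy` | `copy` → `c()` | `c()` releases `copy` |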
翻看netty源码,我们可以找到堆外内存回收的一些入口:
io.netty.buffer.AbstractReferenceCountedByteBuf#release()
。
入站消息对象如何回收?
上面的内容是不是看着有点绕?在Netty中ByteBuf一般用在ChannelHandler中,官方文档也提供了较为详细的案例。
- 入站消息(Inbound messages)会触发读取回调 channelRead(),最终消费该消息的handler应该负责调用 release() 方法。如:
/**
 * Handles an inbound message as a ByteBuf and releases it afterwards.
 * The release sits in {@code finally}, so it runs even if the handling code
 * (the {@code ...} placeholder) throws.
 */
public void channelRead(ChannelHandlerContext ctx, Object msg) {
ByteBuf buf = (ByteBuf) msg;
try {
...
} finally {
buf.release();
}
}
- 消息解码器(decoder)解码后产生的消息对象也可能是引用计数对象,同样需要由消费它的handler负责释放。
// Assuming your handler is placed next to `HttpRequestDecoder`
/**
 * Handles decoded HTTP messages. Only the HttpContent branch calls
 * {@code release()}, and it does so in {@code finally} so the content is released
 * even if the handling code (the {@code ...} placeholder) throws.
 * The HttpRequest branch releases nothing.
 * NOTE(review): presumably HttpRequest is not reference-counted; confirm against the Netty docs.
 */
public void channelRead(ChannelHandlerContext ctx, Object msg) {
if (msg instanceof HttpRequest) {
HttpRequest req = (HttpRequest) msg;
...
}
if (msg instanceof HttpContent) {
HttpContent content = (HttpContent) msg;
try {
...
} finally {
content.release();
}
}
}
以上两种情况,在判别上有难度的话,Netty还提供更加简洁的方式来释放引用:
/**
 * Handles an inbound message of any type and always passes it to
 * {@code ReferenceCountUtil.release(msg)} in {@code finally}, without first
 * casting or type-checking the message.
 * NOTE(review): per the Netty API docs this helper only releases messages that
 * are reference-counted and ignores others; confirm for the Netty version in use.
 */
public void channelRead(ChannelHandlerContext ctx, Object msg) {
try {
...
} finally {
ReferenceCountUtil.release(msg);
}
}
当然,出站消息(Outbound messages)对象引用释放不当,也会造成堆外内存泄漏,更多内容欢迎自行查看官方文档,子涵在此也就不再一一翻译了。
三、堆外内存监控配置
堆外内存到底有没有被回收?在Netty中如何进行监控?Netty中有个类 PlatformDependent,其中的静态字段 "DIRECT_MEMORY_COUNTER" 记录了堆外内存占用的情况,我们可以通过反射获取该字段的值,从而达到监控的目的。
反射监控堆外内存
在Springboot项目中,我们可以定义一个定时任务,定时(下面的示例为每30秒一次)打印堆外内存的情况到日志中。
import io.netty.util.internal.PlatformDependent;
import java.lang.reflect.Field;
import java.util.concurrent.atomic.AtomicLong;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
@Configuration //1.主要用于标记配置类,兼备Component的效果。
@EnableScheduling // 2.开启定时任务
@Slf4j
public class SaticScheduleTask {
private static final int _1K = 1024;
private static final String BUSINESS_KEY = "netty-direct-memory";
private AtomicLong directMemory;
@PostConstruct
public void init(){
Field field = ReflectUtil.getField(PlatformDependent.class,"DIRECT_MEMORY_COUNTER");
field.setAccessible(true);
try {
directMemory = (AtomicLong) field.get(PlatformDependent.class);
} catch (IllegalAccessException e) {
e.printStackTrace();
}
}
/**
* 每隔5s统计一下堆外直接内存
*/
@Scheduled(fixedRate = 30000)
public void report(){
int memoryInKb = (int) (directMemory.get()/_1K);
log.info("{}:{}k",BUSINESS_KEY,memoryInKb);
}
}
服务器上监控堆外内存
cat location-center-info.log |grep netty-direct
结果如下:
[root@localhost location-center]# cat location-center-info.log |grep netty-direct
[location-center:192.168.5.12:8017] [dce11c40a9bc3f77,dce11c40a9bc3f77] 2021-08-02 09:05:42.332 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:32768k
[location-center:192.168.5.12:8017] [f486d613826fab3c,f486d613826fab3c] 2021-08-02 09:06:12.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [a94a3c3012804430,a94a3c3012804430] 2021-08-02 09:06:42.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [2c1e844ae37e00e0,2c1e844ae37e00e0] 2021-08-02 09:07:12.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [67106bcae0fb116d,67106bcae0fb116d] 2021-08-02 09:07:42.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [b9a673e7118c05d7,b9a673e7118c05d7] 2021-08-02 09:08:12.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [ddfe5f1003ac4514,ddfe5f1003ac4514] 2021-08-02 09:08:42.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [cd1fbb7b9c84423d,cd1fbb7b9c84423d] 2021-08-02 09:09:12.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [fd897a29ec939f7f,fd897a29ec939f7f] 2021-08-02 09:09:42.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
[location-center:192.168.5.12:8017] [48893694a422f0ae,48893694a422f0ae] 2021-08-02 09:10:12.285 INFO 29468 [pool-3-thread-1] com.keyou.evm.location.task.SaticScheduleTask netty-direct-memory:131072k
四、其他案例
Netty堆外内存泄漏还有一些其他场景。本小册不再一一列举,我读过的一些优秀文章希望可以作为大家的参考。
本文已收录在
《从NIO到Netty,实战出租车905.4协议》