在 foreach rdd中执行,重新广播。可以基于job 批次数量 或者是 时间 public class BroadcastStringPeriodicUpdater { private static final int PERIOD = 60 * 1000; private static volatile BroadcastStringPeriodicUpdater instance; private Broadcast<String> broadcast; private long lastUpdate = 0L; private BroadcastStringPeriodicUpdater() {} public static BroadcastStringPeriodicUpdater getInstance() { if (instance == null) { synchronized (BroadcastStringPeriodicUpdater.class) { if (instance == null) { instance = new BroadcastStringPeriodicUpdater(); } } } return instance; } public String updateAndGet(SparkContext sc) { long now = System.currentTimeMillis(); long offset = now - lastUpdate; if (offset > PERIOD || broadcast == null) { if (broadcast != null) { broadcast.unpersist(); } lastUpdate = now; String value = fetchBroadcastValue(); broadcast = JavaSparkContext.fromSparkContext(sc).broadcast(value); } return broadcast.getValue(); } private String fetchBroadcastValue() { } }
使用时可以这样调用:
// Obtain the current value from the shared updater; updateAndGet re-creates the broadcast
// first when it is missing or older than the refresh period. rdd.context() supplies the
// SparkContext.
String broadcastValue = BroadcastStringPeriodicUpdater.getInstance().updateAndGet(rdd.context());
版权声明:本文为u013939918原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。