Commit 97297fd6 authored by Dave Cridland

Merge pull request #159 from tevans/OF-864

OF-864: Cleanup routes to defunct cluster nodes
parents 7070fbdb 1c961a26
......@@ -233,22 +233,32 @@ public class RoutingTableImpl extends BasicModule implements RoutingTable, Clust
*/
public void routePacket(JID jid, Packet packet, boolean fromServer) throws PacketException {
boolean routed = false;
if (serverName.equals(jid.getDomain())) {
// Packet sent to our domain.
routed = routeToLocalDomain(jid, packet, fromServer);
}
else if (jid.getDomain().endsWith(serverName) && hasComponentRoute(jid)) {
// Packet sent to component hosted in this server
routed = routeToComponent(jid, packet, routed);
}
else {
// Packet sent to remote server
routed = routeToRemoteDomain(jid, packet, routed);
try {
if (serverName.equals(jid.getDomain())) {
// Packet sent to our domain.
routed = routeToLocalDomain(jid, packet, fromServer);
}
else if (jid.getDomain().endsWith(serverName) && hasComponentRoute(jid)) {
// Packet sent to component hosted in this server
routed = routeToComponent(jid, packet, routed);
}
else {
// Packet sent to remote server
routed = routeToRemoteDomain(jid, packet, routed);
}
} catch (Exception ex) {
// Catch here to ensure that all packets get handled, despite various processing
// exceptions, rather than letting any fall through the cracks. For example,
// an IAE could be thrown when running in a cluster if a remote member becomes
// unavailable before the routing caches are updated to remove the defunct node.
// We have also occasionally seen various flavors of NPE and other oddities,
// typically due to unexpected environment or logic breakdowns.
Log.error("Primary packet routing failed", ex);
}
if (!routed) {
if (Log.isDebugEnabled()) {
Log.debug("RoutingTableImpl: Failed to route packet to JID: {} packet: {}", jid, packet.toXML());
Log.debug("Failed to route packet to JID: {} packet: {}", jid, packet.toXML());
}
if (packet instanceof IQ) {
iqRouter.routingFailed(jid, packet);
......@@ -972,7 +982,69 @@ public class RoutingTableImpl extends BasicModule implements RoutingTable, Clust
}
/**
 * Cleans up the shared routing caches after a peer server has left the cluster.
 *
 * Any client, server-domain, or component route that was associated with the
 * defunct node is dropped so the remaining cluster member(s) stop trying to
 * route packets through it.
 *
 * @param nodeID raw identifier of the cluster node that left the cluster
 */
public void leftCluster(byte[] nodeID) {
    // drop routes for all client sessions connected via the defunct cluster node
    Lock clientLock = CacheFactory.getLock(nodeID, usersCache);
    // Acquire before entering the try block so that unlock() in the finally
    // clause only ever runs when the lock is actually held.
    clientLock.lock();
    try {
        List<String> remoteClientRoutes = new ArrayList<String>();
        // NOTE(review): these two loops rely on NodeID.equals accepting a raw
        // byte[] identifier, exactly as the original code did - confirm.
        for (Map.Entry<String, ClientRoute> entry : usersCache.entrySet()) {
            if (entry.getValue().getNodeID().equals(nodeID)) {
                remoteClientRoutes.add(entry.getKey());
            }
        }
        for (Map.Entry<String, ClientRoute> entry : anonymousUsersCache.entrySet()) {
            if (entry.getValue().getNodeID().equals(nodeID)) {
                remoteClientRoutes.add(entry.getKey());
            }
        }
        // Remove after iterating so the caches are not modified while being traversed.
        for (String route : remoteClientRoutes) {
            removeClientRoute(new JID(route));
        }
    }
    finally {
        clientLock.unlock();
    }

    // remove routes for server domains that were accessed through the defunct node
    Lock serverLock = CacheFactory.getLock(nodeID, serversCache);
    serverLock.lock();
    try {
        List<String> remoteServerDomains = new ArrayList<String>();
        for (Map.Entry<String, byte[]> entry : serversCache.entrySet()) {
            // Arrays must be compared by content; byte[].equals() is reference
            // identity and would practically never match the departed node.
            if (java.util.Arrays.equals(entry.getValue(), nodeID)) {
                remoteServerDomains.add(entry.getKey());
            }
        }
        for (String domain : remoteServerDomains) {
            removeServerRoute(new JID(domain));
        }
    }
    finally {
        serverLock.unlock();
    }

    // remove component routes for the defunct node
    Lock componentLock = CacheFactory.getLock(nodeID, componentsCache);
    componentLock.lock();
    try {
        // The cached sets hold NodeID instances, so the raw identifier has to be
        // converted first; removing a byte[] from a Set<NodeID> never matches.
        NodeID defunctNode = NodeID.getInstance(nodeID);
        List<String> remoteComponents = new ArrayList<String>();
        for (Map.Entry<String, Set<NodeID>> entry : componentsCache.entrySet()) {
            // Only components that were hosted solely on the defunct node lose
            // their route entirely.
            // NOTE(review): for components still hosted elsewhere the trimmed set
            // is not written back to the cache here - confirm whether the cache
            // implementation requires an explicit put() to publish the change.
            if (entry.getValue().remove(defunctNode) && entry.getValue().isEmpty()) {
                remoteComponents.add(entry.getKey());
            }
        }
        for (String jid : remoteComponents) {
            removeComponentRoute(new JID(jid));
        }
    }
    finally {
        componentLock.unlock();
    }
}
public void markedAsSeniorClusterMember() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment