001package org.apache.commons.jcs3.auxiliary.remote; 002 003import java.io.IOException; 004 005/* 006 * Licensed to the Apache Software Foundation (ASF) under one 007 * or more contributor license agreements. See the NOTICE file 008 * distributed with this work for additional information 009 * regarding copyright ownership. The ASF licenses this file 010 * to you under the Apache License, Version 2.0 (the 011 * "License"); you may not use this file except in compliance 012 * with the License. You may obtain a copy of the License at 013 * 014 * http://www.apache.org/licenses/LICENSE-2.0 015 * 016 * Unless required by applicable law or agreed to in writing, 017 * software distributed under the License is distributed on an 018 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 019 * KIND, either express or implied. See the License for the 020 * specific language governing permissions and limitations 021 * under the License. 022 */ 023 024import java.util.List; 025import java.util.ListIterator; 026import java.util.concurrent.atomic.AtomicBoolean; 027 028import org.apache.commons.jcs3.auxiliary.remote.behavior.IRemoteCacheAttributes; 029import org.apache.commons.jcs3.auxiliary.remote.server.behavior.RemoteType; 030import org.apache.commons.jcs3.engine.CacheStatus; 031import org.apache.commons.jcs3.engine.behavior.ICache; 032import org.apache.commons.jcs3.engine.behavior.IElementSerializer; 033import org.apache.commons.jcs3.engine.logging.behavior.ICacheEventLogger; 034import org.apache.commons.jcs3.log.Log; 035import org.apache.commons.jcs3.log.LogManager; 036 037/** 038 * Used to provide access to multiple services under nowait protection. Factory should construct 039 * NoWaitFacade to give to the composite cache out of caches it constructs from the varies manager 040 * to lateral services. 041 * <p> 042 * Typically, we only connect to one remote server per facade. We use a list of one 043 * RemoteCacheNoWait. 044 */ 045public class RemoteCacheNoWaitFacade<K, V> 046 extends AbstractRemoteCacheNoWaitFacade<K, V> 047{ 048 /** log instance */ 049 private static final Log log = LogManager.getLog( RemoteCacheNoWaitFacade.class ); 050 051 /** Provide factory instance to RemoteCacheFailoverRunner */ 052 private final RemoteCacheFactory cacheFactory; 053 054 /** Attempt to restore primary connection (switched off for testing) */ 055 protected boolean attemptRestorePrimary = true; 056 057 /** Time in ms to sleep between failover attempts */ 058 private static final long idlePeriod = 20000L; 059 060 /** 061 * Constructs with the given remote cache, and fires events to any listeners. 062 * <p> 063 * @param noWaits 064 * @param rca 065 * @param cacheEventLogger 066 * @param elementSerializer 067 * @param cacheFactory 068 */ 069 public RemoteCacheNoWaitFacade( final List<RemoteCacheNoWait<K,V>> noWaits, 070 final IRemoteCacheAttributes rca, 071 final ICacheEventLogger cacheEventLogger, 072 final IElementSerializer elementSerializer, 073 final RemoteCacheFactory cacheFactory) 074 { 075 super( noWaits, rca, cacheEventLogger, elementSerializer ); 076 this.cacheFactory = cacheFactory; 077 } 078 079 /** 080 * Begin the failover process if this is a local cache. Clustered remote caches do not failover. 081 * <p> 082 * @param rcnw The no wait in error. 083 */ 084 @Override 085 protected void failover( final RemoteCacheNoWait<K, V> rcnw ) 086 { 087 log.debug( "in failover for {0}", rcnw ); 088 089 if ( getAuxiliaryCacheAttributes().getRemoteType() == RemoteType.LOCAL ) 090 { 091 if ( rcnw.getStatus() == CacheStatus.ERROR ) 092 { 093 // start failover, primary recovery process 094 final Thread runner = new Thread(this::connectAndRestore); 095 runner.setDaemon( true ); 096 runner.start(); 097 098 if ( getCacheEventLogger() != null ) 099 { 100 getCacheEventLogger().logApplicationEvent( "RemoteCacheNoWaitFacade", "InitiatedFailover", 101 rcnw + " was in error." ); 102 } 103 } 104 else 105 { 106 log.info( "The noWait is not in error" ); 107 } 108 } 109 } 110 111 /** 112 * The thread tries to establish a connection with a failover 113 * server, if any are defined. Once a failover connection is made, it will 114 * attempt to replace the failover with the primary remote server. 115 * <p> 116 * It works by switching out the RemoteCacheNoWait inside the Facade. 117 * <p> 118 * Client (i.e.) the CompositeCache has reference to a RemoteCacheNoWaitFacade. 119 * This facade is created by the RemoteCacheFactory. The factory maintains a set 120 * of managers, one for each remote server. Typically, there will only be one 121 * manager. 122 * <p> 123 * If you use multiple remote servers, you may want to set one or more as 124 * failovers. If a local cache cannot connect to the primary server, or looses 125 * its connection to the primary server, it will attempt to restore that 126 * Connection in the background. If failovers are defined, the Failover runner 127 * will try to connect to a failover until the primary is restored. 128 * If no failovers are defined, this will exit automatically. 129 * 130 * @since 3.1 131 */ 132 protected void connectAndRestore() 133 { 134 final IRemoteCacheAttributes rca0 = getAuxiliaryCacheAttributes(); 135 // Each RemoteCacheManager corresponds to one remote connection. 136 final List<RemoteLocation> failovers = rca0.getFailovers(); 137 // we should probably check to see if there are any failovers, 138 // even though the caller should have already. 139 140 if ( failovers == null ) 141 { 142 log.warn( "Remote is misconfigured, failovers was null." ); 143 return; 144 } 145 if ( failovers.size() == 1 ) 146 { 147 // if there is only the primary, return out of this 148 log.info( "No failovers defined, exiting failover runner." ); 149 return; 150 } 151 152 final AtomicBoolean allright = new AtomicBoolean(false); 153 154 do 155 { 156 log.info( "Remote cache FAILOVER RUNNING." ); 157 158 // there is no active listener 159 if ( !allright.get() ) 160 { 161 // Monitor each RemoteCacheManager instance one after the other. 162 final int fidx = rca0.getFailoverIndex(); 163 log.debug( "fidx = {0} failovers.size = {1}", rca0::getFailoverIndex, failovers::size); 164 165 // If we don't check the primary, if it gets connected in the 166 // background, 167 // we will disconnect it only to put it right back 168 final ListIterator<RemoteLocation> i = failovers.listIterator(fidx); // + 1; // +1 skips the primary 169 log.debug( "starting at failover i = {0}", i ); 170 171 // try them one at a time until successful 172 while (i.hasNext() && !allright.get()) 173 { 174 final int failoverIndex = i.nextIndex(); 175 final RemoteLocation server = i.next(); 176 log.debug("Trying server [{0}] at failover index i = {1}", server, failoverIndex); 177 178 final RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone(); 179 rca.setRemoteLocation(server); 180 final RemoteCacheManager rcm = cacheFactory.getManager( rca ); 181 182 log.debug( "RemoteCacheAttributes for failover = {0}", rca ); 183 184 if (rcm != null) 185 { 186 // add a listener if there are none, need to tell rca 187 // what number it is at 188 final ICache<K, V> ic = rcm.getCache( rca ); 189 if ( ic.getStatus() == CacheStatus.ALIVE ) 190 { 191 // may need to do this more gracefully 192 log.debug( "resetting no wait" ); 193 restorePrimaryServer((RemoteCacheNoWait<K, V>) ic); 194 rca0.setFailoverIndex(failoverIndex); 195 196 log.debug("setting ALLRIGHT to true"); 197 if (i.hasPrevious()) 198 { 199 log.debug("Moving to Primary Recovery Mode, failover index = {0}", failoverIndex); 200 } 201 else 202 { 203 log.debug("No need to connect to failover, the primary server is back up."); 204 } 205 206 allright.set(true); 207 208 log.info( "CONNECTED to host = [{0}]", rca::getRemoteLocation); 209 } 210 } 211 } 212 } 213 // end if !allright 214 // get here if while index >0 and allright, meaning that we are 215 // connected to some backup server. 216 else 217 { 218 log.debug( "ALLRIGHT is true " ); 219 log.info( "Failover runner is in primary recovery mode. " 220 + "Failover index = {0} Will now try to reconnect to " 221 + "primary server.", rca0::getFailoverIndex); 222 } 223 224 // Exit loop if in test mode 225 if (allright.get() && !attemptRestorePrimary) 226 { 227 break; 228 } 229 230 boolean primaryRestoredSuccessfully = false; 231 // if we are not connected to the primary, try. 232 if (rca0.getFailoverIndex() > 0) 233 { 234 primaryRestoredSuccessfully = restorePrimary(); 235 log.debug( "Primary recovery success state = {0}", 236 primaryRestoredSuccessfully ); 237 } 238 239 if (!primaryRestoredSuccessfully) 240 { 241 // Time driven mode: sleep between each round of recovery attempt. 242 try 243 { 244 log.warn( "Failed to reconnect to primary server. " 245 + "Cache failover runner is going to sleep for " 246 + "{0} milliseconds.", idlePeriod ); 247 Thread.sleep( idlePeriod ); 248 } 249 catch ( final InterruptedException ex ) 250 { 251 // ignore; 252 } 253 } 254 255 // try to bring the listener back to the primary 256 } 257 while (rca0.getFailoverIndex() > 0 || !allright.get()); 258 // continue if the primary is not restored or if things are not allright. 259 260 if ( log.isInfoEnabled() ) 261 { 262 final int failoverIndex = rca0.getFailoverIndex(); 263 log.info( "Exiting failover runner. Failover index = {0}", failoverIndex); 264 265 if ( failoverIndex <= 0 ) 266 { 267 log.info( "Failover index is <= 0, meaning we are not connected to a failover server." ); 268 } 269 else 270 { 271 log.info( "Failover index is > 0, meaning we are connected to a failover server." ); 272 } 273 } 274 } 275 276 /** 277 * Try to restore the primary server. 278 * <p> 279 * Once primary is restored the failover listener must be deregistered. 280 * <p> 281 * The primary server is the first server defines in the FailoverServers 282 * list. 283 * 284 * @return boolean value indicating whether the restoration was successful 285 */ 286 private boolean restorePrimary() 287 { 288 final IRemoteCacheAttributes rca0 = getAuxiliaryCacheAttributes(); 289 // try to move back to the primary 290 final RemoteLocation server = rca0.getFailovers().get(0); 291 292 log.info( "Trying to restore connection to primary remote server " 293 + "[{0}]", server ); 294 295 final RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone(); 296 rca.setRemoteLocation(server); 297 final RemoteCacheManager rcm = cacheFactory.getManager( rca ); 298 299 if (rcm != null) 300 { 301 // add a listener if there are none, need to tell rca what number it 302 // is at 303 final ICache<K, V> ic = rcm.getCache( rca ); 304 // by default the listener id should be 0, else it will be the 305 // listener 306 // Originally associated with the remote cache. either way is fine. 307 // We just don't want the listener id from a failover being used. 308 // If the remote server was rebooted this could be a problem if new 309 // locals were also added. 310 311 if ( ic.getStatus() == CacheStatus.ALIVE ) 312 { 313 try 314 { 315 // we could have more than one listener registered right 316 // now. 317 // this will not result in a loop, only duplication 318 // stop duplicate listening. 319 if (getPrimaryServer() != null && getPrimaryServer().getStatus() == CacheStatus.ALIVE ) 320 { 321 final int fidx = rca0.getFailoverIndex(); 322 323 if ( fidx > 0 ) 324 { 325 final RemoteLocation serverOld = rca0.getFailovers().get(fidx); 326 327 log.debug( "Failover Index = {0} the server at that " 328 + "index is [{1}]", fidx, serverOld ); 329 330 if ( serverOld != null ) 331 { 332 // create attributes that reflect the 333 // previous failed over configuration. 334 final RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) rca0.clone(); 335 rcaOld.setRemoteLocation(serverOld); 336 final RemoteCacheManager rcmOld = cacheFactory.getManager( rcaOld ); 337 338 if ( rcmOld != null ) 339 { 340 // manager can remove by name if 341 // necessary 342 rcmOld.removeRemoteCacheListener( rcaOld ); 343 } 344 log.info( "Successfully deregistered from " 345 + "FAILOVER remote server = {0}", serverOld ); 346 } 347 } 348 else if ( fidx == 0 ) 349 { 350 // this should never happen. If there are no 351 // failovers this shouldn't get called. 352 if ( log.isDebugEnabled() ) 353 { 354 log.debug( "No need to restore primary, it is already restored." ); 355 return true; 356 } 357 } 358 else { 359 // this should never happen 360 log.warn( "Failover index is less than 0, this shouldn't happen" ); 361 } 362 } 363 } 364 catch ( final IOException e ) 365 { 366 // TODO, should try again, or somehow stop the listener 367 log.error("Trouble trying to deregister old failover " 368 + "listener prior to restoring the primary = {0}", 369 server, e ); 370 } 371 372 // Restore primary 373 // may need to do this more gracefully, letting the failover finish in the background 374 final RemoteCacheNoWait<K, V> failoverNoWait = getPrimaryServer(); 375 376 // swap in a new one 377 restorePrimaryServer((RemoteCacheNoWait<K, V>) ic); 378 rca0.setFailoverIndex( 0 ); 379 380 final String message = "Successfully reconnected to PRIMARY " 381 + "remote server. Substituted primary for " 382 + "failoverNoWait [" + failoverNoWait + "]"; 383 log.info( message ); 384 385 if (getCacheEventLogger() != null) 386 { 387 getCacheEventLogger().logApplicationEvent( 388 "RemoteCacheFailoverRunner", "RestoredPrimary", 389 message ); 390 } 391 return true; 392 } 393 } 394 395 // else all right 396 // if the failover index was at 0 here, we would be in a bad 397 // situation, unless there were just 398 // no failovers configured. 399 log.debug( "Primary server status in error, not connected." ); 400 401 return false; 402 } 403}