001package org.apache.commons.jcs.auxiliary.remote; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import java.io.IOException; 023import java.util.List; 024import java.util.ListIterator; 025 026import org.apache.commons.jcs.auxiliary.AbstractAuxiliaryCacheMonitor; 027import org.apache.commons.jcs.auxiliary.remote.behavior.IRemoteCacheAttributes; 028import org.apache.commons.jcs.engine.CacheStatus; 029import org.apache.commons.jcs.engine.behavior.ICache; 030 031/** 032 * The RemoteCacheFailoverRunner tries to establish a connection with a failover 033 * server, if any are defined. Once a failover connection is made, it will 034 * attempt to replace the failover with the primary remote server. 035 * <p> 036 * It works by switching out the RemoteCacheNoWait inside the Facade. 037 * <p> 038 * Client (i.e.) the CompositeCache has reference to a RemoteCacheNoWaitFacade. 039 * This facade is created by the RemoteCacheFactory. The factory maintains a set 040 * of managers, one for each remote server. Typically, there will only be one 041 * manager. 042 * <p> 043 * If you use multiple remote servers, you may want to set one or more as 044 * failovers. If a local cache cannot connect to the primary server, or looses 045 * its connection to the primary server, it will attempt to restore that 046 * Connection in the background. If failovers are defined, the Failover runner 047 * will try to connect to a failover until the primary is restored. 048 * 049 */ 050public class RemoteCacheFailoverRunner<K, V> extends AbstractAuxiliaryCacheMonitor 051{ 052 /** The facade returned to the composite cache. */ 053 private final RemoteCacheNoWaitFacade<K, V> facade; 054 055 /** Factory instance */ 056 private final RemoteCacheFactory cacheFactory; 057 058 /** 059 * Constructor for the RemoteCacheFailoverRunner object. This allows the 060 * FailoverRunner to modify the facade that the CompositeCache references. 061 * 062 * @param facade the facade the CompositeCache talks to. 063 * @param cacheFactory the cache factory instance 064 */ 065 public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade<K, V> facade, RemoteCacheFactory cacheFactory ) 066 { 067 super("JCS-RemoteCacheFailoverRunner"); 068 this.facade = facade; 069 this.cacheFactory = cacheFactory; 070 setIdlePeriod(20000L); 071 } 072 073 /** 074 * Clean up all resources before shutdown 075 */ 076 @Override 077 protected void dispose() 078 { 079 // empty 080 } 081 082 /** 083 * do actual work 084 */ 085 @Override 086 protected void doWork() 087 { 088 // empty 089 } 090 091 092 /** 093 * Main processing method for the RemoteCacheFailoverRunner object. 094 * <p> 095 * If we do not have a connection with any failover server, this will try to 096 * connect one at a time. If no connection can be made, it goes to sleep for 097 * a while (20 seconds). 098 * <p> 099 * Once a connection with a failover is made, we will try to reconnect to 100 * the primary server. 101 * <p> 102 * The primary server is the first server defines in the FailoverServers 103 * list. 104 */ 105 @Override 106 public void run() 107 { 108 // start the main work of connecting to a failover and then restoring 109 // the primary. 110 connectAndRestore(); 111 112 if ( log.isInfoEnabled() ) 113 { 114 int failoverIndex = facade.getAuxiliaryCacheAttributes().getFailoverIndex(); 115 log.info( "Exiting failover runner. Failover index = " + failoverIndex); 116 117 if ( failoverIndex <= 0 ) 118 { 119 log.info( "Failover index is <= 0, meaning we are not connected to a failover server." ); 120 } 121 else if ( failoverIndex > 0 ) 122 { 123 log.info( "Failover index is > 0, meaning we are connected to a failover server." ); 124 } 125 // log if we are allright or not. 126 } 127 } 128 129 /** 130 * This is the main loop. If there are failovers defined, then this will 131 * continue until the primary is re-connected. If no failovers are defined, 132 * this will exit automatically. 133 */ 134 private void connectAndRestore() 135 { 136 IRemoteCacheAttributes rca0 = facade.getAuxiliaryCacheAttributes(); 137 138 do 139 { 140 log.info( "Remote cache FAILOVER RUNNING." ); 141 142 // there is no active listener 143 if ( !allright.get() ) 144 { 145 // Monitor each RemoteCacheManager instance one after the other. 146 // Each RemoteCacheManager corresponds to one remote connection. 147 List<RemoteLocation> failovers = rca0.getFailovers(); 148 // we should probably check to see if there are any failovers, 149 // even though the caller 150 // should have already. 151 152 if ( failovers == null ) 153 { 154 log.warn( "Remote is misconfigured, failovers was null." ); 155 return; 156 } 157 else if ( failovers.size() == 1 ) 158 { 159 // if there is only the primary, return out of this 160 log.info( "No failovers defined, exiting failover runner." ); 161 return; 162 } 163 164 int fidx = rca0.getFailoverIndex(); 165 log.debug( "fidx = " + fidx + " failovers.size = " + failovers.size() ); 166 167 // shouldn't we see if the primary is backup? 168 // If we don't check the primary, if it gets connected in the 169 // background, 170 // we will disconnect it only to put it right back 171 ListIterator<RemoteLocation> i = failovers.listIterator(fidx); // + 1; // +1 skips the primary 172 if ( log.isDebugEnabled() ) 173 { 174 log.debug( "starting at failover i = " + i.nextIndex() ); 175 } 176 177 // try them one at a time until successful 178 for ( ; i.hasNext() && !allright.get();) 179 { 180 RemoteLocation server = i.next(); 181 if ( log.isDebugEnabled() ) 182 { 183 log.debug( "Trying server [" + server + "] at failover index i = " + i ); 184 } 185 186 RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone(); 187 rca.setRemoteLocation(server); 188 RemoteCacheManager rcm = cacheFactory.getManager( rca ); 189 190 if ( log.isDebugEnabled() ) 191 { 192 log.debug( "RemoteCacheAttributes for failover = " + rca.toString() ); 193 } 194 195 if (rcm != null) 196 { 197 // add a listener if there are none, need to tell rca 198 // what number it is at 199 ICache<K, V> ic = rcm.getCache( rca ); 200 if ( ic.getStatus() == CacheStatus.ALIVE ) 201 { 202 // may need to do this more gracefully 203 log.debug( "resetting no wait" ); 204 facade.restorePrimaryServer((RemoteCacheNoWait<K, V>) ic); 205 rca0.setFailoverIndex( i.nextIndex() ); 206 207 if ( log.isDebugEnabled() ) 208 { 209 log.debug( "setting ALLRIGHT to true" ); 210 if ( i.hasPrevious() ) 211 { 212 log.debug( "Moving to Primary Recovery Mode, failover index = " + i.nextIndex() ); 213 } 214 else 215 { 216 log.debug( "No need to connect to failover, the primary server is back up." ); 217 } 218 } 219 220 allright.set(true); 221 222 if ( log.isInfoEnabled() ) 223 { 224 log.info( "CONNECTED to host = [" + rca.getRemoteLocation() + "]" ); 225 } 226 } 227 } 228 } 229 } 230 // end if !allright 231 // get here if while index >0 and allright, meaning that we are 232 // connected to some backup server. 233 else 234 { 235 if ( log.isDebugEnabled() ) 236 { 237 log.debug( "ALLRIGHT is true " ); 238 } 239 if ( log.isInfoEnabled() ) 240 { 241 log.info( "Failover runner is in primary recovery mode. Failover index = " 242 + rca0.getFailoverIndex() + "\n" + "Will now try to reconnect to primary server." ); 243 } 244 } 245 246 boolean primaryRestoredSuccessfully = false; 247 // if we are not connected to the primary, try. 248 if ( rca0.getFailoverIndex() > 0 ) 249 { 250 primaryRestoredSuccessfully = restorePrimary(); 251 if ( log.isDebugEnabled() ) 252 { 253 log.debug( "Primary recovery success state = " + primaryRestoredSuccessfully ); 254 } 255 } 256 257 if ( !primaryRestoredSuccessfully ) 258 { 259 // Time driven mode: sleep between each round of recovery 260 // attempt. 261 try 262 { 263 log.warn( "Failed to reconnect to primary server. Cache failover runner is going to sleep for " 264 + idlePeriod + " milliseconds." ); 265 Thread.sleep( idlePeriod ); 266 } 267 catch ( InterruptedException ex ) 268 { 269 // ignore; 270 } 271 } 272 273 // try to bring the listener back to the primary 274 } 275 while ( rca0.getFailoverIndex() > 0 || !allright.get() ); 276 // continue if the primary is not restored or if things are not allright. 277 } 278 279 /** 280 * Try to restore the primary server. 281 * <p> 282 * Once primary is restored the failover listener must be deregistered. 283 * <p> 284 * The primary server is the first server defines in the FailoverServers 285 * list. 286 * 287 * @return boolean value indicating whether the restoration was successful 288 */ 289 private boolean restorePrimary() 290 { 291 IRemoteCacheAttributes rca0 = facade.getAuxiliaryCacheAttributes(); 292 // try to move back to the primary 293 RemoteLocation server = rca0.getFailovers().get(0); 294 295 if ( log.isInfoEnabled() ) 296 { 297 log.info( "Trying to restore connection to primary remote server [" + server + "]" ); 298 } 299 300 RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone(); 301 rca.setRemoteLocation(server); 302 RemoteCacheManager rcm = cacheFactory.getManager( rca ); 303 304 if (rcm != null) 305 { 306 // add a listener if there are none, need to tell rca what number it 307 // is at 308 ICache<K, V> ic = rcm.getCache( rca ); 309 // by default the listener id should be 0, else it will be the 310 // listener 311 // Originally associated with the remote cache. either way is fine. 312 // We just don't want the listener id from a failover being used. 313 // If the remote server was rebooted this could be a problem if new 314 // locals were also added. 315 316 if ( ic.getStatus() == CacheStatus.ALIVE ) 317 { 318 try 319 { 320 // we could have more than one listener registered right 321 // now. 322 // this will not result in a loop, only duplication 323 // stop duplicate listening. 324 if ( facade.getPrimaryServer() != null && facade.getPrimaryServer().getStatus() == CacheStatus.ALIVE ) 325 { 326 int fidx = rca0.getFailoverIndex(); 327 328 if ( fidx > 0 ) 329 { 330 RemoteLocation serverOld = rca0.getFailovers().get(fidx); 331 332 if ( log.isDebugEnabled() ) 333 { 334 log.debug( "Failover Index = " + fidx + " the server at that index is [" 335 + serverOld + "]" ); 336 } 337 338 if ( serverOld != null ) 339 { 340 // create attributes that reflect the 341 // previous failed over configuration. 342 RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) rca0.clone(); 343 rcaOld.setRemoteLocation(serverOld); 344 RemoteCacheManager rcmOld = cacheFactory.getManager( rcaOld ); 345 346 if ( rcmOld != null ) 347 { 348 // manager can remove by name if 349 // necessary 350 rcmOld.removeRemoteCacheListener( rcaOld ); 351 } 352 if ( log.isInfoEnabled() ) 353 { 354 log.info( "Successfully deregistered from FAILOVER remote server = " 355 + serverOld ); 356 } 357 } 358 } 359 else if ( fidx == 0 ) 360 { 361 // this should never happen. If there are no 362 // failovers this shouldn't get called. 363 if ( log.isDebugEnabled() ) 364 { 365 log.debug( "No need to restore primary, it is already restored." ); 366 return true; 367 } 368 } 369 else if ( fidx < 0 ) 370 { 371 // this should never happen 372 log.warn( "Failover index is less than 0, this shouldn't happen" ); 373 } 374 } 375 } 376 catch ( IOException e ) 377 { 378 // TODO, should try again, or somehow stop the listener 379 log.error("Trouble trying to deregister old failover listener prior to restoring the primary = " 380 + server, e ); 381 } 382 383 // Restore primary 384 // may need to do this more gracefully, letting the failover finish in the background 385 RemoteCacheNoWait<K, V> failoverNoWait = facade.getPrimaryServer(); 386 387 // swap in a new one 388 facade.restorePrimaryServer((RemoteCacheNoWait<K, V>) ic); 389 rca0.setFailoverIndex( 0 ); 390 391 if ( log.isInfoEnabled() ) 392 { 393 String message = "Successfully reconnected to PRIMARY remote server. Substituted primary for failoverNoWait [" 394 + failoverNoWait + "]"; 395 log.info( message ); 396 397 if ( facade.getCacheEventLogger() != null ) 398 { 399 facade.getCacheEventLogger().logApplicationEvent( "RemoteCacheFailoverRunner", "RestoredPrimary", message ); 400 } 401 } 402 return true; 403 } 404 } 405 406 // else all right 407 // if the failover index was at 0 here, we would be in a bad 408 // situation, unless there were just 409 // no failovers configured. 410 if ( log.isDebugEnabled() ) 411 { 412 log.debug( "Primary server status in error, not connected." ); 413 } 414 415 return false; 416 } 417}