View Javadoc

1   package org.apache.jcs.auxiliary.remote;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.Serializable;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.jcs.engine.CacheStatus;
28  import org.apache.jcs.engine.behavior.ICache;
29  import org.apache.jcs.engine.behavior.ICompositeCacheManager;
30  import org.apache.jcs.engine.behavior.IElementSerializer;
31  import org.apache.jcs.engine.logging.behavior.ICacheEventLogger;
32  
33  /**
34   * The RemoteCacheFailoverRunner tries to establish a connection with a failover
35   * server, if any are defined. Once a failover connection is made, it will
36   * attempt to replace the failover with the primary remote server.
37   * <p>
38   * It works by switching out the RemoteCacheNoWait inside the Facade.
39   * <p>
40   * Client (i.e.) the CompositeCache has reference to a RemoteCacheNoWaitFacade.
41   * This facade is created by the RemoteCacheFactory. The factory maintains a set
42   * of managers, one for each remote server. Typically, there will only be one
43   * manager.
44   * <p>
45   * If you use multiple remote servers, you may want to set one or more as
46   * failovers. If a local cache cannot connect to the primary server, or looses
47   * its connection to the primary server, it will attempt to restore that
48   * Connection in the background. If failovers are defined, the Failover runner
49   * will try to connect to a failover until the primary is restored.
50   *
51   */
52  public class RemoteCacheFailoverRunner<K extends Serializable, V extends Serializable>
53      implements Runnable
54  {
55      /** The logger */
56      private final static Log log = LogFactory.getLog( RemoteCacheFailoverRunner.class );
57  
58      /** The facade returned to the composite cache. */
59      private final RemoteCacheNoWaitFacade<K, V> facade;
60  
61      /** How long to wait between reconnect attempts. */
62      private static long idlePeriod = 20 * 1000;
63  
64      /** Have we reconnected. */
65      private boolean alright = true;
66  
67      /** The cache manager */
68      private final ICompositeCacheManager cacheMgr;
69  
70      /** The event logger. */
71      private final ICacheEventLogger cacheEventLogger;
72  
73      /** The serializer. */
74      private final IElementSerializer elementSerializer;
75  
76      /**
77       * Constructor for the RemoteCacheFailoverRunner object. This allows the
78       * FailoverRunner to modify the facade that the CompositeCache references.
79       *
80       * @param facade
81       *            the facade the CompositeCache talks to.
82       * @param cacheMgr
83       * @param cacheEventLogger
84       * @param elementSerializer
85       */
86      public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade<K, V> facade, ICompositeCacheManager cacheMgr,
87                                        ICacheEventLogger cacheEventLogger, IElementSerializer elementSerializer )
88      {
89          this.facade = facade;
90          this.cacheMgr = cacheMgr;
91          this.cacheEventLogger = cacheEventLogger;
92          this.elementSerializer = elementSerializer;
93      }
94  
95      /**
96       * Notifies the cache monitor that an error occurred, and kicks off the
97       * error recovery process.
98       */
99      public void notifyError()
100     {
101         bad();
102         synchronized ( this )
103         {
104             notify();
105         }
106     }
107 
108     /**
109      * Main processing method for the RemoteCacheFailoverRunner object.
110      * <p>
111      * If we do not have a connection with any failover server, this will try to
112      * connect one at a time. If no connection can be made, it goes to sleep for
113      * a while (20 seconds).
114      * <p>
115      * Once a connection with a failover is made, we will try to reconnect to
116      * the primary server.
117      * <p>
118      * The primary server is the first server defines in the FailoverServers
119      * list.
120      */
121     public void run()
122     {
123         // start the main work of connecting to a failover and then restoring
124         // the primary.
125         connectAndRestore();
126 
127         if ( log.isInfoEnabled() )
128         {
129             log.info( "Exiting failover runner. Failover index = " + facade.remoteCacheAttributes.getFailoverIndex() );
130             if ( facade.remoteCacheAttributes.getFailoverIndex() <= 0 )
131             {
132                 log.info( "Failover index is <= 0, meaning we are not " + "connected to a failover server." );
133             }
134             else if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 )
135             {
136                 log.info( "Failover index is > 0, meaning we are " + "connected to a failover server." );
137             }
138             // log if we are alright or not.
139         }
140     }
141 
142     /**
143      * This is the main loop. If there are failovers defined, then this will
144      * continue until the primary is re-connected. If no failovers are defined,
145      * this will exit automatically.
146      */
147     @SuppressWarnings("unchecked") // No generic arrays in java
148     private void connectAndRestore()
149     {
150         do
151         {
152             log.info( "Remote cache FAILOVER RUNNING." );
153 
154             // there is no active listener
155             if ( !alright )
156             {
157                 // Monitor each RemoteCacheManager instance one after the other.
158                 // Each RemoteCacheManager corresponds to one remote connection.
159                 String[] failovers = facade.remoteCacheAttributes.getFailovers();
160                 // we should probably check to see if there are any failovers,
161                 // even though the caller
162                 // should have already.
163 
164                 if ( failovers == null )
165                 {
166                     log.warn( "Remote is misconfigured, failovers was null." );
167                     return;
168                 }
169                 else if ( failovers.length == 1 )
170                 {
171                     // if there is only the primary, return out of this
172                     if ( log.isInfoEnabled() )
173                     {
174                         log.info( "No failovers defined, exiting failover runner." );
175                         return;
176                     }
177                 }
178 
179                 int fidx = facade.remoteCacheAttributes.getFailoverIndex();
180                 log.debug( "fidx = " + fidx + " failovers.length = " + failovers.length );
181 
182                 // shouldn't we see if the primary is backup?
183                 // If we don't check the primary, if it gets connected in the
184                 // background,
185                 // we will disconnect it only to put it right back
186                 int i = fidx; // + 1; // +1 skips the primary
187                 if ( log.isDebugEnabled() )
188                 {
189                     log.debug( "stating at failover i = " + i );
190                 }
191 
192                 // try them one at a time until successful
193                 for ( ; i < failovers.length && !alright; i++ )
194                 {
195                     String server = failovers[i];
196                     if ( log.isDebugEnabled() )
197                     {
198                         log.debug( "Trying server [" + server + "] at failover index i = " + i );
199                     }
200 
201                     RemoteCacheAttributes rca = null;
202                     try
203                     {
204                         rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
205                         rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) );
206                         rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) );
207                         RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr, cacheEventLogger, elementSerializer );
208 
209                         if ( log.isDebugEnabled() )
210                         {
211                             log.debug( "RemoteCacheAttributes for failover = " + rca.toString() );
212                         }
213 
214                         // add a listener if there are none, need to tell rca
215                         // what number it is at
216                         ICache<K, V> ic = rcm.getCache( rca.getCacheName() );
217                         if ( ic != null )
218                         {
219                             if ( ic.getStatus() == CacheStatus.ALIVE )
220                             {
221                                 // may need to do this more gracefully
222                                 log.debug( "resetting no wait" );
223                                 facade.noWaits = new RemoteCacheNoWait[1];
224                                 facade.noWaits[0] = (RemoteCacheNoWait<K, V>) ic;
225                                 facade.remoteCacheAttributes.setFailoverIndex( i );
226 
227                                 synchronized ( this )
228                                 {
229                                     if ( log.isDebugEnabled() )
230                                     {
231                                         log.debug( "setting ALRIGHT to true" );
232                                         if ( i > 0 )
233                                         {
234                                             log.debug( "Moving to Primary Recovery Mode, failover index = " + i );
235                                         }
236                                         else
237                                         {
238                                             if ( log.isInfoEnabled() )
239                                             {
240                                                 String message = "No need to connect to failover, the primary server is back up.";
241                                                 log.info( message );
242                                             }
243                                         }
244                                     }
245 
246                                     alright = true;
247 
248                                     if ( log.isInfoEnabled() )
249                                     {
250                                         log.info( "CONNECTED to host = [" + rca.getRemoteHost() + "] port = ["
251                                             + rca.getRemotePort() + "]" );
252                                     }
253                                 }
254                             }
255                         }
256                         else
257                         {
258                             log.info( "noWait is null" );
259                         }
260                     }
261                     catch ( Exception ex )
262                     {
263                         bad();
264                         // Problem encountered in fixing the caches managed by a
265                         // RemoteCacheManager instance.
266                         // Soldier on to the next RemoteCacheManager instance.
267                         String remoteHost = (rca == null) ? "null" : rca.getRemoteHost();
268                         int remotePort = (rca == null) ? 0 : rca.getRemotePort();
269                         if ( i == 0 )
270                         {
271                             log.warn( "FAILED to connect, as expected, to primary" + remoteHost + ":"
272                                 + remotePort, ex );
273                         }
274                         else
275                         {
276                             log.error( "FAILED to connect to failover [" + remoteHost + ":"
277                                 + remotePort + "]", ex );
278                         }
279                     }
280                 }
281             }
282             // end if !alright
283             // get here if while index >0 and alright, meaning that we are
284             // connected to some backup server.
285             else
286             {
287                 if ( log.isDebugEnabled() )
288                 {
289                     log.debug( "ALRIGHT is true " );
290                 }
291                 if ( log.isInfoEnabled() )
292                 {
293                     log.info( "Failover runner is in primary recovery mode. Failover index = "
294                         + facade.remoteCacheAttributes.getFailoverIndex() + "\n" + "Will now try to reconnect to primary server." );
295                 }
296             }
297 
298             boolean primaryRestoredSuccessfully = false;
299             // if we are not connected to the primary, try.
300             if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 )
301             {
302                 primaryRestoredSuccessfully = restorePrimary();
303                 if ( log.isDebugEnabled() )
304                 {
305                     log.debug( "Primary recovery success state = " + primaryRestoredSuccessfully );
306                 }
307             }
308 
309             if ( !primaryRestoredSuccessfully )
310             {
311                 // Time driven mode: sleep between each round of recovery
312                 // attempt.
313                 try
314                 {
315                     log.warn( "Failed to reconnect to primary server. Cache failover runner is going to sleep for "
316                         + idlePeriod + " milliseconds." );
317                     Thread.sleep( idlePeriod );
318                 }
319                 catch ( InterruptedException ex )
320                 {
321                     // ignore;
322                 }
323             }
324 
325             // try to bring the listener back to the primary
326         }
327         while ( facade.remoteCacheAttributes.getFailoverIndex() > 0 || !alright );
328         // continue if the primary is not restored or if things are not alright.
329 
330     }
331 
332     /**
333      * Try to restore the primary server.
334      * <p>
335      * Once primary is restored the failover listener must be deregistered.
336      * <p>
337      * The primary server is the first server defines in the FailoverServers
338      * list.
339      *
340      * @return boolean value indicating whether the restoration was successful
341      */
342     @SuppressWarnings("unchecked") // No generic arrays in java
343     private boolean restorePrimary()
344     {
345         // try to move back to the primary
346         String[] failovers = facade.remoteCacheAttributes.getFailovers();
347         String server = failovers[0];
348 
349         if ( log.isInfoEnabled() )
350         {
351             log.info( "Trying to restore connection to primary remote server [" + server + "]" );
352         }
353 
354         try
355         {
356             RemoteCacheAttributes rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
357             rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) );
358             rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) );
359             RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr, cacheEventLogger, elementSerializer );
360 
361             // add a listener if there are none, need to tell rca what number it
362             // is at
363             ICache<K, V> ic = rcm.getCache( rca.getCacheName() );
364             // by default the listener id should be 0, else it will be the
365             // listener
366             // Originally associated with the remote cache. either way is fine.
367             // We just don't want the listener id from a failover being used.
368             // If the remote server was rebooted this could be a problem if new
369             // locals were also added.
370 
371             if ( ic != null )
372             {
373                 if ( ic.getStatus() == CacheStatus.ALIVE )
374                 {
375                     try
376                     {
377                         // we could have more than one listener registered right
378                         // now.
379                         // this will not result in a loop, only duplication
380                         // stop duplicate listening.
381                         if ( facade.noWaits[0] != null && facade.noWaits[0].getStatus() == CacheStatus.ALIVE )
382                         {
383                             int fidx = facade.remoteCacheAttributes.getFailoverIndex();
384 
385                             if ( fidx > 0 )
386                             {
387                                 String serverOld = failovers[fidx];
388 
389                                 if ( log.isDebugEnabled() )
390                                 {
391                                     log.debug( "Failover Index = " + fidx + " the server at that index is ["
392                                         + serverOld + "]" );
393                                 }
394 
395                                 if ( serverOld != null )
396                                 {
397                                     // create attributes that reflect the
398                                     // previous failed over configuration.
399                                     RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy();
400                                     rcaOld.setRemoteHost( serverOld.substring( 0, serverOld.indexOf( ":" ) ) );
401                                     rcaOld.setRemotePort( Integer.parseInt( serverOld.substring( serverOld
402                                         .indexOf( ":" ) + 1 ) ) );
403                                     RemoteCacheManager rcmOld = RemoteCacheManager.getInstance( rcaOld, cacheMgr, cacheEventLogger, elementSerializer );
404 
405                                     if ( rcmOld != null )
406                                     {
407                                         // manager can remove by name if
408                                         // necessary
409                                         rcmOld.removeRemoteCacheListener( rcaOld );
410                                     }
411                                     if ( log.isInfoEnabled() )
412                                     {
413                                         log.info( "Successfully deregistered from FAILOVER remote server = "
414                                             + serverOld );
415                                     }
416                                 }
417                             }
418                             else if ( fidx == 0 )
419                             {
420                                 // this should never happen. If there are no
421                                 // failovers this shouldn't get called.
422                                 if ( log.isDebugEnabled() )
423                                 {
424                                     log.debug( "No need to restore primary, it is already restored." );
425                                     return true;
426                                 }
427                             }
428                             else if ( fidx < 0 )
429                             {
430                                 // this should never happen
431                                 log.warn( "Failover index is less than 0, this shouldn't happen" );
432                             }
433                         }
434                     }
435                     catch ( IOException e )
436                     {
437                         // TODO, should try again, or somehow stop the listener
438                         log.error(
439                                    "Trouble trying to deregister old failover listener prior to restoring the primary = "
440                                        + server, e );
441                     }
442 
443                     // Restore primary
444                     // may need to do this more gracefully, letting the failover finish in the background
445                     RemoteCacheNoWait<K, V> failoverNoWait = facade.noWaits[0];
446 
447                     // swap in a new one
448                     facade.noWaits = new RemoteCacheNoWait[1];
449                     facade.noWaits[0] = (RemoteCacheNoWait<K, V>) ic;
450                     facade.remoteCacheAttributes.setFailoverIndex( 0 );
451 
452                     if ( log.isInfoEnabled() )
453                     {
454                         String message = "Successfully reconnected to PRIMARY remote server.  Substituted primary for failoverNoWait [" + failoverNoWait + "]";
455                         log.info( message );
456 
457                         if ( facade.getCacheEventLogger() != null )
458                         {
459                             facade.getCacheEventLogger().logApplicationEvent( "RemoteCacheFailoverRunner", "RestoredPrimary",
460                                                                               message );
461                         }
462                     }
463                     return true;
464                 }
465 
466                 // else all right
467                 // if the failover index was at 0 here, we would be in a bad
468                 // situation, unless there were just
469                 // no failovers configured.
470                 if ( log.isDebugEnabled() )
471                 {
472                     log.debug( "Primary server status in error, not connected." );
473                 }
474             }
475             else
476             {
477                 if ( log.isDebugEnabled() )
478                 {
479                     log.debug( "Primary server is null, not connected." );
480                 }
481             }
482         }
483         catch ( NumberFormatException ex )
484         {
485             log.error( ex );
486         }
487         return false;
488     }
489 
490     /**
491      * Sets the "alright" flag to false in a critical section. This flag
492      * indicates whether or not we are connected to any server at all. If we are
493      * connected to a secondary server, then alright will be true, but we will
494      * continue to try to restore the connection with the primary server.
495      * <p>
496      * The primary server is the first server defines in the FailoverServers
497      * list.
498      */
499     private void bad()
500     {
501         if ( alright )
502         {
503             synchronized ( this )
504             {
505                 alright = false;
506             }
507         }
508     }
509 }