1 package org.apache.commons.jcs3.auxiliary.remote;
2
3 import java.io.IOException;
4
5 /*
6 * Licensed to the Apache Software Foundation (ASF) under one
7 * or more contributor license agreements. See the NOTICE file
8 * distributed with this work for additional information
9 * regarding copyright ownership. The ASF licenses this file
10 * to you under the Apache License, Version 2.0 (the
11 * "License"); you may not use this file except in compliance
12 * with the License. You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing,
17 * software distributed under the License is distributed on an
18 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19 * KIND, either express or implied. See the License for the
20 * specific language governing permissions and limitations
21 * under the License.
22 */
23
24 import java.util.List;
25 import java.util.ListIterator;
26 import java.util.concurrent.atomic.AtomicBoolean;
27
28 import org.apache.commons.jcs3.auxiliary.remote.behavior.IRemoteCacheAttributes;
29 import org.apache.commons.jcs3.auxiliary.remote.server.behavior.RemoteType;
30 import org.apache.commons.jcs3.engine.CacheStatus;
31 import org.apache.commons.jcs3.engine.behavior.ICache;
32 import org.apache.commons.jcs3.engine.behavior.IElementSerializer;
33 import org.apache.commons.jcs3.engine.logging.behavior.ICacheEventLogger;
34 import org.apache.commons.jcs3.log.Log;
35 import org.apache.commons.jcs3.log.LogManager;
36
37 /**
38 * Used to provide access to multiple services under nowait protection. Factory should construct
39 * NoWaitFacade to give to the composite cache out of caches it constructs from the varies manager
40 * to lateral services.
41 * <p>
42 * Typically, we only connect to one remote server per facade. We use a list of one
43 * RemoteCacheNoWait.
44 */
45 public class RemoteCacheNoWaitFacade<K, V>
46 extends AbstractRemoteCacheNoWaitFacade<K, V>
47 {
48 /** log instance */
49 private static final Log log = LogManager.getLog( RemoteCacheNoWaitFacade.class );
50
51 /** Provide factory instance to RemoteCacheFailoverRunner */
52 private final RemoteCacheFactory cacheFactory;
53
54 /** Attempt to restore primary connection (switched off for testing) */
55 protected boolean attemptRestorePrimary = true;
56
57 /** Time in ms to sleep between failover attempts */
58 private static final long idlePeriod = 20000L;
59
60 /**
61 * Constructs with the given remote cache, and fires events to any listeners.
62 * <p>
63 * @param noWaits
64 * @param rca
65 * @param cacheEventLogger
66 * @param elementSerializer
67 * @param cacheFactory
68 */
69 public RemoteCacheNoWaitFacade( final List<RemoteCacheNoWait<K,V>> noWaits,
70 final IRemoteCacheAttributes rca,
71 final ICacheEventLogger cacheEventLogger,
72 final IElementSerializer elementSerializer,
73 final RemoteCacheFactory cacheFactory)
74 {
75 super( noWaits, rca, cacheEventLogger, elementSerializer );
76 this.cacheFactory = cacheFactory;
77 }
78
79 /**
80 * Begin the failover process if this is a local cache. Clustered remote caches do not failover.
81 * <p>
82 * @param rcnw The no wait in error.
83 */
84 @Override
85 protected void failover( final RemoteCacheNoWait<K, V> rcnw )
86 {
87 log.debug( "in failover for {0}", rcnw );
88
89 if ( getAuxiliaryCacheAttributes().getRemoteType() == RemoteType.LOCAL )
90 {
91 if ( rcnw.getStatus() == CacheStatus.ERROR )
92 {
93 // start failover, primary recovery process
94 final Thread runner = new Thread(this::connectAndRestore);
95 runner.setDaemon( true );
96 runner.start();
97
98 if ( getCacheEventLogger() != null )
99 {
100 getCacheEventLogger().logApplicationEvent( "RemoteCacheNoWaitFacade", "InitiatedFailover",
101 rcnw + " was in error." );
102 }
103 }
104 else
105 {
106 log.info( "The noWait is not in error" );
107 }
108 }
109 }
110
111 /**
112 * The thread tries to establish a connection with a failover
113 * server, if any are defined. Once a failover connection is made, it will
114 * attempt to replace the failover with the primary remote server.
115 * <p>
116 * It works by switching out the RemoteCacheNoWait inside the Facade.
117 * <p>
118 * Client (i.e.) the CompositeCache has reference to a RemoteCacheNoWaitFacade.
119 * This facade is created by the RemoteCacheFactory. The factory maintains a set
120 * of managers, one for each remote server. Typically, there will only be one
121 * manager.
122 * <p>
123 * If you use multiple remote servers, you may want to set one or more as
124 * failovers. If a local cache cannot connect to the primary server, or looses
125 * its connection to the primary server, it will attempt to restore that
126 * Connection in the background. If failovers are defined, the Failover runner
127 * will try to connect to a failover until the primary is restored.
128 * If no failovers are defined, this will exit automatically.
129 *
130 * @since 3.1
131 */
132 protected void connectAndRestore()
133 {
134 final IRemoteCacheAttributes rca0 = getAuxiliaryCacheAttributes();
135 // Each RemoteCacheManager corresponds to one remote connection.
136 final List<RemoteLocation> failovers = rca0.getFailovers();
137 // we should probably check to see if there are any failovers,
138 // even though the caller should have already.
139
140 if ( failovers == null )
141 {
142 log.warn( "Remote is misconfigured, failovers was null." );
143 return;
144 }
145 if ( failovers.size() == 1 )
146 {
147 // if there is only the primary, return out of this
148 log.info( "No failovers defined, exiting failover runner." );
149 return;
150 }
151
152 final AtomicBoolean allright = new AtomicBoolean(false);
153
154 do
155 {
156 log.info( "Remote cache FAILOVER RUNNING." );
157
158 // there is no active listener
159 if ( !allright.get() )
160 {
161 // Monitor each RemoteCacheManager instance one after the other.
162 final int fidx = rca0.getFailoverIndex();
163 log.debug( "fidx = {0} failovers.size = {1}", rca0::getFailoverIndex, failovers::size);
164
165 // If we don't check the primary, if it gets connected in the
166 // background,
167 // we will disconnect it only to put it right back
168 final ListIterator<RemoteLocation> i = failovers.listIterator(fidx); // + 1; // +1 skips the primary
169 log.debug( "starting at failover i = {0}", i );
170
171 // try them one at a time until successful
172 while (i.hasNext() && !allright.get())
173 {
174 final int failoverIndex = i.nextIndex();
175 final RemoteLocation server = i.next();
176 log.debug("Trying server [{0}] at failover index i = {1}", server, failoverIndex);
177
178 final RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone();
179 rca.setRemoteLocation(server);
180 final RemoteCacheManager rcm = cacheFactory.getManager( rca );
181
182 log.debug( "RemoteCacheAttributes for failover = {0}", rca );
183
184 if (rcm != null)
185 {
186 // add a listener if there are none, need to tell rca
187 // what number it is at
188 final ICache<K, V> ic = rcm.getCache( rca );
189 if ( ic.getStatus() == CacheStatus.ALIVE )
190 {
191 // may need to do this more gracefully
192 log.debug( "resetting no wait" );
193 restorePrimaryServer((RemoteCacheNoWait<K, V>) ic);
194 rca0.setFailoverIndex(failoverIndex);
195
196 log.debug("setting ALLRIGHT to true");
197 if (i.hasPrevious())
198 {
199 log.debug("Moving to Primary Recovery Mode, failover index = {0}", failoverIndex);
200 }
201 else
202 {
203 log.debug("No need to connect to failover, the primary server is back up.");
204 }
205
206 allright.set(true);
207
208 log.info( "CONNECTED to host = [{0}]", rca::getRemoteLocation);
209 }
210 }
211 }
212 }
213 // end if !allright
214 // get here if while index >0 and allright, meaning that we are
215 // connected to some backup server.
216 else
217 {
218 log.debug( "ALLRIGHT is true " );
219 log.info( "Failover runner is in primary recovery mode. "
220 + "Failover index = {0} Will now try to reconnect to "
221 + "primary server.", rca0::getFailoverIndex);
222 }
223
224 // Exit loop if in test mode
225 if (allright.get() && !attemptRestorePrimary)
226 {
227 break;
228 }
229
230 boolean primaryRestoredSuccessfully = false;
231 // if we are not connected to the primary, try.
232 if (rca0.getFailoverIndex() > 0)
233 {
234 primaryRestoredSuccessfully = restorePrimary();
235 log.debug( "Primary recovery success state = {0}",
236 primaryRestoredSuccessfully );
237 }
238
239 if (!primaryRestoredSuccessfully)
240 {
241 // Time driven mode: sleep between each round of recovery attempt.
242 try
243 {
244 log.warn( "Failed to reconnect to primary server. "
245 + "Cache failover runner is going to sleep for "
246 + "{0} milliseconds.", idlePeriod );
247 Thread.sleep( idlePeriod );
248 }
249 catch ( final InterruptedException ex )
250 {
251 // ignore;
252 }
253 }
254
255 // try to bring the listener back to the primary
256 }
257 while (rca0.getFailoverIndex() > 0 || !allright.get());
258 // continue if the primary is not restored or if things are not allright.
259
260 if ( log.isInfoEnabled() )
261 {
262 final int failoverIndex = rca0.getFailoverIndex();
263 log.info( "Exiting failover runner. Failover index = {0}", failoverIndex);
264
265 if ( failoverIndex <= 0 )
266 {
267 log.info( "Failover index is <= 0, meaning we are not connected to a failover server." );
268 }
269 else
270 {
271 log.info( "Failover index is > 0, meaning we are connected to a failover server." );
272 }
273 }
274 }
275
276 /**
277 * Try to restore the primary server.
278 * <p>
279 * Once primary is restored the failover listener must be deregistered.
280 * <p>
281 * The primary server is the first server defines in the FailoverServers
282 * list.
283 *
284 * @return boolean value indicating whether the restoration was successful
285 */
286 private boolean restorePrimary()
287 {
288 final IRemoteCacheAttributes rca0 = getAuxiliaryCacheAttributes();
289 // try to move back to the primary
290 final RemoteLocation server = rca0.getFailovers().get(0);
291
292 log.info( "Trying to restore connection to primary remote server "
293 + "[{0}]", server );
294
295 final RemoteCacheAttributes rca = (RemoteCacheAttributes) rca0.clone();
296 rca.setRemoteLocation(server);
297 final RemoteCacheManager rcm = cacheFactory.getManager( rca );
298
299 if (rcm != null)
300 {
301 // add a listener if there are none, need to tell rca what number it
302 // is at
303 final ICache<K, V> ic = rcm.getCache( rca );
304 // by default the listener id should be 0, else it will be the
305 // listener
306 // Originally associated with the remote cache. either way is fine.
307 // We just don't want the listener id from a failover being used.
308 // If the remote server was rebooted this could be a problem if new
309 // locals were also added.
310
311 if ( ic.getStatus() == CacheStatus.ALIVE )
312 {
313 try
314 {
315 // we could have more than one listener registered right
316 // now.
317 // this will not result in a loop, only duplication
318 // stop duplicate listening.
319 if (getPrimaryServer() != null && getPrimaryServer().getStatus() == CacheStatus.ALIVE )
320 {
321 final int fidx = rca0.getFailoverIndex();
322
323 if ( fidx > 0 )
324 {
325 final RemoteLocation serverOld = rca0.getFailovers().get(fidx);
326
327 log.debug( "Failover Index = {0} the server at that "
328 + "index is [{1}]", fidx, serverOld );
329
330 if ( serverOld != null )
331 {
332 // create attributes that reflect the
333 // previous failed over configuration.
334 final RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) rca0.clone();
335 rcaOld.setRemoteLocation(serverOld);
336 final RemoteCacheManager rcmOld = cacheFactory.getManager( rcaOld );
337
338 if ( rcmOld != null )
339 {
340 // manager can remove by name if
341 // necessary
342 rcmOld.removeRemoteCacheListener( rcaOld );
343 }
344 log.info( "Successfully deregistered from "
345 + "FAILOVER remote server = {0}", serverOld );
346 }
347 }
348 else if ( fidx == 0 )
349 {
350 // this should never happen. If there are no
351 // failovers this shouldn't get called.
352 if ( log.isDebugEnabled() )
353 {
354 log.debug( "No need to restore primary, it is already restored." );
355 return true;
356 }
357 }
358 else {
359 // this should never happen
360 log.warn( "Failover index is less than 0, this shouldn't happen" );
361 }
362 }
363 }
364 catch ( final IOException e )
365 {
366 // TODO, should try again, or somehow stop the listener
367 log.error("Trouble trying to deregister old failover "
368 + "listener prior to restoring the primary = {0}",
369 server, e );
370 }
371
372 // Restore primary
373 // may need to do this more gracefully, letting the failover finish in the background
374 final RemoteCacheNoWait<K, V> failoverNoWait = getPrimaryServer();
375
376 // swap in a new one
377 restorePrimaryServer((RemoteCacheNoWait<K, V>) ic);
378 rca0.setFailoverIndex( 0 );
379
380 final String message = "Successfully reconnected to PRIMARY "
381 + "remote server. Substituted primary for "
382 + "failoverNoWait [" + failoverNoWait + "]";
383 log.info( message );
384
385 if (getCacheEventLogger() != null)
386 {
387 getCacheEventLogger().logApplicationEvent(
388 "RemoteCacheFailoverRunner", "RestoredPrimary",
389 message );
390 }
391 return true;
392 }
393 }
394
395 // else all right
396 // if the failover index was at 0 here, we would be in a bad
397 // situation, unless there were just
398 // no failovers configured.
399 log.debug( "Primary server status in error, not connected." );
400
401 return false;
402 }
403 }