1 package org.apache.jcs.auxiliary.remote;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.jcs.engine.CacheStatus;
25
26 /**
27 * Used to monitor and repair any failed connection for the remote cache service. By default the
28 * monitor operates in a failure driven mode. That is, it goes into a wait state until there is an
29 * error. TODO consider moving this into an active monitoring mode. Upon the notification of a
30 * connection error, the monitor changes to operate in a time driven mode. That is, it attempts to
31 * recover the connections on a periodic basis. When all failed connections are restored, it changes
32 * back to the failure driven mode.
33 */
34 public class RemoteCacheMonitor
35 implements Runnable
36 {
37 /** The logger */
38 private final static Log log = LogFactory.getLog( RemoteCacheMonitor.class );
39
40 /** The remote cache that we are monitoring */
41 private static RemoteCacheMonitor instance;
42
43 /** Time between checks */
44 private static long idlePeriod = 30 * 1000;
45
46 // minimum 30 seconds.
47 //private static long idlePeriod = 3*1000; // for debugging.
48
49 /**
50 * Must make sure RemoteCacheMonitor is started before any remote error can be detected!
51 */
52 private boolean alright = true;
53
54 /** Time driven mode */
55 final static int TIME = 0;
56
57 /** Error driven mode -- only check on health if there is an error */
58 final static int ERROR = 1;
59
60 /** The mode to use */
61 static int mode = ERROR;
62
63 /**
64 * Configures the idle period between repairs.
65 * <p>
66 * @param idlePeriod The new idlePeriod value
67 */
68 public static void setIdlePeriod( long idlePeriod )
69 {
70 if ( idlePeriod > RemoteCacheMonitor.idlePeriod )
71 {
72 RemoteCacheMonitor.idlePeriod = idlePeriod;
73 }
74 }
75
76 /** Constructor for the RemoteCacheMonitor object */
77 private RemoteCacheMonitor()
78 {
79 super();
80 }
81
82 /**
83 * Returns the singleton instance.
84 * <p>
85 * @return The instance value
86 */
87 static RemoteCacheMonitor getInstance()
88 {
89 synchronized ( RemoteCacheMonitor.class )
90 {
91 if ( instance == null )
92 {
93 return instance = new RemoteCacheMonitor();
94 }
95 }
96 return instance;
97 }
98
99 /**
100 * Notifies the cache monitor that an error occurred, and kicks off the error recovery process.
101 */
102 public void notifyError()
103 {
104 log.debug( "Notified of an error." );
105 bad();
106 synchronized ( this )
107 {
108 notify();
109 }
110 }
111
112 // Run forever.
113
114 // Avoid the use of any synchronization in the process of monitoring for
115 // performance reason.
116 // If exception is thrown owing to synchronization,
117 // just skip the monitoring until the next round.
118 /** Main processing method for the RemoteCacheMonitor object */
119 public void run()
120 {
121 log.debug( "Monitoring daemon started" );
122 do
123 {
124 if ( mode == ERROR )
125 {
126 synchronized ( this )
127 {
128 if ( alright )
129 {
130 // make this configurable, comment out wait to enter
131 // time driven mode
132 // Failure driven mode.
133 try
134 {
135 if ( log.isDebugEnabled() )
136 {
137 log.debug( "FAILURE DRIVEN MODE: cache monitor waiting for error" );
138 }
139 wait();
140 // wake up only if there is an error.
141 }
142 catch ( InterruptedException ignore )
143 {
144 // swallow
145 }
146 }
147 }
148 }
149 else
150 {
151 if ( log.isDebugEnabled() )
152 {
153 log.debug( "TIME DRIVEN MODE: cache monitor sleeping for " + idlePeriod );
154 }
155 // Time driven mode: sleep between each round of recovery
156 // attempt.
157 // will need to test not just check status
158 }
159
160 try
161 {
162 Thread.sleep( idlePeriod );
163 }
164 catch ( InterruptedException ex )
165 {
166 // ignore;
167 }
168
169 // The "alright" flag must be false here.
170 // Simply presume we can fix all the errors until proven otherwise.
171 synchronized ( this )
172 {
173 alright = true;
174 }
175 //p("cache monitor running.");
176 // Monitor each RemoteCacheManager instance one after the other.
177 // Each RemoteCacheManager corresponds to one remote connection.
178 for (RemoteCacheManager mgr : RemoteCacheManager.instances.values())
179 {
180 try
181 {
182 // If any cache is in error, it strongly suggests all caches
183 // managed by the
184 // same RmicCacheManager instance are in error. So we fix
185 // them once and for all.
186 for (RemoteCacheNoWait<?, ?> c : mgr.caches.values())
187 {
188 if ( c.getStatus() == CacheStatus.ERROR )
189 {
190 RemoteCacheRestore repairer = new RemoteCacheRestore( mgr );
191 // If we can't fix them, just skip and re-try in
192 // the next round.
193 if ( repairer.canFix() )
194 {
195 repairer.fix();
196 }
197 else
198 {
199 bad();
200 }
201 break;
202 }
203 }
204 }
205 catch ( Exception ex )
206 {
207 bad();
208 // Problem encountered in fixing the caches managed by a
209 // RemoteCacheManager instance.
210 // Soldier on to the next RemoteCacheManager instance.
211 log.error( "Problem fixing caches for manager." + mgr, ex );
212 }
213 }
214 }
215 while ( true );
216 }
217
218 /** Sets the "aright" flag to false in a critical section. */
219 private synchronized void bad()
220 {
221 alright = false;
222 }
223 }