View Javadoc

1   /*
2    * Copyright (c) 1998-2004 The Jgroup Team.
3    *
4    * This program is free software; you can redistribute it and/or modify
5    * it under the terms of the GNU Lesser General Public License version 2 as
6    * published by the Free Software Foundation.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Lesser General Public License for more details.
12   *
13   * You should have received a copy of the GNU Lesser General Public License
14   * along with this program; if not, write to the Free Software
15   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16   *
17   */
18  
19  package jgroup.experiment.runnables;
20  
21  import java.io.File;
22  import java.util.Random;
23  import java.util.Set;
24  
25  import jgroup.core.ConfigurationException;
26  import jgroup.experiment.Experiment;
27  import jgroup.experiment.PropertyDefinition;
28  import jgroup.experiment.Runnable;
29  import jgroup.relacs.config.AppConfig;
30  import jgroup.relacs.config.ClassData;
31  import jgroup.relacs.config.ExperimentConfig;
32  import jgroup.relacs.config.Host;
33  import jgroup.relacs.config.HostSet;
34  import jgroup.util.IntList;
35  import jgroup.util.log.Eventlogger;
36  
37  /**
38   * @author Hein Meling
39   */
40  public class OldCrashFailure
41    implements Runnable
42  {
43  
44    ////////////////////////////////////////////////////////////////////////////////////////////
45    // Static fields
46    ////////////////////////////////////////////////////////////////////////////////////////////
47  
48    /**
49     * Access key to store/retreive the set of replicas in the
50     * <code>Host</code> content map.
51     */
52    private static final String REPLICAS      = "Replicas";
53  
54    /** Property strings */
55    private static final String CRASHES       = "processors.to.crashes";
56    private static final String TMAX          = "tmax";
57    private static final String RECOVERY_TIME = "recovery.time";
58    private static final String MONITORED_APP = "service.being.monitored";
59  
60    private static final PropertyDefinition[] properties = {
61        new PropertyDefinition(CRASHES, PropertyDefinition.INT_TYPE),
62        new PropertyDefinition(TMAX, PropertyDefinition.INT_TYPE),
63        new PropertyDefinition(RECOVERY_TIME, PropertyDefinition.INT_TYPE),
64        new PropertyDefinition(MONITORED_APP, PropertyDefinition.STRING_TYPE)
65    };
66  
67  
68    ////////////////////////////////////////////////////////////////////////////////////////////
69    // Fields
70    ////////////////////////////////////////////////////////////////////////////////////////////
71  
72    private Random rnd = new Random();
73  
74  
75    ////////////////////////////////////////////////////////////////////////////////////////////
76    // Methods from Runnable
77    ////////////////////////////////////////////////////////////////////////////////////////////
78  
79    /* (non-Javadoc)
80     * @see jgroup.experiment.Runnable#run(jgroup.relacs.config.ExperimentConfig)
81     */
82    public void run(ExperimentConfig ec)
83      throws ConfigurationException
84    {
85      // The service to be monitored must be specified
86      String monitoredService = ec.getProperty(this, MONITORED_APP);
87      int crashes = ec.getIntProperty(this, CRASHES, 1);
88      int tmax = ec.getIntProperty(this, TMAX, 5000);
89      int recoveryTime = ec.getIntProperty(this, RECOVERY_TIME, 10000);
90  
91      // All available hosts may be selected to be crashed
92      HostSet availableHosts = ec.getServerConfig().getAllHosts().getAvailHosts();
93      Host[] aHosts = availableHosts.toArray();
94      if (aHosts.length < crashes)
95        throw new ConfigurationException("Not enough available hosts to support "
96            + crashes + " crashes: " + availableHosts);
97  
98      if (Eventlogger.ENABLED) {
99        String logDir = ec.getProperty("local.save.dir");
100       logDir = logDir + File.separator + ec.getExperimentName();
101       File ldir = new File(logDir);
102       // If the directory does not exist, create it.
103       if (!ldir.exists()) {
104         if (!ldir.mkdirs()) {
105           throw new ConfigurationException("Could not create experiment directory: " + ldir);
106         }
107       }
108     }
109 
110     /*
111      * Compute the uniform distribution of failure injection times
112      */
113     int[] failureTime = new int[crashes];
114     failureTime[0] = 0; // first failure always at t=0.
115     for (int i = 1; i < failureTime.length; i++) {
116       failureTime[i] = rnd.nextInt(tmax);
117     }
118 
119     /*
120      * Compute the uniform distribution of processors that is to be crashed
121      */
122     IntList hostsToCrash = new IntList();
123     for (int i = 0; i < crashes; i++) {
124       int tmpRnd = rnd.nextInt(aHosts.length);
125       if (!hostsToCrash.contains(tmpRnd))
126         hostsToCrash.insert(tmpRnd);
127       // if the same host is selected again, then we crash one less host.
128     }
129     // The hcrash array contains indecies into the aHosts/failureTime arrays
130     int[] hcrash = hostsToCrash.toIntArray();
131 
132     if (Eventlogger.ENABLED)
133       //Debug.logEvent("BeginCrashRound", "Exp.no: " + ec.getIntProperty("repeat"));
134       Eventlogger.logEventFlush("BeginCrashRound Exp.no: " + ec.getIntProperty("repeat"));
135 
136     /*
137      * Check if the monitored service is affected by this particular
138      * crash pattern; this part does not perform the actual crashing.
139      */
140     ClassData monCD = AppConfig.getApplication(monitoredService).getClassData();
141     boolean monitoredServiceAffected = false;
142     for (int i = 0; i < hcrash.length; i++) {
143       Host host = aHosts[hcrash[i]];
144       Set replicas = host.queryReplicas();
145       host.put(REPLICAS, replicas);
146       if (replicas.contains(monCD)) {
147         monitoredServiceAffected = true;
148       }
149     }
150     if (!monitoredServiceAffected) {
151       if (Eventlogger.ENABLED) {
152         for (int i = 0; i < hcrash.length; i++) {
153           Host host = aHosts[hcrash[i]];
154           Set replicas = (Set) host.get(REPLICAS);
155           if (!replicas.isEmpty()) {
156             //Debug.logEvent("FakeCrashing @ " + failureTime[i], host.getCanonicalHostName()
157             //    + ", apps: " + replicas);
158             Eventlogger.logEventFlush("FakeCrashing @ " + failureTime[i] + host.getCanonicalHostName()
159                 + ", apps: " + replicas);
160           } else {
161             //Debug.logEvent("FakeCrashing @ " + failureTime[i], host.getCanonicalHostName()
162             //    + ", no applications");
163             Eventlogger.logEventFlush("FakeCrashing @ " + failureTime[i] + host.getCanonicalHostName()
164                 + ", no applications");
165           }
166         }
167         //Debug.logEvent("EndCrashRound", "MonitoredServiceNotAffected");
168         Eventlogger.logEventFlush("EndCrashRound" + "MonitoredServiceNotAffected");
169       }
170       /*
171        * Rerun this experiment round, to save the time of killing hosts,
172        * and restarting everything.
173        */
174       System.out.println("Rerunning experiment: " + ec.getIntProperty("repeat"));
175       run(ec);
176       /*
177        * This is a recursive call, and we should not exit from the
178        * run() method and enter into the actual failure injection part
179        * for other experiment runs than those actually having any
180        * computed failures to inject, therefore the return below.
181        * That is, only one real experiment will be executed, independent
182        * of the number of recursive steps taken.
183        */
184       return;
185     }
186 
187     /*
188      * If we get here, at least one of the monitored service replicas
189      * are supposed to be crashed, and in this part we perform the actual
190      * failure injections.
191      */
192     int lastFailureTime = 0;
193     for (int i = 0; i < hcrash.length; i++) {
194       Host host = aHosts[hcrash[i]];
195       System.out.println("Shutdown: " + host.getCanonicalHostName()
196           + " @ " + failureTime[i]);
197       if (Eventlogger.ENABLED) {
198         Set replicas = (Set) host.get(REPLICAS);
199         if (!replicas.isEmpty()) {
200           //Debug.logEvent("Crashing @ " + failureTime[i], host.getCanonicalHostName()
201           //    + ", apps: " + replicas);
202           Eventlogger.logEventFlush("Crashing @ " + failureTime[i] + host.getCanonicalHostName()
203                   + ", apps: " + replicas);
204         } else {
205           //Debug.logEvent("Crashing @ " + failureTime[i], host.getCanonicalHostName()
206           //    + ", no applications");
207           Eventlogger.logEventFlush("Crashing @ " + failureTime[i] + host.getCanonicalHostName()
208               + ", no applications");
209         }
210       }
211       if (failureTime[i] > lastFailureTime)
212         lastFailureTime = failureTime[i];
213       host.shutdown(failureTime[i]);
214     }
215     recoveryTime += lastFailureTime;
216     Experiment.log.info("Waiting for recovery for: " + (recoveryTime/1000) + " seconds.");
217     try {
218       Thread.sleep(recoveryTime);
219     } catch (InterruptedException e) { }
220     Experiment.log.info("Waiting completed");
221     if (Eventlogger.ENABLED)
222       //Debug.logEvent("EndCrashRound", "MonitoredServiceAffected");
223       Eventlogger.logEventFlush("EndCrashRound MonitoredServiceAffected");
224   }
225 
226 
227   /* (non-Javadoc)
228    * @see jgroup.experiment.Runnable#getProperties()
229    */
230   public PropertyDefinition[] getProperties()
231   {
232     return properties;
233   }
234 
235 } // END CrashFailure