View Javadoc

1   /*
2    * Copyright (c) 1998-2004 The Jgroup Team.
3    *
4    * This program is free software; you can redistribute it and/or modify
5    * it under the terms of the GNU Lesser General Public License version 2 as
6    * published by the Free Software Foundation.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Lesser General Public License for more details.
12   *
13   * You should have received a copy of the GNU Lesser General Public License
14   * along with this program; if not, write to the Free Software
15   * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
16   *
17   */
18  
19  package jgroup.experiment.runnables;
20  
21  import java.io.File;
22  import java.util.Random;
23  import java.util.Set;
24  
25  import jgroup.core.ConfigurationException;
26  import jgroup.experiment.Experiment;
27  import jgroup.experiment.PropertyDefinition;
28  import jgroup.experiment.Runnable;
29  import jgroup.relacs.config.AppConfig;
30  import jgroup.relacs.config.ClassData;
31  import jgroup.relacs.config.ExperimentConfig;
32  import jgroup.relacs.config.Host;
33  import jgroup.relacs.config.HostSet;
34  import jgroup.util.IntList;
35  import jgroup.util.log.Eventlogger;
36  
37  /**
38   * @author Hein Meling
39   */
40  public class CrashFailure
41    implements Runnable
42  {
43  
44    ////////////////////////////////////////////////////////////////////////////////////////////
45    // Static fields
46    ////////////////////////////////////////////////////////////////////////////////////////////
47  
48    /**
49     * Access key to store/retreive the set of replicas in the
50     * <code>Host</code> content map.
51     */
52    private static final String REPLICAS      = "Replicas";
53  
54    /** Property strings */
55    private static final String CRASHES       = "processors.to.crashes";
56    private static final String TMAX          = "tmax";
57    private static final String RECOVERY_TIME = "recovery.time";
58    private static final String MONITORED_APP = "service.being.monitored";
59  
60    private static final PropertyDefinition[] properties = {
61        new PropertyDefinition(CRASHES, PropertyDefinition.INT_TYPE),
62        new PropertyDefinition(TMAX, PropertyDefinition.INT_TYPE),
63        new PropertyDefinition(RECOVERY_TIME, PropertyDefinition.INT_TYPE),
64        new PropertyDefinition(MONITORED_APP, PropertyDefinition.STRING_TYPE)
65    };
66  
67  
68    ////////////////////////////////////////////////////////////////////////////////////////////
69    // Fields
70    ////////////////////////////////////////////////////////////////////////////////////////////
71  
72    private Random rnd = new Random();
73  
74  
75    ////////////////////////////////////////////////////////////////////////////////////////////
76    // Methods from Runnable
77    ////////////////////////////////////////////////////////////////////////////////////////////
78  
79    /* (non-Javadoc)
80     * @see jgroup.experiment.Runnable#run(jgroup.relacs.config.ExperimentConfig)
81     */
82    public void run(ExperimentConfig ec)
83      throws ConfigurationException
84    {
85      // The service to be monitored must be specified
86      String monitoredService = ec.getProperty(this, MONITORED_APP);
87      int crashes = ec.getIntProperty(this, CRASHES, 1);
88      int tmax = ec.getIntProperty(this, TMAX, 5000);
89      int recoveryTime = ec.getIntProperty(this, RECOVERY_TIME, 10000);
90  
91      // All available hosts may be selected to be crashed
92      HostSet availableHosts = ec.getServerConfig().getAllHosts().getAvailHosts();
93      Host[] aHosts = availableHosts.toArray();
94      if (aHosts.length < crashes)
95        throw new ConfigurationException("Not enough available hosts to support "
96            + crashes + " crashes: " + availableHosts);
97  
98      if (Eventlogger.ENABLED) {
99        String logDir = ec.getProperty("local.save.dir");
100       logDir = logDir + File.separator + ec.getExperimentName();
101       File ldir = new File(logDir);
102       // If the directory does not exist, create it.
103       if (!ldir.exists()) {
104         if (!ldir.mkdirs()) {
105           throw new ConfigurationException("Could not create experiment directory: " + ldir);
106         }
107       }
108     }
109 
110     /*
111      * Compute the uniform distribution of failure injection times
112      */
113     int[] failureTime = new int[crashes];
114     failureTime[0] = 0; // first failure always at t=0.
115     for (int i = 1; i < failureTime.length; i++) {
116       failureTime[i] = rnd.nextInt(tmax);
117     }
118 
119     /*
120      * Compute the uniform distribution of processors that is to be crashed
121      */
122     IntList hostsToCrash = new IntList();
123     for (int i = 0; i < crashes; i++) {
124       int tmpRnd = rnd.nextInt(aHosts.length);
125       if (!hostsToCrash.contains(tmpRnd))
126         hostsToCrash.insert(tmpRnd);
127       // if the same host is selected again, then we crash one less host.
128     }
129     // The hcrash array contains indecies into the aHosts/failureTime arrays
130     int[] hcrash = hostsToCrash.toIntArray();
131 
132     if (Eventlogger.ENABLED)
133       //Debug.logEvent("BeginCrashRound", "Exp.no: " + ec.getIntProperty("repeat"));
134       Eventlogger.logEventFlush("BeginCrashRound Exp.no: " + ec.getIntProperty("repeat"));
135 
136     /*
137      * In this part we perform the actual failure injections.
138      */
139     int lastFailureTime = 0;
140     ClassData monCD = AppConfig.getApplication(monitoredService).getClassData();
141     boolean monitoredServiceAffected = false;
142     for (int i = 0; i < hcrash.length; i++) {
143       Host host = aHosts[hcrash[i]];
144       String hostname = host.getCanonicalHostName();
145       System.out.println("Shutdown: " + hostname + " @ " + failureTime[i]);
146       if (Eventlogger.ENABLED) {
147         /*
148          * Check which replicas exists on the selected host.
149          */
150         Set replicas = host.queryReplicas();
151         if (replicas.contains(monCD)) {
152           monitoredServiceAffected = true;
153         }
154         //Debug.logEvent("Crashing @ " + failureTime[i], hostname + ", apps: " + replicas);
155         Eventlogger.logEventFlush("Crashing @ " + failureTime[i] + " " + hostname + ", apps: " + replicas);
156         
157         
158       }
159       if (failureTime[i] > lastFailureTime)
160         lastFailureTime = failureTime[i];
161       host.shutdown(failureTime[i]);
162     }
163     recoveryTime += lastFailureTime;
164     Experiment.log.info("Waiting for recovery for: " + (recoveryTime/1000) + " seconds.");
165     try {
166       Thread.sleep(recoveryTime);
167     } catch (InterruptedException e) { }
168     Experiment.log.info("Waiting completed");
169     if (Eventlogger.ENABLED) {
170       Eventlogger.logEventFlush("EndCrashRound " +
171           ((monitoredServiceAffected) ?
172               "MonitoredServiceAffected" : "MonitoredServiceNotActuallyAffected"));
173     }
174   }
175 
176 
177   /* (non-Javadoc)
178    * @see jgroup.experiment.Runnable#getProperties()
179    */
180   public PropertyDefinition[] getProperties()
181   {
182     return properties;
183   }
184 
185 } // END CrashFailure