特来电混沌工程实践-混沌事件注入
在上篇博文《特来电混沌工程实践》中,我们详细介绍了特来电混沌工程实践的设计和规划。目前我们已经启动了应用层的混沌实验。 在应用层的混沌实验中,我们经常需要模拟HSF服务容器的线程被打爆、CPU使用率达到25%、50%、75%、端口被打爆、内存泄露、服务超时、服务异常等场景。 前期我们实现的时候,一般会选择一个典型的HSF服务去模拟注入上述混沌事件场景。但是每次注入都会遇到准备时间长、耗时长、控制复杂等问题。 后来和阿里的中亭老师交流,受到了启发:我们应该写一个混沌事件注入工具,然后根据混沌实验场景灵活地注入混沌事件。 因此,我们启动了混沌事件注入工具的研发。先说一下具体的思路吧:
一、统一混沌事件的注入接口,实现各类混沌事件注入 ? 1. 先定义混沌事件注入接口IChaosEvent,包含两个方法Inject注入和Stop停止 1 interface IChaosEvent 2 { 3 void Inject(Dictionary<string,string> context); 4 5 void Stop(); 6 } ? 同时增加一个混沌事件枚举ChaosEventType public enum ChaosEventType { CPU25,CPU50,CPU75,ServiceTimeout,ServiceException,Memory,Threads,Ports } 2. 实现各类混沌事件注入 ? ?HighCpu-25%CPU使用率 class Chaos_HighCPU25 : IChaosEvent { CancellationTokenSource cts; public Chaos_HighCPU25() { cts = new CancellationTokenSource(); } context) { try { var count = (25 / 100.0) * Environment.ProcessorCount; for (int i = 0; i < count; i++) { var cpuTask = new Task(() => { while (true && cts.IsCancellationRequested == false) { } },cts.Token,TaskCreationOptions.LongRunning); cpuTask.Start(); } } catch { } } Stop() { cts.Cancel(); } } ? HighCpu-50%CPU使用率 Chaos_HighCPU50 : IChaosEvent { CancellationTokenSource cts; Chaos_HighCPU50() { cts = 50 / Stop() { cts.Cancel(); } } ? HighCpu-75%CPU使用率 Chaos_HighCPU75 : IChaosEvent { CancellationTokenSource cts; Chaos_HighCPU75() { cts = 75 / Stop() { cts.Cancel(); } } ? 
内存泄露-2G Chaos_Memory : IChaosEvent { CancellationTokenSource cts; static string OneKB = "222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222111"; static List<string> list = new List<(); Chaos_Memory() { cts = var count = System.Configuration.ConfigurationManager.AppSettings[Chaos.MemoryMB]; if (count == null) count = 2000; int c; if (int.TryParse(count,1)">out c)) { Task task = Task( () => { int k = 0; k < c / 2; k++) { StringBuilder builder = StringBuilder(); 0; i < 1024; i++) { builder.Append(OneKB); } list.Add(builder.ToString()); } },TaskCreationOptions.LongRunning); task.Start(); } } Stop() { cts.Cancel(); list.Clear(); list = (); } } ? 
端口被打爆: Chaos_Ports : IChaosEvent { CancellationTokenSource cts; static List<Socket> sockets; Chaos_Ports() { cts = CancellationTokenSource(); sockets = new List<Socket>(); } var count = Convert.ToInt32(context[Count]); var server = Convert.ToString(context[Servervar sp = server.Split(':'); var task = Task.Factory.StartNew(() => { ) { { Socket socket = Socket(AddressFamily.InterNetwork,SocketType.Stream,ProtocolType.Tcp); socket.SetSocketOption(SocketOptionLevel.Socket,SocketOptionName.KeepAlive,true); socket.Connect(sp[0],Convert.ToInt32(sp[1])); SetKeepAliveValues(socket,1)">true,36000000,1)">1000); sockets.Add(socket); } (Exception e) { } } while (cts.IsCancellationRequested == ) { Thread.Sleep(20 * 60000); } },TaskCreationOptions.LongRunning); } Stop() { cts.Cancel(); if (sockets != null) { foreach (var socket in sockets) { { socket.Close(); } { } } sockets.Clear(); sockets = ; } } SetKeepAliveValues ( System.Net.Sockets.Socket Socket,1)">bool On_Off,1)">uint KeepaLiveTime,1)"> KeepaLiveInterval ) { int Result = -; unsafe { TcpKeepAlive KeepAliveValues = TcpKeepAlive(); KeepAliveValues.On_Off = Convert.ToUInt32(On_Off); KeepAliveValues.KeepaLiveTime = KeepaLiveTime; KeepAliveValues.KeepaLiveInterval = KeepaLiveInterval; byte[] InValue = new byte[12]; int I = 0; I < 12; I++) InValue[I] = KeepAliveValues.Bytes[I]; Result = Socket.IOControl(IOControlCode.KeepAliveValues,InValue,1)">); } return Result; } } [ System.Runtime.InteropServices.StructLayout ( System.Runtime.InteropServices.LayoutKind.Explicit ) ] unsafe struct TcpKeepAlive { [System.Runtime.InteropServices.FieldOffset(0)] [ System.Runtime.InteropServices.MarshalAs ( System.Runtime.InteropServices.UnmanagedType.ByValArray,SizeConst = ) ] fixed byte Bytes[]; [System.Runtime.InteropServices.FieldOffset()] On_Off; [System.Runtime.InteropServices.FieldOffset(4 KeepaLiveTime; [System.Runtime.InteropServices.FieldOffset(8 KeepaLiveInterval; } ? 
线程被打爆: Chaos_Threads : IChaosEvent { CancellationTokenSource cts; List<Task> tasks = new List<Task> Chaos_Threads() { cts = var count = context[Threadsif (Int32.TryParse(count,1)"> c)) { //Parallel.For(0,c,new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },(i) => 0; i < c; i++var task = { int j = 0; j < 120; j++) { if (cts.IsCancellationRequested) ; Thread.Sleep(10 * ); } },cts.Token); task.Start(); tasks.Add(task); } } } if (tasks != var task tasks) { { task.Dispose(); } { } } } } } ? 服务调用异常: Chaos_ServiceException : IChaosEvent { bool isStop = ; Chaos_ServiceException() { } if (isStop == ) throw new Exception(Chaos_ServiceException); } Stop() { isStop = ; } } ? 服务调用超时: Chaos_ServiceTimeout : IChaosEvent { CancellationTokenSource cts; Chaos_ServiceTimeout() { cts = ) Task.Delay(,cts.Token).Wait(); } Stop() { cts.Cancel(); isStop = ; } } 二、设计一个统一的混沌事件注入器,支持各类混沌事件注入,支持混沌事件的热更新和取消 ? 1. ChaosEventInjecter ? ? ?支持混沌事件接口实现的创建、混沌事件注入(全局注入一次,每次调用都注入)、混沌事件取消(停止) ? ? ?混沌事件接口实现的创建 private IChaosEvent GetOrCreateChaosEvent(ChaosEventType chaosEventType) { if (!eventDic.ContainsKey(chaosEventType)) { lock (syncObj) { eventDic.ContainsKey(chaosEventType)) { IChaosEvent chaosEvent = ; switch (chaosEventType) { case ChaosEventType.CPU75: chaosEvent = Chaos_HighCPU75(); break; ChaosEventType.CPU50: chaosEvent = Chaos_HighCPU50(); ChaosEventType.CPU25: chaosEvent = Chaos_HighCPU25(); ChaosEventType.Memory: chaosEvent = Chaos_Memory(); ChaosEventType.Threads: chaosEvent = Chaos_Threads(); ChaosEventType.ServiceException: chaosEvent = Chaos_ServiceException(); ChaosEventType.ServiceTimeout: chaosEvent = Chaos_ServiceTimeout(); ChaosEventType.Ports: chaosEvent = Chaos_Ports(); default: ; } if (chaosEvent != ) { eventDic.TryAdd(chaosEventType,chaosEvent); chaosEvent; } } } } eventDic[chaosEventType]; } ? ? 
混沌事件注入(全局注入一次,每次调用都注入) 1 private object syncObj = object(); 2 object eventObj = 3 4 static ChaosEventInjecter instance; 5 private ConcurrentDictionary<ChaosEventType,IChaosEvent> eventDic; 6 7 triggeredEvent; 8 9 ChaosEventInjecter() 10 11 eventDic = new ConcurrentDictionary<ChaosEventType,1)">12 triggeredEvent = 13 } void SingletonInject(ChaosEventType chaosEventType,Dictionary<string> context = ) { triggeredEvent.ContainsKey(chaosEventType)) { (eventObj) { triggeredEvent.ContainsKey(chaosEventType)) { var chaosEvent = GetOrCreateChaosEvent(chaosEventType); if (chaosEvent == null) ; chaosEvent.Inject(context); triggeredEvent.TryAdd(chaosEventType,chaosEventType); } } } } ? 按服务每次调用都注入 void ServiceInject(List< serviceId) { ChaosEventManager.GetIntance().StopInject = StopInject; (ChaosEventManager.GetIntance().IsEmpty()) { StopInject(); } var service serviceId) { ChaosEventManager.GetIntance().GetChaosEvent(service); (chaosEvent.ChaosEventType) { ChaosEventType.ServiceException: ChaosEventType.ServiceTimeout: Inject(chaosEvent.ChaosEventType,chaosEvent.ChaosValue); : SingletonInject(chaosEvent.ChaosEventType,1)">; } } } } ? 停止混沌注入 StopInject() { if (triggeredEvent == null && triggeredEvent.Count == 0) var chaosEventType triggeredEvent) { GetOrCreateChaosEvent(chaosEventType.Key); ; chaosEvent.Stop(); } triggeredEvent = (); } ? 
完整的ChaosEventInjecter代码: /// <summary> /// 混沌事件注入器 </summary> ChaosEventInjecter { ChaosEventInjecter instance; eventDic; triggeredEvent; ChaosEventInjecter() { eventDic = (); triggeredEvent = ChaosEventInjecter GetIntance() { if (instance == ) { instance = ChaosEventInjecter(); } } } instance; } SingletonInject() { var eventType = System.Configuration.ConfigurationManager.AppSettings[Chaos.Event]; if (eventType != ) { ChaosEventType chaosEvent = (ChaosEventType)Enum.Parse(typeof(ChaosEventType),eventType.ToString()); SingletonInject(chaosEvent); } } ; } } } } void Inject(ChaosEventType chaosEventType,1)"> GetOrCreateChaosEvent(chaosEventType); ; chaosEvent.Inject(context); } eventDic[chaosEventType]; } } ? 2.?ChaosEventManager ? ? 混沌事件管理类,负责从Redis中实时获取每个服务配置的混沌事件,支持混沌事件的定时更新10s: ChaosEventManager { ChaosEventManager instance; private ConcurrentDictionary< eventDic; CacheService service = CacheService.GetInstance(DefaultPool); public Action StopInject { get; set; } ChaosEventManager() { eventDic = new ConcurrentDictionary<(); GetAllChaosEvents(); StartUpdateTask(); } StartUpdateTask() { ) { Thread.Sleep(10000); GetAllChaosEvents(); } },TaskCreationOptions.LongRunning); task.ContinueWith((t) => (t.IsFaulted) StartUpdateTask(); }); task.Start(); } ChaosEventManager GetIntance() { ChaosEventManager(); } } } public ChaosEvent GetChaosEvent(string serviceId) { (eventDic.ContainsKey(serviceId)) eventDic[serviceId]; else return ; } IsEmpty() { return eventDic == null || eventDic.Count == GetAllChaosEvents() { var newEventDic = (); using (var client = service.GetClient()) { List<string> keys = client.GetHashKeys(ChaosEventsif (keys != ) keys.ForEach(x => newEventDic.TryAdd(x,client.GetValueFromHash<ChaosEvent>(var item newEventDic) { eventDic[item.Key] = item.Value; } if (newEventDic.Count == if (StopInject != ) StopInject(); } } 三、在HSF、API网关、中间件SDK层面依赖注入混沌事件注入器 ? 在HSF服务调用时增加混沌实验事件AOP注入 ? ? API网关、中间件SDK类似的方法进行注入。 ?同时我们设计了一个混沌事件注入工具:支持混沌事件实时注入、取消、仿真模拟执行: ? ? 
分享:仿真模拟执行的效果,例如CPU使用率25%: 以上工具和设计思路,分享给大家。 周国庆 2019/3/30 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容!