Daniel Berger
2006-Dec-07 17:08 UTC
[Win32utils-devel] Fwd: win32-service problems with patch
Got this today. Please take a look and let me know what you think. - Dan ---------- Forwarded message ---------- From: Kevin Burge <kevin.burge at systemware.com> Date: Dec 7, 2006 6:45 AM Subject: win32-service problems with patch To: djberg96 at gmail.com Hi Daniel, Thanks for win32-service. In the process of using it for running a reliable-msg (drb) server and clients, I had some issues with the process abnormally terminating, or just not responding. After much tweaking and playing with it, I think I have something that''s reliable. Here are a summary of the changes: Logic changes: * now service_init is called before anything is actually done thread-wise (green or real). So, service_init needs to be fast, since it''s done before the StartServiceCtrl stuff (SCM will be waiting). I may change this so that START_PENDINGs are sent while in service_init, but because that greatly complicated matters, and I was just trying to get something that WORKED first, the current code does not do this. * now the service terminates if service_init or service_main exit or raise exceptions. Problems I encountered or resolved: * Waiting for the ThreadGroup is not sufficient to make sure ALL threads have exited. Instead, I wait on Thread.list to == 1. I don''t know that this was really necessary. * raising exceptions in service_init and service_main left the service running - required service_init and main to have rescue clauses to catch all exceptions AND NOT EXIT so that the SCM could still send the stop message, so that the threads would be cleaned up and etc. * ErrorStopService removed because it was abused - called when it didn''t make sense. Instead, I explicitly put in error handling and set hStopEvent or set service status appropriately. Probably could still be better. * Waiting for hThread to exit AFTER closing hStopEvent (which might be waiting on hStopEvent) * the state SHOULD == RUNNING when service_main starts (I had a problem where it was 0). * EventHookHash was a global variable that Ruby didn''t know about, so it was being garbage collected before the process terminated (especially when the ruby service control was trying to dispatch an event) * service_stop was never being called that I could tell - now I forcefully dispatch the event after the loop in the ruby green thread, before exiting. I didn''t do anything to handle exceptions within the spawned green threads for the events dispatched. Not sure that anything needs to be done. I have not tried this code with any other win32-service app (rails, etc.) so I don''t know how they''ll work with this. The basic flow is now: Thread 1 Thread 2 "Green Thread" service_init start ThreadProc wait for hStart, or Thread 2 Exit Startup and set hStart wait for hStop start green thread wait for hStop wait for hStop && normal processing service_main wait for hStop wait for hStop && normal processing On exit: set hStop wait for hStop wait for hStop && normal processing wait for ruby threads wait for hStopCompleted schedule service_stop thread wait for ruby threads wait for hStopCompleted exit set hStopCompleted wait for hStopCompleted wait for Thred 2 to exit set SERVICE_STOPPED && exit close hStopEvent && exit http://www.systemware.com/ ? service_c.patch Index: lib/win32/service.c ==================================================================RCS file: /var/cvs/win32utils/win32utils/win32-service/lib/win32/service.c,v retrieving revision 1.67 diff -u -u -r1.67 service.c --- lib/win32/service.c 25 Nov 2006 16:58:41 -0000 1.67 +++ lib/win32/service.c 7 Dec 2006 13:28:27 -0000 @@ -14,6 +14,7 @@ static VALUE cDaemonError; static VALUE v_service_struct, v_service_status_struct; +static HANDLE hThread; static HANDLE hStartEvent; static HANDLE hStopEvent; static HANDLE hStopCompletedEvent; @@ -21,12 +22,6 @@ static DWORD dwServiceState; static TCHAR error[1024]; -static VALUE EventHookHash; -static VALUE thread_group; -static int cAdd; -static int cList; -static int cSize; - CRITICAL_SECTION csControlCode; // I happen to know from looking in the header file // that 0 is not a valid service control code @@ -39,7 +34,6 @@ static VALUE service_close(VALUE); void WINAPI Service_Main(DWORD dwArgc, LPTSTR *lpszArgv); void WINAPI Service_Ctrl(DWORD dwCtrlCode); -void ErrorStopService(); void SetTheServiceStatus(DWORD dwCurrentState,DWORD dwWin32ExitCode, DWORD dwCheckPoint, DWORD dwWaitHint); @@ -57,27 +51,27 @@ (LPHANDLER_FUNCTION)Service_Ctrl); if(ssh == (SERVICE_STATUS_HANDLE)0){ - ErrorStopService(); - rb_raise(cDaemonError,"RegisterServiceCtrlHandler failed"); - } - - // wait for sevice initialization - for(i=1;TRUE;i++) - { - if(WaitForSingleObject(hStartEvent, 1000) == WAIT_OBJECT_0) - break; - - SetTheServiceStatus(SERVICE_START_PENDING, 0, i, 1000); + // no service to stop, no service handle to notify, + // nothing to do but exit + return; } // The service has started. SetTheServiceStatus(SERVICE_RUNNING, NO_ERROR, 0, 0); + SetEvent(hStartEvent); + // Main loop for the service. while(WaitForSingleObject(hStopEvent, 1000) != WAIT_OBJECT_0) { } + // Main loop for the service. + while(WaitForSingleObject(hStopCompletedEvent, 1000) != WAIT_OBJECT_0) + { + OutputDebugString(TEXT("service_main: waiting for stop completed event\n")); + } + // Stop the service. SetTheServiceStatus(SERVICE_STOPPED, NO_ERROR, 0, 0); } @@ -97,10 +91,10 @@ return result; } -VALUE Ruby_Service_Ctrl() +VALUE Ruby_Service_Ctrl(VALUE self) { while (WaitForSingleObject(hStopEvent,0) == WAIT_TIMEOUT) - { + { __try { EnterCriticalSection(&csControlCode); @@ -108,20 +102,22 @@ // Check to see if anything interesting has been signaled if (waiting_control_code != IDLE_CONTROL_CODE) { - // if there is a code, create a ruby thread to deal with it - // this might be over engineering the solution, but I don''t - // want to block Service_Ctrl longer than necessary and the - // critical section will block it. - VALUE val = rb_hash_aref(EventHookHash, INT2NUM(waiting_control_code)); - if(val!=Qnil) { - VALUE thread = rb_thread_create(Service_Event_Dispatch, (void*) val); - rb_funcall(thread_group, cAdd, 1, thread); + if (waiting_control_code != SERVICE_CONTROL_STOP) { + // if there is a code, create a ruby thread to deal with it + // this might be over engineering the solution, but I don''t + // want to block Service_Ctrl longer than necessary and the + // critical section will block it. + VALUE EventHookHash = rb_ivar_get(self, rb_intern("@event_hooks")); + if (EventHookHash != Qnil) { + VALUE val = rb_hash_aref(EventHookHash, INT2NUM(waiting_control_code)); + if(val!=Qnil) { + VALUE thread rb_thread_create(Service_Event_Dispatch, (void*) val); + } + } } - - // some seriously ugly flow control going on in here - if (waiting_control_code == SERVICE_CONTROL_STOP) + else { break; - + } waiting_control_code = IDLE_CONTROL_CODE; } } @@ -131,20 +127,19 @@ } // This is an ugly polling loop, be as polite as possible - rb_thread_polling(); - } - - for (;;) - { - VALUE list = rb_funcall(thread_group, cList, 0); - VALUE size = rb_funcall(list, cSize, 0); - if (NUM2INT(size) == 0) - break; - - // This is another ugly polling loop, be as polite as possible rb_thread_polling(); } - SetEvent(hStopCompletedEvent); + + // force service_stop call + { + VALUE EventHookHash = rb_ivar_get(self, rb_intern("@event_hooks")); + if (EventHookHash != Qnil) { + VALUE val = rb_hash_aref(EventHookHash, INT2NUM(SERVICE_CONTROL_STOP)); + if(val!=Qnil) { + VALUE thread = rb_thread_create(Service_Event_Dispatch, (void*) val); + } + } + } return Qnil; } @@ -198,16 +193,9 @@ if ((dwCtrlCode == SERVICE_CONTROL_STOP) || (dwCtrlCode == SERVICE_CONTROL_SHUTDOWN)) { - // how long should we give ruby to clean up? - // right now we give it forever :-) - while (WaitForSingleObject(hStopCompletedEvent, 500) == WAIT_TIMEOUT) - { - SetTheServiceStatus(dwState, NO_ERROR, 0, 0); + if (!SetEvent(hStopEvent)) { + SetTheServiceStatus(SERVICE_STOPPED, GetLastError(), 0, 0); } - - if (!SetEvent(hStopEvent)) - ErrorStopService(); - // Raise an error here? } } @@ -239,46 +227,82 @@ // Send status of the service to the Service Controller. if(!SetServiceStatus(ssh, &ss)){ - ErrorStopService(); + SetEvent(hStopEvent); } } -// Handle API errors or other problems by ending the service -void ErrorStopService(){ - - // If you have threads running, tell them to stop. Something went - // wrong, and you need to stop them so you can inform the SCM. - SetEvent(hStopEvent); - - // Stop the service. - SetTheServiceStatus(SERVICE_STOPPED, GetLastError(), 0, 0); -} - DWORD WINAPI ThreadProc(LPVOID lpParameter){ SERVICE_TABLE_ENTRY ste[] {{TEXT(""),(LPSERVICE_MAIN_FUNCTION)Service_Main}, {NULL, NULL}}; if (!StartServiceCtrlDispatcher(ste)){ - ErrorStopService(); - strcpy(error,ErrorDescription(GetLastError())); - // Very questionable here, we should generate an event - // and be polling in a green thread for the event, but - // this really should not happen so here we go - rb_raise(cDaemonError,error); + // no service to step, no service handle, no ruby + // exceptions, just terminate thread + return 1; } return 0; } static VALUE daemon_allocate(VALUE klass){ - EventHookHash = rb_hash_new(); - - thread_group = rb_class_new_instance(0, 0, - rb_const_get(rb_cObject, rb_intern("ThreadGroup"))); return Data_Wrap_Struct(klass, 0, 0, 0); } +static VALUE +daemon_mainloop_protect(VALUE self) +{ + // Call service_main method + if(rb_respond_to(self,rb_intern("service_main"))){ + rb_funcall(self,rb_intern("service_main"),0); + } + + return self; +} + +static VALUE +daemon_mainloop_ensure(VALUE self) +{ + int i; + + // signal both the ruby thread and service_main thread to terminate + SetEvent(hStopEvent); + + // wait for ALL ruby threads to exit + + for(i=1;TRUE;i++) + { + VALUE list = rb_funcall(rb_cThread, rb_intern("list"), 0); + + { + char buf[80]; + sprintf(buf, "num_threads = %u\n", RARRAY(list)->len); + } + + if (RARRAY(list)->len <= 1) + break; + + // This is another ugly polling loop, be as polite as possible + rb_thread_polling(); + + SetTheServiceStatus(SERVICE_STOP_PENDING, 0, i, 1000); + } + + // just one ruby thread + SetEvent(hStopCompletedEvent); + + // wait for the thread to stop BEFORE we close the hStopEvent + // handle + WaitForSingleObject(hThread, INFINITE); + + // Close the event handle (who cares if it succeeds, and + // we may be cleaning up after an exception, so let''s just + // let that exception fall through + CloseHandle(hStopEvent); + + return self; +} + /* * This is the method that actually puts your code into a loop and allows it * to run as a service. The code that is actually run while in the mainloop @@ -288,14 +312,19 @@ daemon_mainloop(VALUE self) { DWORD ThreadId; - HANDLE hThread; + HANDLE events[2]; + DWORD index; + VALUE result; + int status = 0; + VALUE EventHookHash; dwServiceState = 0; - // Save a couple symbols - cAdd = rb_intern("add"); - cList = rb_intern("list"); - cSize = rb_intern("size"); + // use a markable instance variable to prevent the garbage collector + // from freeing the hash before Ruby_Service_Ctrl exits, or just + // at any ole time while running the service + EventHookHash = rb_hash_new(); + rb_ivar_set(self, rb_intern("@event_hooks"), EventHookHash); // Event hooks if(rb_respond_to(self,rb_intern("service_stop"))){ @@ -358,11 +387,20 @@ } #endif + // calling init here so that init failures never even tries to + // start the service... of course that means that init methods + // must be very quick, because the SCM will be receiving no + // START_PENDING messages while init''s running - I may fix this + // later + if(rb_respond_to(self,rb_intern("service_init"))){ + rb_funcall(self,rb_intern("service_init"),0); + OutputDebugString(TEXT("mainloop: called service init\n")); + } + // Create the event to signal the service to start. hStartEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if(hStartEvent == NULL){ strcpy(error,ErrorDescription(GetLastError())); - ErrorStopService(); rb_raise(cDaemonError,error); } @@ -370,7 +408,6 @@ hStopEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if(hStopEvent == NULL){ strcpy(error,ErrorDescription(GetLastError())); - ErrorStopService(); rb_raise(cDaemonError,error); } @@ -378,47 +415,44 @@ hStopCompletedEvent = CreateEvent(NULL, TRUE, FALSE, NULL); if(hStopCompletedEvent == NULL){ strcpy(error,ErrorDescription(GetLastError())); - ErrorStopService(); rb_raise(cDaemonError,error); } - // Create the green thread to poll for Service_Ctrl events - rb_thread_create(Ruby_Service_Ctrl, 0); - // Create Thread for service main hThread = CreateThread(NULL,0,ThreadProc,0,0,&ThreadId); if(hThread == INVALID_HANDLE_VALUE){ strcpy(error,ErrorDescription(GetLastError())); - ErrorStopService(); rb_raise(cDaemonError,error); } - if(rb_respond_to(self,rb_intern("service_init"))){ - rb_funcall(self,rb_intern("service_init"),0); - } - - SetEvent(hStartEvent); + events[0] = hThread; + events[1] = hStartEvent; - // Call service_main method - if(rb_respond_to(self,rb_intern("service_main"))){ - rb_funcall(self,rb_intern("service_main"),0); - } + // wait for the Service_Main function to either start + // the service OR terminate - while(WaitForSingleObject(hStopEvent, 1000) != WAIT_OBJECT_0) + while((index = WaitForMultipleObjects(2, events, FALSE, 1000)) =WAIT_TIMEOUT) { } - // Close the event handle and the thread handle. - if(!CloseHandle(hStopEvent)){ - strcpy(error,ErrorDescription(GetLastError())); - ErrorStopService(); - rb_raise(cDaemonError,error); + if (index == WAIT_OBJECT_0) { + // thread exited, so the show is off + rb_raise(cDaemonError,"Service_Main thread exited abnormally"); } - // Wait for Thread service main - WaitForSingleObject(hThread, INFINITE); + // from this point onward, stopevent must be triggered! - return self; + // Create the green thread to poll for Service_Ctrl events + rb_thread_create(Ruby_Service_Ctrl, (void *)self); + + result = rb_protect(daemon_mainloop_protect, self, &status); + if (status) { + // service_main raised an exception + daemon_mainloop_ensure(self); + rb_jump_tag(status); + } + // service_main exited cleanly + return daemon_mainloop_ensure(self); } /*