-
-
Save hidegh/07d5588702e2b56a3fc2a3d73848d9f3 to your computer and use it in GitHub Desktop.
| using System; | |
| using System.Diagnostics; | |
| using System.IO; | |
| using System.Linq.Expressions; | |
| using System.Text; | |
| using System.Threading; | |
| using System.Threading.Tasks; | |
| namespace R | |
| { | |
| public static class ProcessEx | |
| { | |
| /// <summary> | |
| /// The article/forum: http://alabaxblog.info/2013/06/redirectstandardoutput-beginoutputreadline-pattern-broken/ | |
| /// - issues with BeginOutputReadLine and EOF (possibility to loose EOF) | |
| /// - issue with OutputDataReceived (hevy load) | |
| /// - FileStream (CopyAsynchTo & WriteAsynch) does support cancellation only at beginning... | |
| /// </summary> | |
| /// <param name="fileName"></param> | |
| /// <param name="arguments"></param> | |
| /// <param name="timeout"></param> | |
| /// <param name="standardInput"></param> | |
| /// <param name="standardOutput"></param> | |
| /// <param name="standardError"></param> | |
| /// <returns></returns> | |
| public static int ExecuteProcess( | |
| string fileName, | |
| string arguments, | |
| int timeout, | |
| Stream standardInput, | |
| Stream standardOutput, | |
| out string standardError) | |
| { | |
| int exitCode; | |
| using (var process = new Process()) | |
| { | |
| process.StartInfo.UseShellExecute = false; | |
| process.StartInfo.CreateNoWindow = true; | |
| process.StartInfo.FileName = fileName; | |
| process.StartInfo.Arguments = arguments; | |
| process.StartInfo.RedirectStandardInput = true; | |
| process.StartInfo.RedirectStandardOutput = true; | |
| process.StartInfo.RedirectStandardError = true; | |
| process.Start(); | |
| // | |
| // Write input stream...then close it! | |
| Object writerThreadLock = new object(); | |
| Thread writerThread = null; | |
| using (Task inputWriter = Task.Factory.StartNew(() => | |
| { | |
| try | |
| { | |
| // Mark as running... | |
| lock (writerThreadLock) | |
| writerThread = Thread.CurrentThread; | |
| // NOTE: Closing the input (process.StandardInput.BaseStream) after write op. is done (or aborted) is important! | |
| using (var processInputStream = process.StandardInput.BaseStream) | |
| standardInput.CopyTo(processInputStream); | |
| // Mark as finished | |
| lock (writerThreadLock) | |
| writerThread = null; | |
| } | |
| catch (ThreadAbortException) | |
| { | |
| // NOTE: to be able to "abort" the copy process and quit without additional exception we need to reset! | |
| Thread.ResetAbort(); | |
| } | |
| })) | |
| // | |
| // Read output stream and error string (both async)... | |
| using (Task<bool> processWaiter = Task.Factory.StartNew(() => process.WaitForExit(timeout))) | |
| using (Task outputReader = Task.Factory.StartNew(() => { process.StandardOutput.BaseStream.CopyTo(standardOutput); })) | |
| using (Task<string> errorReader = Task.Factory.StartNew(() => process.StandardError.ReadToEnd())) | |
| { | |
| // Check result (whether process finished) from processWaiter... | |
| bool processFinished = processWaiter.Result; | |
| // If we got timeout, we need to kill the process... | |
| if (!processFinished) | |
| { | |
| lock (writerThreadLock) | |
| { | |
| // NOTE: a non-null waitherThread signalizes that a inputWriter thread is still running (which we need to abort)... | |
| writerThread?.Abort(); | |
| } | |
| process.Kill(); | |
| } | |
| // NOTE: even after calling process kill (asynchronously) - not just on success - , make sure we wait for the process to finish. | |
| // See: https://msdn.microsoft.com/en-us/library/system.diagnostics.process.kill.aspx | |
| process.WaitForExit(); | |
| // Make sure everything is read from the streams... | |
| Task.WaitAll(outputReader, errorReader, inputWriter); | |
| // Timeout-ed process has to raise an exception... | |
| if (!processFinished) | |
| throw new TimeoutException("Process wait timeout expired"); | |
| // Success... | |
| standardError = errorReader.Result; | |
| exitCode = process.ExitCode; | |
| } | |
| } | |
| return exitCode; | |
| } | |
| } | |
| } |
C# Process RedirectStandardOutput pattern: BeginOutputReadLine + OutputDataReceived is broken 19
28 Jun 2013 | Default Tags: C# · Process
Source code is here: https://github.com/alabax/CsharpRedirectStandardOutput
I have a habit to review changes other guys do to my code. That’s helping me to maintain the quality and basically this covers my ass. One day I see a guy chainging my unit test’s header from [Fact] to [Fact(Skip = "Not a unittest")]. This unit test was rarely failing on the Continuous Integration server and when it happened before him he decided to turn off the test. If you are reading these lines don’t be a thoughtless selfish person don’t silently turn off other guy’s unit tests if they are failing. When I checked server’s logs I found another guy when he faced the test failing other day just did re-ran the build. Because he knew the test is failing occasionally. So don’t be apathetic selfish person and let the authors of unit tests know their tests are failing. Being ready to accept and share bugs is essential for developers. Period.
I knew the test was failing but it seemed to me the bug is somewhere else. I added more debug output and was waiting for it to fail again. I could not think this issue is something serious that needs investigation. But enough lyrics, let’s jump to…
Mechanics
So how do you typically create a background process, set an execution timeout for it and read its output streams? I think it looks something like this:
public static int ExecuteProcess(
string fileName,
string arguments,
int timeout,
out string standardOutput,
out string standardError)
{
int exitCode;
var standardOutputBuilder = new StringBuilder();
var standardErrorBuilder = new StringBuilder();
using (var process = new Process())
{
process.StartInfo.UseShellExecute = false;
process.StartInfo.CreateNoWindow = true;
process.StartInfo.RedirectStandardError = true;
process.StartInfo.RedirectStandardOutput = true;
process.StartInfo.FileName = fileName;
process.StartInfo.Arguments = arguments;
process.OutputDataReceived += (sender, args) =>
{
if (args.Data != null)
{
standardOutputBuilder.AppendLine(args.Data);
}
};
process.ErrorDataReceived += (sender, args) =>
{
if (args.Data != null)
{
standardErrorBuilder.AppendLine(args.Data);
}
};
process.Start();
// This is our place of interest
process.BeginOutputReadLine();
process.BeginErrorReadLine();
if (process.WaitForExit(timeout))
{
exitCode = process.ExitCode;
}
else
{
process.Kill();
throw new TimeoutException("Process wait timeout expired");
}
}
standardOutput = standardOutputBuilder.ToString();
standardError = standardErrorBuilder.ToString();
return exitCode;
}Everything looks ordinary. You prepare the ProcessStartInfo, tell that you want to RedirectStandardError and RedirectStandardOutput. You subscribe to OutputDataReceived and ErrorDataReceived to capture standard output and standard error streams from the process. Then you run the process with process.Start(). After that it is possible to tell runtime to start firing OutputDataReceived and ErrorDataReceived events (check why is it so). You do that by calling process.BeginOutputReadLine() and process.BeginErrorReadLine().
Update: as Jacob Korsgaard pointed out in comments the above code can be easily fixed by invoking process.WaitForExit() without arguments second time after the first call with timeout argument returns true. This waits until streams flush and should solve the problem. (Do you have failing code that brought you here? Please share your feedback how it helped!) If you are interested in alternative solution, keep reading.
Notice the comment line I placed between Start and BeginOutputReadLine calls, this place was problematic for us. If the created process manages to write to its output stream quickly after it gets created that output may just get discarded and never get to the OutputDataReceived handler because of some race conditions in BeginOutputReadLine.
You can try the following test to catch the race condition:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Xunit;
namespace RedirectStandardOutputLibrary.Tests
{
public class FlawedPatternTests
{
private const int DefaultTimeout = 1000;
private const int Runs = 1000;
[Fact]
public void TestItBreaks()
{
for (int i = 0; i < Runs; ++i)
{
string testString = Guid.NewGuid().ToString();
string standardOutput, standardError;
int returnCode = FlawedPattern.ExecuteProcess(
"cmd.exe",
string.Format("/c echo {0}", testString),
DefaultTimeout,
out standardOutput,
out standardError);
Assert.Equal(0, returnCode);
Assert.Equal(testString, standardOutput.Trim()); // should fail here
Assert.Equal(string.Empty, standardError);
}
}
}
}If you are lucky enough you’ll see this bug – standardOutput will be equal to "" or string.Empty. I’m not that lucky (though our CI server is) so I add Thread.Sleep(100) in ExecuteProcess code between calls to Start and BeginOutputReadLine.
process.Start();
Thread.Sleep(100);
process.BeginOutputReadLine();
process.BeginErrorReadLine();With that it fails after a few loops. Interestingly, it’s not failing on a first iteration.
Obviously not having Thread.Sleep() does not solve any issues. You won’t get some of the output in random cases and it leads to hard-to-reproduce bugs.
Oh, by the way I’m targeting .NET framework 4.0 with 4.5 installed.
Also as OutputDataReceived and ErrorDataReceived are only triggered on a newline or EOF. It might be a problem for the caller that expects to read anything when the child process has not written newline or not closed the output stream and is waiting for the input stream or other event. See here and here for details. But it is not so important for us – we expect to read all output unconditionally. Though there’s a nuance. If the process puts on the output the last line as "Hello World!\r\n" or as "Hello World!" does not make any difference for the events. In both cases string arrives as "Hello World!" and there’s no way to find out if there was a newline at the end of last line. Sometimes this matters.
Some other day I’ve seen a guy sharing his experience on StackOverflow that OutputDataReceived and ErrorDataReceived events are missing inputs under heavy load. My small investigation partially proves that. It is better to avoid using those events. And it takes us to the…
Remedy
What basically needs to be done is to fall back to Process.StandardOutput and Process.StandardError streams. Of course it’s not as easy as to write:
process.Start();
standardOutput = process.StandardOutput.ReadToEnd(); // wrong, deadlock is possible here
standardError = process.StandardError.ReadToEnd();Trying to read streams like this can lead to deadlocks. That’s well described in Remarks section of ProcessStartInfo.RedirectStandardOutput documentation. Streams have to be read in parallel and it requires writing asychronous code which may seem frightening. That’s why many people end up using the pattern of OutputDataReceived, ErrorDataReceived events I started with.
I do not usually advertise MS technology (rather doing the opposite) however I would recommend Task Parallel Library that works nicely here. The simplified version of the pattern (when it’s not required to terminate a process if it is running more than allowed timeout) ends up in:
public static void ExecuteProcess(
string fileName,
string arguments,
out string standardOutput,
out string standardError)
{
using (var process = new Process())
{
process.StartInfo.UseShellExecute = false;
process.StartInfo.CreateNoWindow = true;
process.StartInfo.RedirectStandardError = true;
process.StartInfo.RedirectStandardOutput = true;
process.StartInfo.FileName = fileName;
process.StartInfo.Arguments = arguments;
process.Start();
//Thread.Sleep(100);
using (Task processWaiter = Task.Factory.StartNew(() => process.WaitForExit()))
using (Task<string> outputReader = Task.Factory.StartNew(() => process.StandardOutput.ReadToEnd()))
using (Task<string> errorReader = Task.Factory.StartNew(() => process.StandardError.ReadToEnd()))
{
Task.WaitAll(processWaiter, outputReader, errorReader);
standardOutput = outputReader.Result;
standardError = errorReader.Result;
}
}
}This is short, self-explaining and it works in C# 4 and .NET 4.0. You can check even with uncommented Thread.Sleep it’s not missing output. Tests are available with the source code.
By the way read this article on when it’s good to dispose Tasks and when it’s not required. The code above is just the right case for .NET 4.0 to have them disposed with usings.
Upgrading approach
When going in a lager scale we need an execution timeout for the process. To properly do this we would need to cancel reading from standard streams if the wait timeout is met. Because we don’t want read Tasks to hang around and fail with Exception when we try to Kill the process. However cancelling is not possible because:
If we are to use CancellationToken we face that Read*Async methods taking CancellationToken do not exist for StreamReader class which is the type of Process.StandardOutput and Process.StandardError.
The BaseStream property of Process.StandardOutput or Process.StandardError is a FileStream and it has ReadAsync method that accepts a CancellationToken but that one effectively does not support cancellation (see this discussion). In runtime FileStream.IsAsync property is false for standard output and error streams. And the execution flows to the method where cancellation is checked only at the beginning (disassembled with ILSpy):
// System.IO.Stream
public virtual Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
{
if (!cancellationToken.IsCancellationRequested)
{
return this.BeginEndReadAsync(buffer, offset, count);
}
return Task.FromCancellation<int>(cancellationToken);
}Maybe there’s a way to use unmanaged code to be able to cancel stream/file operations or (which is better) to check if stream has data available to read without blocking thread. But I’m not aware of that.
But why does it have to be so complex at all? :) Why don’t just try the following straightforward variant.
public static int ExecuteProcess(
string fileName,
string arguments,
int timeout,
out string standardOutput,
out string standardError)
{
int exitCode;
using (var process = new Process())
{
process.StartInfo.UseShellExecute = false;
process.StartInfo.CreateNoWindow = true;
process.StartInfo.RedirectStandardError = true;
process.StartInfo.RedirectStandardOutput = true;
process.StartInfo.FileName = fileName;
process.StartInfo.Arguments = arguments;
process.Start();
//Thread.Sleep(100);
using (Task<bool> processWaiter = Task.Factory.StartNew(() => process.WaitForExit(timeout)))
using (Task<string> outputReader = Task.Factory.StartNew((Func<object, string>)ReadStream, process.StandardOutput))
using (Task<string> errorReader = Task.Factory.StartNew((Func<object, string>)ReadStream, process.StandardError))
{
bool waitResult = processWaiter.Result;
if (!waitResult)
{
process.Kill();
}
Task.WaitAll(outputReader, errorReader);
// if waitResult == true hope those already finished or will finish fast
// otherwise wait for taks to complete to be able to dispose them
if (!waitResult)
{
throw new TimeoutException("Process wait timeout expired");
}
exitCode = process.ExitCode;
standardOutput = outputReader.Result;
standardError = errorReader.Result;
}
}
return exitCode;
}
private static string ReadStream(object streamReader)
{
string result = ((StreamReader)streamReader).ReadToEnd();
return result;
} // put breakpoint on this lineEventually it works. I created ReadStream method on purpose so you can place a breakpoint to check how it works in case process gets killed. Run unit test AdvancedPatternTests.TestTimeout for this. When the process is killed the data already written to the stream gets returned and no exception is thrown. If you comment-out process.Kill() and Task.WaitAll(outputReader, errorReader) you can still observe how ReadStream methods successfully finish after Process object is disposed.
So my fears that Process.StandardOutput.ReadToEnd() (same for StandardError of course) throw if process is killed or disposed seem not to have a justification. I suspect that try/catch is needed in ReadStream for some edge-cases. And leaving it as is. Maybe me or somebody will come around and give all the stuff good testing. But for now I’m going to blindly update production to utilize this fresh new approach. ;)
======
19 thoughts on “C# Process RedirectStandardOutput pattern: BeginOutputReadLine + OutputDataReceived is broken”
--
Reply troy Oct 11,2013 07:50
Thanks for this post, I ran into this (I think with 4.5 upgrade) and it’s DRIVING ME NUTS ARGGH! … how annoying! :)
Pingback: Getting output from System.Diagnostics.Process ain’t easy! | The Hive Mind | test driven hosting infrastructure, stuff like that.
--
Reply Sparky Apr 1,2014 14:39
Great post with very useful examples. Thanks!
--
Reply Danny Apr 25,2014 04:17
I want to combine both standard output and standard error into a chronological stream . Let me clarify :
Lines of strings from a process are arriving one by one at both standard output and standard error . Is there a way to combine both standard output and standard output , but preserving the time of arrival of each line of string ?
--
Reply Michael Sugakov Apr 27,2014 23:21
Hi Danny,
It is not hard to do that, just change the tasks function collect lines read with ReadLine in a single StringWriter or StringBuilder. Check this for example https://gist.github.com/alabax/11353282
--
Reply Jacob Korsgaard May 28,2014 16:58
I think all your issues are based on you using Async stream redirect with WaitForExit(int). You aren’t guaranteed that the streams have flushed when using a timeout on your WaitForExit. The solution is to do a WaitForExit() after you waited with timeout, that’ll ensure the streams are flushed. See remarks here: http://msdn.microsoft.com/en-us/library/ty0d8k56(v=vs.110).aspx
--
Reply Michael Sugakov May 29,2014 00:08
Thanks for sharing this wonderful nuance Jacob.
What it means here one can modify the first ExecuteProcess this way:
process.Start();
// Thread.Sleep(100);
process.BeginOutputReadLine();
process.BeginErrorReadLine();
if (process.WaitForExit(timeout))
{
exitCode = process.ExitCode;
process.WaitForExit(); // add this
}My small testing shows that with the change it captures output even if Thread.Sleep call is uncommented. Unfortunately I don’t have access to the original repro case so can’t confirm that it works universally, hopefully it does.
Reply Alastair Maw Jun 21,2014 00:27
Or simpler:
StringBuilder stdout = new StringBuilder();
StringBuilder stderr = new StringBuilder();
process.StartInfo.RedirectStandardError = true;
process.StartInfo.RedirectStandardOutput = true;
process.OutputDataReceived += (sender, e) => { stdout.Append(e.Data); };
process.ErrorDataReceived += (sender, e) => { stderr.Append(e.Data); };
process.EnableRaisingEvents = true;
process.Start();
process.WaitForExit();Reply Michael Sugakov Jun 23,2014 19:31
Alastair, the subject of this post is to spawn the process and wait for its completion for certain limited amount of time, not infinitely like in your example.
There is EnableRaisingEvents property being set but there are no calls to BeginOutputReadLine, BeginErrorReadLine. Are you sure this code collects output of a process?
Reply Ron Harding Jul 24,2014 21:30
I have successfully used process in the past calling various command line processes, capturing associated output. Sometimes these processes had to run for extended periods of time. I did have occasional problems where the process would lock up, and the solution to that problem is the following;
when you start a process, be sure to record the associated process ID and/or the process name.
in the try/catch/finally when a process exception is caught, tell windows to kill the process then restart it. I saw in process a ‘kill’ command in there now. In the past i used ‘taskkill’ something that only lives in MS windows professional and above. so this was system that had to run several days. i found after running, the process stopped/restarted a few times in the log file.
Additionally, onto problems with process i would also use AutoResetEvent/ManualResetEvent, consider using multi-threading blocking calls possibly.
Reply Kalen Sep 15,2014 17:35
Thanks for this post – I’m using the approach it in a Jenkins / CI context and my test job is really robust now!
Pingback: O hai let me wanna-be! pe Trilema - Un blog de Mircea Popescu.
Reply Johnathan Kastner Jan 23,2015 20:25
Excellent example and progression – you probably saved me hours of tinkering. Thanks.
Reply David Dikman Sep 15,2015 15:49
Thanks a bunch for this, unfortunately it took me far too long to find.
Reply Spider May 5,2016 14:38
After reading this I’m still confused as to which is the correct way. Are you saying I can fix the first example by the suggestion by Jacob Korsgaard? If so can you explain where this goes please as I cont figure out exactly what Jacob Korsgaard means?
Reply Michael Sugakov May 6,2016 12:40
Yes, please use Jacob’s way. Here you can find modified code for the first sample which should correctly wait for standard error and standard output streams: https://gist.github.com/alabax/08da3044b899b86d8af316f8e426a485#file-sample-cs-L46 . The only change is line 46 added.
Please note that I can’t verify this as I am not doing C# for couple of years. Hopefully it works correct because it follows recommended way per documentation (https://msdn.microsoft.com/en-us/library/fb4aw7b8%28v=vs.110%29.aspx see Remarks section).
Reply Rayo Aug 16,2016 17:47
It seems to me there are two issues with the original pattern as you have pointed out in this article. Loss of trailing output (remedied by the additional call to WaitForExit()) but also the loss of leading output (// This is our place of interest). If you are recommending the use of the solution as indicated by Jacob, then surely it still leaves the latter case unresolved?
Reply b Aug 17,2016 19:01
I think there’s an important row missing (although comment related to is on line 35 and 36).
Before Task.WaitAll(outputReader, errorReader); we can safely call process.WaitForExit(); for bot success and timeout scenario
// NOTE: even after calling process kill (asynchronously) – not just on success – , make sure we wait for the process to finish.
// See: https://msdn.microsoft.com/en-us/library/system.diagnostics.process.kill.aspx
process.WaitForExit();Reply b Aug 18,2016 12:01
There’s one thing that i don’t like with the solution: it uses strings. Although you can change it’s binary representation form (UTF8/Ascii), it won’t help you if the process is supplying you with a binary data that isn’t related to strings, so conversions via encoding does not make sense.
For this 2 reasons 1′ve changed the way I read the output and the error stream:
using (Task outputReader = Task.Factory.StartNew(() => { var ms = new MemoryStream();process.StandardOutput.BaseStream.CopyTo(ms); return ms;}))
using (Task errorReader = Task.Factory.StartNew(() => { var ms = new MemoryStream(); process.StandardError.BaseStream.CopyTo(ms); return ms; }))And in the final code I get streams (bytes) which I also return (so the functions signature needs also a change):
outputMemoryStream = outputReader.Result
errorMemoryStream = errorReader.ResultBtw: you could supply those return memory streams (or any IStream) via parameter and use it inside the task (instead of creating a new) – so taking care of disposal is then left to the outside process.
Another similar threads: