Get ReadyState from WebBrowser control without DoEvents
Below is a basic WinForms app code, illustrating how to wait for the DocumentCompleted
event asynchronously, using async/await
. It navigates to multiple pages, one after another. Everything is taking place on the main UI thread.
Instead of calling this.webBrowser.Navigate(url)
, it might be simulating a form button click, to trigger a POST-style navigation.
The webBrowser.IsBusy
async loop logic is optional, its purpose is to account (non-deterministically) for the page's dynamic AJAX code which may take place after window.onload
event.
using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace WebBrowserApp
{
public partial class MainForm : Form
{
WebBrowser webBrowser;
public MainForm()
{
InitializeComponent();
// create a WebBrowser
this.webBrowser = new WebBrowser();
this.webBrowser.Dock = DockStyle.Fill;
this.Controls.Add(this.webBrowser);
this.Load += MainForm_Load;
}
// Form Load event handler
async void MainForm_Load(object sender, EventArgs e)
{
// cancel the whole operation in 30 sec
var cts = new CancellationTokenSource(30000);
var urls = new String[] {
"http://www.example.com",
"http://www.gnu.org",
"http://www.debian.org" };
await NavigateInLoopAsync(urls, cts.Token);
}
// navigate to each URL in a loop
async Task NavigateInLoopAsync(string[] urls, CancellationToken ct)
{
foreach (var url in urls)
{
ct.ThrowIfCancellationRequested();
var html = await NavigateAsync(ct, () =>
this.webBrowser.Navigate(url));
Debug.Print("url: {0}, html: \n{1}", url, html);
}
}
// asynchronous navigation
async Task<string> NavigateAsync(CancellationToken ct, Action startNavigation)
{
var onloadTcs = new TaskCompletionSource<bool>();
EventHandler onloadEventHandler = null;
WebBrowserDocumentCompletedEventHandler documentCompletedHandler = delegate
{
// DocumentCompleted may be called several time for the same page,
// if the page has frames
if (onloadEventHandler != null)
return;
// so, observe DOM onload event to make sure the document is fully loaded
onloadEventHandler = (s, e) =>
onloadTcs.TrySetResult(true);
this.webBrowser.Document.Window.AttachEventHandler("onload", onloadEventHandler);
};
this.webBrowser.DocumentCompleted += documentCompletedHandler;
try
{
using (ct.Register(() => onloadTcs.TrySetCanceled(), useSynchronizationContext: true))
{
startNavigation();
// wait for DOM onload event, throw if cancelled
await onloadTcs.Task;
}
}
finally
{
this.webBrowser.DocumentCompleted -= documentCompletedHandler;
if (onloadEventHandler != null)
this.webBrowser.Document.Window.DetachEventHandler("onload", onloadEventHandler);
}
// the page has fully loaded by now
// optional: let the page run its dynamic AJAX code,
// we might add another timeout for this loop
do { await Task.Delay(500, ct); }
while (this.webBrowser.IsBusy);
// return the page's HTML content
return this.webBrowser.Document.GetElementsByTagName("html")[0].OuterHtml;
}
}
}
If you're looking to do something similar from a console app, here is an example of that.
The solution is simple:
// MAKE SURE ReadyState = Complete
while (WebBrowser1.ReadyState.ToString() != "Complete") {
Application.DoEvents();
}
// Move on to your sub-sequence code...
Dirty and quick.. I am a VBA guys, this logic has been working forever, just took me days and found none for C# but I just figured this out myself.
Following is my complete function, the objective is to obtain a segment of info from a webpage:
private int maxReloadAttempt = 3;
private int currentAttempt = 1;
private string GetCarrier(string webAddress)
{
WebBrowser WebBrowser_4MobileCarrier = new WebBrowser();
string innerHtml;
string strStartSearchFor = "subtitle block pull-left\">";
string strEndSearchFor = "<";
try
{
WebBrowser_4MobileCarrier.ScriptErrorsSuppressed = true;
WebBrowser_4MobileCarrier.Navigate(webAddress);
// MAKE SURE ReadyState = Complete
while (WebBrowser_4MobileCarrier.ReadyState.ToString() != "Complete") {
Application.DoEvents();
}
// LOAD HTML
innerHtml = WebBrowser_4MobileCarrier.Document.Body.InnerHtml;
// ATTEMPT (x3) TO EXTRACT CARRIER STRING
while (currentAttempt <= maxReloadAttempt) {
if (innerHtml.IndexOf(strStartSearchFor) >= 0)
{
currentAttempt = 1; // Reset attempt counter
return Sub_String(innerHtml, strStartSearchFor, strEndSearchFor, "0"); // Method: "Sub_String" is my custom function
}
else
{
currentAttempt += 1; // Increment attempt counter
GetCarrier(webAddress); // Recursive method call
} // End if
} // End while
} // End Try
catch //(Exception ex)
{
}
return "Unavailable";
}