Friday, December 2, 2011

HTML Element Extractor

using System.Collections.Generic;
using System.Windows.Forms;
using mshtml;

namespace WindowsFormsApplication7
{
    /// <summary>
    /// Static helpers that scan the document hosted in a <see cref="WebBrowser"/>
    /// (or the subtree below a single <see cref="HtmlElement"/>) for elements of a
    /// requested <see cref="ElementType"/>, identified by their <c>id</c> attribute.
    /// Elements without an id are always ignored.
    /// </summary>
    public class HtmlElementExtractor
    {
        /// <summary>
        /// Ids that have already been reported by <see cref="GetAllInputElemnts"/> or
        /// <see cref="GetInputElementFromHtmlElement"/>. The set is static and is never
        /// cleared by this class, so an id is reported at most once until the caller
        /// clears or replaces the set.
        /// </summary>
        public static HashSet<string> HtmlElementsFound { get; set; }

        static HtmlElementExtractor()
        {
            HtmlElementsFound = new HashSet<string>();
        }

        /// <summary>
        /// Returns the ids of all elements of the given type in the browser's current
        /// document that have not been reported before (see <see cref="HtmlElementsFound"/>).
        /// </summary>
        /// <param name="webBrowser">Browser whose document is scanned.</param>
        /// <param name="type">Kind of element to look for.</param>
        /// <returns>
        /// Newly discovered ids in the order they are encountered; an empty list when
        /// <paramref name="webBrowser"/> is null or has no document loaded.
        /// </returns>
        public static List<string> GetAllInputElemnts(WebBrowser webBrowser, ElementType type)
        {
            var inputElements = new List<string>();

            if (webBrowser == null || webBrowser.Document == null)
            {
                return inputElements;
            }

            // Document.All already holds every element of the document, including those
            // nested in forms, so one pass is enough; walking Children or Forms again
            // would only revisit the same elements.
            CollectNewIds(webBrowser.Document.All, type, inputElements);
            return inputElements;
        }

        /// <summary>
        /// Returns the ids of all elements of the given type below
        /// <paramref name="htmlElement"/> that have not been reported before
        /// (see <see cref="HtmlElementsFound"/>). The element itself is not inspected.
        /// </summary>
        /// <param name="type">Kind of element to look for.</param>
        /// <param name="htmlElement">Root of the subtree to scan; may be null.</param>
        /// <returns>Newly discovered ids; an empty list when the root is null.</returns>
        public static List<string> GetInputElementFromHtmlElement(ElementType type, HtmlElement htmlElement)
        {
            var inputElements = new List<string>();

            if (htmlElement != null)
            {
                // HtmlElement.All contains every descendant, so no recursion is needed.
                CollectNewIds(htmlElement.All, type, inputElements);
            }

            return inputElements;
        }

        /// <summary>
        /// Returns every element in the browser's current document whose id equals
        /// <paramref name="id"/> and whose type matches <paramref name="type"/>.
        /// Does not read or modify <see cref="HtmlElementsFound"/>.
        /// </summary>
        /// <param name="id">Id to look for (compared with ordinal string equality).</param>
        /// <param name="webBrowser">Browser whose document is scanned.</param>
        /// <param name="type">Kind of element to look for.</param>
        /// <returns>
        /// Matching elements, each listed once; an empty list when
        /// <paramref name="webBrowser"/> is null or has no document loaded.
        /// </returns>
        public static List<HtmlElement> GetAllHtmlElementTypes(string id, WebBrowser webBrowser, ElementType type)
        {
            var htmlElementCollection = new List<HtmlElement>();

            if (webBrowser == null || webBrowser.Document == null)
            {
                return htmlElementCollection;
            }

            // A single pass over Document.All visits each element exactly once, so the
            // result contains no duplicates and every hit honours the requested type.
            CollectById(webBrowser.Document.All, id, type, htmlElementCollection);
            return htmlElementCollection;
        }

        /// <summary>
        /// Returns every element below <paramref name="htmElement"/> whose id equals
        /// <paramref name="id"/> and whose type matches <paramref name="type"/>.
        /// The element itself is not inspected.
        /// </summary>
        /// <param name="id">Id to look for (compared with ordinal string equality).</param>
        /// <param name="htmElement">Root of the subtree to scan; may be null.</param>
        /// <param name="type">Kind of element to look for.</param>
        /// <returns>Matching descendants, each listed once; an empty list when the root is null.</returns>
        public static List<HtmlElement> GetInputElementFromHtmlElementTypes(string id, HtmlElement htmElement, ElementType type)
        {
            var inputElements = new List<HtmlElement>();

            if (htmElement != null)
            {
                // HtmlElement.All contains every descendant, so no recursion is needed.
                CollectById(htmElement.All, id, type, inputElements);
            }

            return inputElements;
        }

        /// <summary>
        /// Single place that maps an <see cref="ElementType"/> to the mshtml classes
        /// accepted for it, so every public method applies the same rule.
        /// </summary>
        private static bool IsOfType(HtmlElement element, ElementType type)
        {
            object dom = element.DomElement;

            switch (type)
            {
                case ElementType.Input:
                    return dom is HTMLInputElementClass;
                case ElementType.TextArea:
                    return dom is HTMLTextAreaElementClass;
                case ElementType.Text:
                    return dom is HTMLTextElementClass;
                case ElementType.Button:
                    // Both button classes are accepted everywhere so that an id reported
                    // by the id-collecting methods can also be looked up by id later.
                    return dom is HTMLButtonElementClass || dom is HTMLInputButtonElementClass;
                default:
                    return false;
            }
        }

        /// <summary>
        /// Appends to <paramref name="target"/> the id of each matching element in
        /// <paramref name="elements"/> that is not yet in <see cref="HtmlElementsFound"/>,
        /// and records it there.
        /// </summary>
        private static void CollectNewIds(System.Collections.IEnumerable elements, ElementType type, List<string> target)
        {
            // The property has a public setter; tolerate a caller having nulled it.
            if (HtmlElementsFound == null)
            {
                HtmlElementsFound = new HashSet<string>();
            }

            foreach (HtmlElement element in elements)
            {
                string elementId = element.Id;
                if (string.IsNullOrEmpty(elementId) || !IsOfType(element, type))
                {
                    continue;
                }

                // HashSet.Add returns false for an id that is already present, which
                // covers both "seen in an earlier call" and "seen earlier in this pass".
                if (HtmlElementsFound.Add(elementId))
                {
                    target.Add(elementId);
                }
            }
        }

        /// <summary>
        /// Appends to <paramref name="target"/> each element in
        /// <paramref name="elements"/> that has the given id and matches the given type.
        /// </summary>
        private static void CollectById(System.Collections.IEnumerable elements, string id, ElementType type, List<HtmlElement> target)
        {
            foreach (HtmlElement element in elements)
            {
                string elementId = element.Id;
                if (string.IsNullOrEmpty(elementId) || elementId != id)
                {
                    continue;
                }

                if (IsOfType(element, type))
                {
                    target.Add(element);
                }
            }
        }
    }
}