﻿using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace crawlWebsiteAndExtractInfo
{
    public partial class frmCrawlWebsite : Form
    {
        public frmCrawlWebsite()
        {
            InitializeComponent();
        }

        private void btnCrawlAndExtract_Click(object sender, EventArgs e)
        {
            //step1: get html from url
            //http://www.songtaste.com/user/351979/
            string urlToCrawl = txbUrlToCrawl.Text;
            //generate http request
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(urlToCrawl);
            //use GET method to get url's html
            req.Method = "GET";
            //use request to get response
            HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
            string htmlCharset = "GBK";
            //use songtaste's html's charset GB2312 to decode html
            //otherwise will return messy code
            Encoding htmlEncoding = Encoding.GetEncoding(htmlCharset);
            StreamReader sr = new StreamReader(resp.GetResponseStream(), htmlEncoding);
            //read out the returned html
            string respHtml = sr.ReadToEnd();
            rtbExtractedHtml.Text = respHtml;
        }

        private void btnExtractInfo_Click(object sender, EventArgs e)
        {
            //step2: extract expected info
            //<h1 class="h1user">crifan</h1>
            string h1userP = @"<h1\s+class=""h1user"">(?<h1user>.+?)</h1>";
            Match foundH1user = (new Regex(h1userP)).Match(rtbExtractedHtml.Text);
            if (foundH1user.Success)
            {
                //extracted the expected h1user's value
                txbExtractedInfo.Text = foundH1user.Groups["h1user"].Value;
            }
            else
            {
                txbExtractedInfo.Text = "Not found h1 user !";
            }
        }

        private void lklTutorialUrl_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            string tutorialUrl = "http://www.crifan.com/crawl_website_html_and_extract_info_using_csharp";
            System.Diagnostics.Process.Start(tutorialUrl);
        }
    }
}
