【背景】
本文与之前的帖子:
【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前
类似;区别在于截至 2013-09-12,fiverr.com 网站已经改版,
因此重新写了代码,用来爬取改版之后的网站。
【FiverComScraper 代码】
1.截图:
界面截图与之前的帖子:
【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之前
中的一样,这里就不再贴了。
2.完整项目代码下载:
FiverrComScraper_2013-09-12_afterWebsiteChange.7z
3.源码分享:
(1)frmFiverrComScraper.cs
/*
* [File]
* frmFiverrComScraper.cs
*
* [Function]
* scrape fiverr.com
*
* [Note]
*
* [update]
* 2013-09-12
*
* [Author]
* Crifan Li
*
* [Contact]
* https://www.crifan.com/contact_me/
*
* [History]
*/
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Xml;
using System.IO;
using System.Web;
using Excel = Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop.Excel;
using HtmlAgilityPack;
/*
* icons:
*
* search/find
* http://www.easyicon.cn/icondetail/106/
*
* stop
* http://www.easyicon.cn/icondetail/568811/
*
* excel
* http://www.easyicon.cn/icondetail/1087666/
*
* csv
* http://www.easyicon.cn/icondetail/558199/
*
* help
* http://www.easyicon.cn/icondetail/12270/
*/
namespace FiverComScraper
{
public partial class frmFiverrComScraper : Form
{
// Base URL that the relative gig_url from the search JSON is appended to.
const string fiverrComDomain = "http://fiverr.com";
// Helper library instance; created in the constructor.
public crifanLib crifanLib;
// Button column for the per-gig link; created in the constructor and added to the grid in the Load handler.
public DataGridViewButtonColumn gigUrlColumn = null;
// Zero-based index of the gig url button column (it is added after the 12 text columns).
public static int girUrlColumnIdx = 12;
//need get more gig to scrape or not
// Set to false by the pause/stop handlers and by the search loop to end scraping.
bool needGetMore = true;
// State of the search as driven by the Search / Pause / Stop buttons.
enum search_status
{
// no search in progress
SEARCH_STATUS_STOPPED,
// a search is running
SEARCH_STATUS_SEARCHING,
// a search was interrupted by the Pause button and can be continued
SEARCH_STATUS_PAUSED
};
// Current state; updateUI() enables/disables the buttons according to it.
search_status curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
// Progress of the current search; kept in a field so that a paused search can be continued.
public struct search_info
{
// page number sent as the "page" query parameter of the JSON search request
public int pageNum;
// URL of the most recently built search request
public string searchUrl;
// not referenced anywhere in the visible code
public string searchRespHtml;
// not referenced anywhere in the visible code
public HtmlAgilityPack.HtmlDocument htmlDoc;
//public XmlNamespaceManager m;
//public HtmlNodeCollection gigDataList;
//public int nodeIdx;
// the "gigs" array taken from the JSON response of the current page
public Object[] itemObjList;
// index into itemObjList used by the item-processing loops
public int curItemIdx;
};
// State of the search that is currently running or paused.
search_info curSearchInfo = new search_info();
// One entry of the "gigs" array in the search JSON response; each field is filled
// from the JSON key of the same name. Example entry:
public struct gigSearchItemInfo
{
//{"title":"be your SEO teacher","title_full":"be your SEO teacher","duration":1,"price":"$5","rating":10,"rating_count":7,"is_featured":false,"gig_id":1082012,"gig_url":"/trickyguy/be-your-seo-teacher","img_medium":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\" alt=\\"be your SEO teacher\\" >","video_thumb":false,"seller_name":"trickyguy","seller_created_at":"12 months","seller_country_name":"India","seller_country":"in","seller_url":"/trickyguy","seller_level":null,"gig_image":"<img src=\\"http://cdn3.fiverrcdn.com/photos/1082012/v2_162/seo-advice.jpg?1349431594\\" alt=\\"be your SEO teacher\\" >"},
public string title;
public string title_full;
// shown in the grid as "<duration> days"
public int duration;
public string price;
// multiplied by 10 to obtain the gig rating column
public int rating;
public int rating_count;
public bool is_featured;
public int gig_id;
// path relative to the site root, e.g. "/trickyguy/be-your-seo-teacher"
public string gig_url;
public string img_medium;
// JSON value is either the boolean false or an <img> string; stored here as a string (empty when boolean)
public string video_thumb;
public string seller_name;
public string seller_created_at;
public string seller_country_name;
public string seller_country;
public string seller_url;
public string seller_level; //null/"level_two_seller"/"top_rated_seller"/
public string gig_image;
};
/// <summary>
/// Creates the form: hooks the embedded-assembly resolver, builds the designer
/// components, then creates the helper library and the gig url button column.
/// </summary>
public frmFiverrComScraper()
{
    // Hooked first, i.e. before InitializeComponent() runs.
    AppDomain.CurrentDomain.AssemblyResolve += CurrentDomain_AssemblyResolve;

    InitializeComponent();

    this.crifanLib = new crifanLib();
    this.gigUrlColumn = new DataGridViewButtonColumn();
}
/// <summary>
/// AssemblyResolve handler that loads a dll embedded in this executable's
/// Properties.Resources. The resource name is the simple assembly name with
/// every '.' replaced by '_'.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="args">Carries the name of the assembly that failed to resolve.</param>
/// <returns>
/// The loaded assembly, or null when the requested name is a "*.resources"
/// assembly or no embedded resource with that name exists (null tells the
/// runtime that this handler could not resolve the assembly).
/// </returns>
System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
{
    // "Name, Version=..., Culture=..." -> "Name"; a bare "Name.dll" -> "Name"
    string dllName = args.Name.Contains(",") ? args.Name.Substring(0, args.Name.IndexOf(',')) : args.Name.Replace(".dll", "");
    dllName = dllName.Replace(".", "_");
    if (dllName.EndsWith("_resources"))
    {
        return null;
    }

    System.Resources.ResourceManager rm = new System.Resources.ResourceManager(GetType().Namespace + ".Properties.Resources", System.Reflection.Assembly.GetExecutingAssembly());
    byte[] bytes = rm.GetObject(dllName) as byte[];
    if (bytes == null)
    {
        // Not one of the embedded dlls: report "not resolved" instead of
        // letting Assembly.Load(null) throw from inside the handler.
        return null;
    }
    return System.Reflection.Assembly.Load(bytes);
}
/// <summary>
/// Form Load handler: configures the result grid (12 text columns plus the
/// trailing gig url button column) and syncs the buttons with the current status.
/// </summary>
private void frmFiverrComScraper_Load(object sender, EventArgs e)
{
    // Header text and width of the text columns, in display order:
    //  (1) title
    //  (2) seller rating ( based on 1-100% format )
    //  (3) estimated delivery ( based on 24 hours - 7days format )
    //  (4) gig rating ( based on 1-100% )
    //  (5) orders in queue ( based on 0-9999 format )
    //  (6) level of the seller ( 1-3 )
    //  (7) has video ( yes or no )
    //  (8) express gigs ( yes or no )
    //  (9) country flag
    //  (10) positive and negative reviews ( based on 1-9999 )
    //  (11) top rated seller ( yes or no )
    string[] headerTexts =
    {
        "Title",
        "Seller Rating",
        "Estimated Delivery",
        "Gig Rating",
        "Orders in Queue",
        "Seller Level",
        "Has Video",
        "Is Express Gig",
        "Country Flag",
        "Positive Reviews",
        "Negative Reviews",
        "Is Top Rated Seller"
    };
    int[] columnWidths = { 100, 49, 66, 47, 54, 47, 42, 55, 106, 57, 60, 50 };

    //DataGridView init
    dgvSearchResult.ColumnCount = 12;
    dgvSearchResult.RowHeadersWidth = 60;
    dgvSearchResult.RowHeadersDefaultCellStyle.Alignment = DataGridViewContentAlignment.MiddleCenter;
    dgvSearchResult.RowHeadersWidthSizeMode = DataGridViewRowHeadersWidthSizeMode.DisableResizing;
    dgvSearchResult.AutoSizeColumnsMode = DataGridViewAutoSizeColumnsMode.None;
    dgvSearchResult.AutoSizeRowsMode = DataGridViewAutoSizeRowsMode.AllCellsExceptHeaders;

    // the title column takes whatever width is left over
    dgvSearchResult.Columns[0].AutoSizeMode = DataGridViewAutoSizeColumnMode.Fill;

    for (int colIdx = 0; colIdx < headerTexts.Length; colIdx++)
    {
        DataGridViewColumn column = dgvSearchResult.Columns[colIdx];
        column.HeaderText = headerTexts[colIdx];
        column.Width = columnWidths[colIdx];
    }

    //(12)gig url: a button column appended after the text columns
    gigUrlColumn.HeaderText = "Gig Url";
    gigUrlColumn.Text = "Buy Now";
    gigUrlColumn.Width = 106;
    dgvSearchResult.Columns.Add(gigUrlColumn);

    updateUI();
}
/// <summary>
/// Enables/disables the Search, Pause and Stop buttons and sets the Search
/// button caption according to curSearchStatus.
/// </summary>
private void updateUI()
{
    switch (curSearchStatus)
    {
        case search_status.SEARCH_STATUS_STOPPED:
            // idle: only a new search can be started
            btnSearch.Enabled = true;
            btnSearch.Text = "Search";
            btnPause.Enabled = false;
            btnStop.Enabled = false;
            break;

        case search_status.SEARCH_STATUS_PAUSED:
            // paused: continue or stop
            btnSearch.Enabled = true;
            btnSearch.Text = "Continue Search";
            btnPause.Enabled = false;
            btnStop.Enabled = true;
            break;

        case search_status.SEARCH_STATUS_SEARCHING:
            // running: pause or stop
            btnSearch.Enabled = false;
            btnSearch.Text = "Searching";
            btnPause.Enabled = true;
            btnStop.Enabled = true;
            break;
    }
}
/// <summary>
/// Builds one result row for a gig. Values available in the search JSON item are
/// copied directly; seller rating, orders in queue and the positive/negative
/// review counts are scraped from the gig's own page. The row is then added to
/// the grid and pending UI events are processed.
/// </summary>
/// <param name="gigItemInfo">Search JSON item describing the gig.</param>
/// <remarks>
/// Numbers scraped from the page are parsed tolerantly: text that is missing,
/// empty or not a number yields 0 instead of throwing, so a single odd gig page
/// no longer aborts the whole search.
/// </remarks>
private void processEachGitItemInfo(gigSearchItemInfo gigItemInfo)
{
    gigInfo singleGigInfo = new gigInfo();

    //(12)gig url
    singleGigInfo.gigUrl = fiverrComDomain + gigItemInfo.gig_url;
    string respGigHtml = crifanLib.getUrlRespHtml_multiTry(singleGigInfo.gigUrl);
    HtmlNode rootNode = crifanLib.htmlToHtmlDoc(respGigHtml).DocumentNode;

    //(1)title
    singleGigInfo.title = gigItemInfo.title_full;

    //(2)seller rating ( based on 1-100% format )
    //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
    //<div class="stats-row stats-row-ratings cf">
    //    <div class="stat ">91%<small>positive rating</small></div>
    //    <div class="stat-sml">by</div>
    //    <div class="stat">590<small>votes</small></div>
    //</div>
    HtmlNode positiveRaingNode = rootNode.SelectSingleNode("//div[@class='stats-row stats-row-ratings cf']/div[@class='stat ']");
    if (null != positiveRaingNode)
    {
        //"100%positive rating"
        string ratingPercentStr = "";
        if (crifanLib.extractSingleStr(@"(\d+)%", positiveRaingNode.InnerText, out ratingPercentStr))
        {
            singleGigInfo.sellerRating = parseScrapedIntOrZero(ratingPercentStr);
        }
    }

    //(3)estimated delivery ( based on 24 hours - 7days format )
    singleGigInfo.estimatedDeliveryStr = gigItemInfo.duration.ToString() + " days";

    //(4)gig rating ( based on 1-100% )
    singleGigInfo.gigRating = gigItemInfo.rating * 10;

    //(5)orders in queue ( based on 0-9999 format )
    //http://fiverr.com/trickyguy/be-your-seo-teacher
    //<div class="stats-row stats-row-locked icn-orders cf">
    //    <div class="stat">0</div>
    //    <div class="stat"><small>orders<br>in queue</small></div>
    //</div>
    HtmlNode icnOrdersCfStatNode = rootNode.SelectSingleNode("//div[@class='stats-row stats-row-locked icn-orders cf']/div[@class='stat']");
    if (null != icnOrdersCfStatNode)
    {
        singleGigInfo.ordersInQueue = parseScrapedIntOrZero(icnOrdersCfStatNode.InnerText); //"0"
    }

    //(6)level of the seller ( 1-3 )
    //(11)top rated seller ( yes or no )
    singleGigInfo.isTopRatedSeller = false;
    switch (gigItemInfo.seller_level)
    {
        case "level_one_seller":
            singleGigInfo.sellerLevel = 1;
            break;
        case "level_two_seller":
            singleGigInfo.sellerLevel = 2;
            break;
        case "top_rated_seller":
            singleGigInfo.sellerLevel = 3;
            singleGigInfo.isTopRatedSeller = true;
            break;
        default:
            // null, empty or an unknown level string
            singleGigInfo.sellerLevel = 0;
            break;
    }

    //(7)has video ( yes or no )
    singleGigInfo.hasVideo = (!string.IsNullOrEmpty(gigItemInfo.video_thumb));

    //(8)express gigs (yes or no )
    singleGigInfo.isExpressGig = gigItemInfo.is_featured;

    //(9)country flag: the country name is shown
    singleGigInfo.coutryFlag = gigItemInfo.seller_country_name; //"India"

    //(10)+ve reviews and -ve reviews ( based on 1-9999 )
    //http://fiverr.com/betaomicronalph/write-seo-articles-for-you
    //<div class="reviews-summary" itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
    //    <span class="summary summary-pos" itemprop="ratingValuePositive" content="4.6">537</span>
    //    <span class="summary summary-neg" itemprop="reviewCount" content="53">53</span>
    //</div>
    HtmlNode summaryPosNode = rootNode.SelectSingleNode("//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-pos']");
    singleGigInfo.positiveReviews = (null != summaryPosNode) ? parseScrapedIntOrZero(summaryPosNode.InnerText) : 0;

    HtmlNode summaryNegNode = rootNode.SelectSingleNode("//div[@class='reviews-summary' and @itemprop='aggregateRating']/span[@class='summary summary-neg']");
    singleGigInfo.negativeReviews = (null != summaryNegNode) ? parseScrapedIntOrZero(summaryNegNode.InnerText) : 0;

    storeGigInfo(singleGigInfo);

    //update UI
    System.Windows.Forms.Application.DoEvents();
}

// Parses an integer scraped from page text; surrounding whitespace and thousands
// separators ("1,234") are accepted. Returns 0 when the text is null or not a number.
private static int parseScrapedIntOrZero(string text)
{
    int parsedValue;
    bool parsedOk = Int32.TryParse(
        text,
        System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out parsedValue);
    return parsedOk ? parsedValue : 0;
}
// One row of the result grid, filled by processEachGitItemInfo() and displayed by storeGigInfo().
public struct gigInfo
{
public string title;
// percentage scraped from the gig page's "positive rating" stat
public int sellerRating;
// "<duration> days"
public string estimatedDeliveryStr;
// rating from the search JSON multiplied by 10
public int gigRating;
public int ordersInQueue;
// 1, 2 or 3 (top rated); 0 when the level string is missing or unknown
public int sellerLevel;
public bool hasVideo;
// copied from the JSON is_featured flag
public bool isExpressGig;
// holds the seller's country name, e.g. "India"
public string coutryFlag;
public int positiveReviews;
public int negativeReviews;
public bool isTopRatedSeller;
// absolute gig URL; stored as the Tag of the row's button cell
public string gigUrl;
};
/// <summary>
/// Converts one element of the "gigs" JSON array into a gigSearchItemInfo and
/// passes it on to processEachGitItemInfo().
/// </summary>
/// <param name="singleGigItemObject">
/// The deserialized JSON object of one gig; expected to be a
/// Dictionary&lt;string, Object&gt;.
/// </param>
/// <remarks>
/// Missing or null JSON fields no longer cause a NullReferenceException: string
/// fields fall back to an empty string, numeric fields to 0 and boolean fields to
/// false. An element that is not a dictionary, or that has no gig_url (so its page
/// cannot be fetched), is skipped.
/// </remarks>
private void processSingleGigSearchItemObject(Object singleGigItemObject)
{
    Dictionary<string, Object> itemDict = singleGigItemObject as Dictionary<string, Object>;
    if (itemDict == null)
    {
        return;
    }

    gigSearchItemInfo itemInfo = new gigSearchItemInfo();
    itemInfo.title = getGigItemString(itemDict, "title");
    itemInfo.title_full = getGigItemString(itemDict, "title_full");
    itemInfo.duration = getGigItemInt(itemDict, "duration");
    itemInfo.price = getGigItemString(itemDict, "price");
    itemInfo.rating = getGigItemInt(itemDict, "rating");
    itemInfo.rating_count = getGigItemInt(itemDict, "rating_count");
    itemInfo.is_featured = getGigItemBool(itemDict, "is_featured");
    itemInfo.gig_id = getGigItemInt(itemDict, "gig_id");
    itemInfo.gig_url = getGigItemString(itemDict, "gig_url");
    itemInfo.img_medium = getGigItemString(itemDict, "img_medium");

    //normal:
    //"video_thumb":false
    //special:
    //"video_thumb":"<img src=\\"http://static.dmcloud.net/4e5bf73e94a6f629c900461b/5172ce9c06361d76ae000218/thumb-162x121-f.jpeg\\"
    // -> only a string value is kept; a boolean (or anything else) means "no thumbnail"
    Object objItemVideoThumb;
    itemDict.TryGetValue("video_thumb", out objItemVideoThumb);
    itemInfo.video_thumb = (objItemVideoThumb as string) ?? string.Empty;

    itemInfo.seller_name = getGigItemString(itemDict, "seller_name");
    itemInfo.seller_created_at = getGigItemString(itemDict, "seller_created_at");
    itemInfo.seller_country_name = getGigItemString(itemDict, "seller_country_name");
    itemInfo.seller_country = getGigItemString(itemDict, "seller_country");
    itemInfo.seller_url = getGigItemString(itemDict, "seller_url");
    //may be null in the JSON
    itemInfo.seller_level = getGigItemString(itemDict, "seller_level");
    itemInfo.gig_image = getGigItemString(itemDict, "gig_image");

    if (string.IsNullOrEmpty(itemInfo.gig_url))
    {
        // without a gig url there is no gig page to scrape
        return;
    }

    processEachGitItemInfo(itemInfo);
}

// Returns the value stored under key as a string, or an empty string when the key is absent or its value is null.
private static string getGigItemString(Dictionary<string, Object> itemDict, string key)
{
    Object value;
    if (itemDict.TryGetValue(key, out value) && (value != null))
    {
        return value.ToString();
    }
    return string.Empty;
}

// Returns the value stored under key as an int, or 0 when the key is absent, null or not an integer.
private static int getGigItemInt(Dictionary<string, Object> itemDict, string key)
{
    Object value;
    if (!itemDict.TryGetValue(key, out value) || (value == null))
    {
        return 0;
    }
    if (value is int)
    {
        return (int)value;
    }
    int parsedValue;
    return Int32.TryParse(value.ToString(), out parsedValue) ? parsedValue : 0;
}

// Returns the value stored under key as a bool, or false when the key is absent, null or not a boolean.
private static bool getGigItemBool(Dictionary<string, Object> itemDict, string key)
{
    Object value;
    if (!itemDict.TryGetValue(key, out value) || (value == null))
    {
        return false;
    }
    if (value is bool)
    {
        return (bool)value;
    }
    bool parsedValue;
    return Boolean.TryParse(value.ToString(), out parsedValue) && parsedValue;
}
/// <summary>
/// Search button handler: starts a new search (status STOPPED) or continues a
/// paused one (status PAUSED). Any other status is ignored.
/// </summary>
/// <remarks>
/// Steps:
/// 1. fetch the main page to obtain category_id and the csrf token;
/// 2. request the normal search page with the main page as Referer;
/// 3. request the JSON result pages one after another and process every gig,
///    until the response reports no next page or the user pauses/stops
///    (needGetMore is cleared by the Pause/Stop handlers).
///
/// Differences from the previous implementation:
/// - the gigs of the last page (next_page == false) are processed too;
/// - a pause/stop request is never overwritten by the next_page flag;
/// - the page number advances only after a page was processed completely, so
///   pausing and continuing does not skip a page;
/// - when the search finishes on its own, the status returns to STOPPED so the
///   Search button becomes usable again;
/// - a missing data-json-path node or an unexpected JSON shape ends the search
///   instead of throwing.
/// </remarks>
private void btnSearch_Click(object sender, EventArgs e)
{
    bool isResume;
    if (curSearchStatus == search_status.SEARCH_STATUS_PAUSED)
    {
        isResume = true;
    }
    else if (curSearchStatus == search_status.SEARCH_STATUS_STOPPED)
    {
        isResume = false;
    }
    else
    {
        //unexpected status
        return;
    }

    if (!isResume)
    {
        // new search -> clear previously searched result
        crifanLib.dgvClearContent(dgvSearchResult);
        curSearchInfo = new search_info();
        curSearchInfo.pageNum = 1;
        curSearchInfo.curItemIdx = 0;
    }
    needGetMore = true;
    curSearchStatus = search_status.SEARCH_STATUS_SEARCHING;
    updateUI();

    Dictionary<string, string> headerDict;

    //STEP1: access main page
    string fiverMainUrl = "http://fiverr.com/";
    string respHtml = crifanLib.getUrlRespHtml_multiTry(fiverMainUrl);
    HtmlAgilityPack.HtmlDocument htmlDoc = crifanLib.htmlToHtmlDoc(respHtml);
    HtmlNode rootNode = htmlDoc.DocumentNode;

    //1.extract category_id
    //<div class="gig-carousel gallery loading cf carousel-noaction" data-json-path="/gigs/endless_page_as_json?host=homepage&type=endless&category_id=99912&limit=30" ... >
    string categoryId = "";
    HtmlNode datajsonPathNode = rootNode.SelectSingleNode("//div[@class and contains(@data-json-path, 'category_id=')]");
    if (datajsonPathNode != null)
    {
        string dataJsonPathValue = datajsonPathNode.Attributes["data-json-path"].Value;
        //on success categoryId is e.g. 99912
        crifanLib.extractSingleStr(@"category_id=(\d+)", dataJsonPathValue, out categoryId);
    }

    //2. extract X-CSRF-Token value
    //<meta content="6j77ymABhWzqVarvSOXSIl4MwW3KrEESH8rofrLem4w=" name="csrf-token" />
    string csrfTokenValue = "";
    HtmlNode csrfTokenNode = rootNode.SelectSingleNode("//meta[@content and @name='csrf-token']");
    if (csrfTokenNode != null)
    {
        csrfTokenValue = csrfTokenNode.Attributes["content"].Value;
    }

    //STEP2: access search url (the response body itself is not used)
    //http://fiverr.com/gigs/search?utf8=%E2%9C%93&search_in=everywhere&query=seo&x=12&y=12
    curSearchInfo.searchUrl = "http://fiverr.com/gigs/search?utf8=%E2%9C%93"
        + "&search_in=everywhere"
        + "&query=" + HttpUtility.UrlEncode(txbKeyword.Text)
        + "&x=12"
        + "&y=12";
    headerDict = new Dictionary<string, string>();
    headerDict.Add("Referer", fiverMainUrl);
    respHtml = crifanLib.getUrlRespHtml_multiTry(curSearchInfo.searchUrl, headerDict: headerDict);

    if (isResume)
    {
        // finish the page that was being processed when the search was paused
        if (processPendingItemsOfCurrentPage())
        {
            curSearchInfo.pageNum++;
        }
    }

    //STEP3: search each page, got json, parse search result to list
    int numPerPage = 50;
    while (needGetMore)
    {
        //page 1:
        //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=1
        //page 2:
        //http://fiverr.com/gigs/gigs_as_json?host=search&type=best_match&query_string=seo&search_filter=auto&category_id=99912&limit=50&page=2
        curSearchInfo.searchUrl = "http://fiverr.com/gigs/gigs_as_json?"
            + "host=search"
            + "&type=best_match"
            + "&query_string=" + HttpUtility.UrlEncode(txbKeyword.Text)
            + "&search_filter=auto"
            + "&category_id=" + categoryId
            + "&limit=" + numPerPage.ToString()
            + "&page=" + curSearchInfo.pageNum.ToString();
        headerDict = new Dictionary<string, string>();
        headerDict.Add("X-CSRF-Token", csrfTokenValue);
        headerDict.Add("X-Requested-With", "XMLHttpRequest");
        string respGigJson = crifanLib.getUrlRespHtml(curSearchInfo.searchUrl, headerDict: headerDict);

        //got json string like this:
        //{"gigs":[
        //{"title":"be your SEO teacher", ... ,"gig_url":"/trickyguy/be-your-seo-teacher", ... },
        //....
        //],"total_results":519,"next_page":true}
        Dictionary<string, Object> gigsObjDict = crifanLib.jsonToDict(respGigJson) as Dictionary<string, Object>;
        if (gigsObjDict == null)
        {
            //some error ?
            needGetMore = false;
            break;
        }

        bool hasNextPage = false;
        Object objNextPage;
        if (gigsObjDict.TryGetValue("next_page", out objNextPage) && (objNextPage is bool))
        {
            hasNextPage = (bool)objNextPage;
        }

        Object itemDictListObj;
        gigsObjDict.TryGetValue("gigs", out itemDictListObj);
        Object[] pageItemObjList = itemDictListObj as Object[];
        if (pageItemObjList == null)
        {
            //some error ?
            needGetMore = false;
            break;
        }

        curSearchInfo.itemObjList = pageItemObjList;
        curSearchInfo.curItemIdx = 0;
        if (!processPendingItemsOfCurrentPage())
        {
            // paused or stopped in the middle of this page: keep pageNum and
            // curItemIdx as they are, so a later "Continue Search" goes on from here
            break;
        }

        //update for next page
        curSearchInfo.pageNum++;
        needGetMore = hasNextPage;
    }

    if (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING)
    {
        // neither paused nor stopped by the user -> the search ran to its end
        curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
        updateUI();
    }
}

// Processes the items of curSearchInfo.itemObjList from curSearchInfo.curItemIdx onwards,
// advancing curItemIdx after each processed item and stopping as soon as needGetMore is false.
// Returns true when all items were processed without a pause/stop request.
private bool processPendingItemsOfCurrentPage()
{
    Object[] pendingItemObjList = curSearchInfo.itemObjList;
    if (pendingItemObjList != null)
    {
        while (needGetMore && (curSearchInfo.curItemIdx < pendingItemObjList.Length))
        {
            processSingleGigSearchItemObject(pendingItemObjList[curSearchInfo.curItemIdx]);
            curSearchInfo.curItemIdx++;
        }
    }
    return needGetMore;
}
/// <summary>
/// Pause button handler: when a search is running, switches to the PAUSED
/// status, refreshes the buttons and asks the search loop to stop fetching.
/// </summary>
private void btnPause_Click(object sender, EventArgs e)
{
    bool isSearching = (curSearchStatus == search_status.SEARCH_STATUS_SEARCHING);
    if (!isSearching)
    {
        // nothing to pause
        return;
    }

    curSearchStatus = search_status.SEARCH_STATUS_PAUSED;
    updateUI();
    // the progress itself stays in curSearchInfo
    needGetMore = false;
}
/// <summary>
/// Stop button handler: when a search is running or paused, switches to the
/// STOPPED status, refreshes the buttons and asks the search loop to stop fetching.
/// </summary>
private void btnStopSearching_Click(object sender, EventArgs e)
{
    switch (curSearchStatus)
    {
        case search_status.SEARCH_STATUS_SEARCHING:
        case search_status.SEARCH_STATUS_PAUSED:
            curSearchStatus = search_status.SEARCH_STATUS_STOPPED;
            updateUI();
            needGetMore = false;
            break;

        default:
            // already stopped
            break;
    }
}
/// <summary>
/// Appends one gig as a new row of the result grid, fills the row's button cell
/// (caption "Buy Now", gig url in its Tag), selects and scrolls to the new row
/// and redraws the row header numbers.
/// </summary>
/// <param name="singleGigInfo">The gig to display.</param>
/// <remarks>
/// The row is addressed through the index returned by Rows.Add rather than
/// through Rows.Count - 1: when the grid allows the user to add rows, the last
/// row is the empty placeholder row, not the row that was just added.
/// </remarks>
void storeGigInfo(gigInfo singleGigInfo)
{
    int newRowIdx = dgvSearchResult.Rows.Add(
        singleGigInfo.title,
        singleGigInfo.sellerRating,
        singleGigInfo.estimatedDeliveryStr,
        singleGigInfo.gigRating,
        singleGigInfo.ordersInQueue,
        singleGigInfo.sellerLevel,
        singleGigInfo.hasVideo ? "yes" : "no",
        singleGigInfo.isExpressGig,
        singleGigInfo.coutryFlag,
        singleGigInfo.positiveReviews,
        singleGigInfo.negativeReviews,
        singleGigInfo.isTopRatedSeller);

    DataGridViewRow newRow = dgvSearchResult.Rows[newRowIdx];

    // the button shows a fixed caption; the real url travels in the Tag
    DataGridViewCell gigUrlCell = newRow.Cells[girUrlColumnIdx];
    gigUrlCell.Value = "Buy Now";
    gigUrlCell.Tag = singleGigInfo.gigUrl;

    newRow.Selected = true;
    dgvSearchResult.FirstDisplayedScrollingRowIndex = newRowIdx;

    crifanLib.dgvDrawHeaderNum(dgvSearchResult);
}
/// <summary>
/// Grid click handler: a click on a cell of the gig url button column opens the
/// url stored in that cell's Tag with the system's default handler.
/// </summary>
/// <remarks>
/// Clicks on header cells, on other columns, or on a button cell without a url
/// in its Tag (e.g. an empty placeholder row) are ignored.
/// </remarks>
private void dgvSearchResult_CellContentClick(object sender, DataGridViewCellEventArgs e)
{
    if ((e.RowIndex < 0) || (e.ColumnIndex != girUrlColumnIdx))
    {
        return;
    }

    object gigUrlTag = dgvSearchResult.Rows[e.RowIndex].Cells[e.ColumnIndex].Tag;
    if (gigUrlTag == null)
    {
        return;
    }

    string gigUrl = gigUrlTag.ToString();
    if (string.IsNullOrEmpty(gigUrl))
    {
        return;
    }

    System.Diagnostics.Process.Start(gigUrl);
}
/// <summary>
/// Exports the result grid to an .xls file in the chosen save folder and then
/// shows the file via crifanLib.openFolderAndSelectFile.
/// </summary>
private void btnSaveAll_Click(object sender, EventArgs e)
{
    string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
    string fullFilename = Path.Combine(saveFolder, "fiverrComScrapedResult" + ".xls");

    //columns that use tag as value: the gig url button column
    List<int> useTagColumnIdxList = new List<int> { girUrlColumnIdx };

    crifanLib.dgvExportToExcel(dgvSearchResult, fullFilename, useTagColumnIdxList: useTagColumnIdxList);
    crifanLib.openFolderAndSelectFile(fullFilename);
}
/// <summary>
/// Opens Windows Explorer with the given file selected.
/// </summary>
/// <param name="fullFilename">Full path of the file to select.</param>
private void openFolderAndSelectFile(string fullFilename)
{
    // The path is quoted so that a comma or space inside it is not taken as
    // the end of the /select argument.
    System.Diagnostics.Process.Start("Explorer.exe", "/select,\"" + fullFilename + "\"");
}
/// <summary>
/// Exports the result grid to a comma-delimited .csv file in the chosen save
/// folder and then shows the file via crifanLib.openFolderAndSelectFile.
/// </summary>
private void btnExportToCsv_Click(object sender, EventArgs e)
{
    string saveFolder = crifanLib.getSaveFolder(fbdSaveFolder);
    string fullFilename = Path.Combine(saveFolder, "fiverrComScrapedResult.csv");

    //columns that use tag as value: the gig url button column
    List<int> useTagColumnIdxList = new List<int> { girUrlColumnIdx };

    crifanLib.dgvExportToCsv(dgvSearchResult, fullFilename, delimiter: ",", useTagColumnIdxList: useTagColumnIdxList);
    crifanLib.openFolderAndSelectFile(fullFilename);
}
// Clear All button handler: clears the content of the result grid.
private void btnClearAll_Click(object sender, EventArgs e)
{
crifanLib.dgvClearContent(dgvSearchResult);
}
/// <summary>
/// Help button handler: opens the online help page with the system's default handler.
/// </summary>
private void btnHelp_Click(object sender, EventArgs e)
{
    System.Diagnostics.Process.Start("http://giggladiator.com/help");
}
// Click handler with an empty body; it currently does nothing.
private void btnCreateAlert_Click(object sender, EventArgs e)
{
}
// Click handler with an empty body; it currently does nothing.
private void btnExpReaderToExcel_Click(object sender, EventArgs e)
{
}
// Click handler with an empty body; it currently does nothing.
private void btnExpReaderToCsv_Click(object sender, EventArgs e)
{
}
}
}
(2)
【总结】
转载请注明:在路上 » 【代码分享】C#代码:FiverComScraper – 只抓取fiverr.com,网站改版之后