【背景】
之前写过一个Python脚本,用于访问:
http://www.gcgis.org/webmappub/titleWF.aspx
http://www.greenvillecounty.org/vrealpr24/clRealProp.ASP?WCI=tplRealSearch&WCE=Form1&WCU=
等地址,
查找并下载匹配的地图图片,然后提取相关字段的信息,保存为excel文件。
【download_gcgis_map_pic代码分享】
1.截图:
(1)运行效果:
(2)保存信息为excel文件:
(3)下载的地图图片:
2.Python项目代码下载:
download_gcgis_map_pic_2012-11-13.7z
3.代码分享:
(1)download_gcgis_map_pic.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------------------------------------
Function:
Web Scrape 11-10-12
https://www.elance.com/j/web-scrape/35102090/
Version: 2012-11-13
Author: Crifan Li
Contact: https://www.crifan.com/about/me/
-------------------------------------------------------------------------------
"""
#---------------------------------import---------------------------------------
import re;
import sys;
sys.path.append("libs");
from BeautifulSoup import BeautifulSoup,Tag,CData;
import crifanLib;
import logging;
import xlwt;
def searchFromTwoTd(htmlToSearch, keyName, doHtmlDecode=True):
    """Return the text of the <td> cell that directly follows a label cell.

    Searches htmlToSearch for two adjacent cells of the form
        <td style="...">keyName</td> <td [style="..."]>VALUE</td>
    and returns VALUE with leading/trailing whitespace removed.

    Args:
        htmlToSearch: HTML text to search in.
        keyName: text of the label cell; it is matched literally.
        doHtmlDecode: when true and a non-empty value was found, the value is
            passed through crifanLib.decodeHtmlEntity(decodedEncoding="GBK")
            before being returned.

    Returns:
        The stripped (and optionally entity-decoded) value, or "" when the
        label cell is not found or its value cell is empty.
    """
    # re.escape: the label is plain text, so characters such as "(" or "."
    # must not be interpreted as regex syntax.
    # ".*?" (instead of ".+?") for the value: an empty "<td></td>" has to give
    # "" rather than forcing the match to run on past the closing tag.
    twoTdPattern = (
        r'<td\s+?style=".+?">' + re.escape(str(keyName)) + r'</td>'
        r'\s*?<td(\s+?style=".+?")?>(?P<foundValue>.*?)</td>'
    )
    foundTwoTd = re.search(twoTdPattern, htmlToSearch)
    if not foundTwoTd:
        return ""

    foundValue = foundTwoTd.group("foundValue").strip()
    if foundValue and doHtmlDecode:
        # http://fredericiana.com/2010/10/08/decoding-html-entities-to-text-in-python/
        foundValue = crifanLib.decodeHtmlEntity(foundValue, decodedEncoding="GBK")
    return foundValue
def extractCommonInfo(searchRespHtml):
    """Collect the common property fields from a real-property result page.

    Every field is read with searchFromTwoTd (HTML-entity decoding left at its
    default), i.e. from markup shaped like
        <td style="...">Mail Addr</td>
        <td style="width:35%">224 STONE LAKE DR </td>

    Args:
        searchRespHtml: HTML returned by the property search.

    Returns:
        dict mapping the output column name to the extracted text; a field
        that is not found maps to "".
    """
    # (key in the returned dict, label text of the HTML cell)
    # The trailing comments are sample values from one result page.
    fieldLabelPairs = (
        ('Mail Addr', "Mail Addr"),                  # 224 STONE LAKE DR
        ('Mail City', "City"),                       # GREENVILLE
        ('Owner1', "Owner 1"),                       # HOOPER JOAN KIRKSEY
        ('Owner2', "Owner 2"),                       # (blank)
        ('Mail Zip', "Zip"),                         # 29609
        ('Desc', "Desc"),                            # 1,PT2,7
        ('Loc', "Loc"),                              # 707 GORDON ST EXT
        ('Acreage', "Acreage"),                      # 0.49
        ('SqFt', "Sq Footage"),                      # 0
        ('Deed Date', "Deed Date"),                  # 05/11/2007
        ('LandUse', "Land Use"),                     # 6800
        ('Fair Market Val', "Fair Market Value"),    # (blank)
        ('Sale Price', "Sales Price"),               # (blank)
        ('Tax Val', "Taxable Market Value"),         # $24,500
        ('Bath', "Num Bathrooms"),                   # 0
        ('Bed', "Num Bedrooms"),                     # 0
        ('RollBack', "Total Rollback"),              # $0.00
        ('Half Bath', "Num Half Baths"),             # 0
        ('Assmt Class', "Assmt Class"),              # OT
    )

    commonInfoDict = {}
    for dictKey, htmlLabel in fieldLabelPairs:
        commonInfoDict[dictKey] = searchFromTwoTd(searchRespHtml, htmlLabel)
    return commonInfoDict
def extractOutstandingInfo(searchRespHtml):
    """Pull the first "outstanding" row out of a property result page.

    The page holds a header row with the cells " Years Outstanding",
    " Account No" and " Amount Outstanding", followed by a data row such as
        <td ...> 2011 </td> <td ...> 201100011448477001 </td> <td ...> $1,438.60 </td>

    Args:
        searchRespHtml: HTML returned by the property search.

    Returns:
        Tuple (yearsOutstanding, accountNo, amountOutstanding) of strings;
        all three are "" when the header/data rows are not found.
    """
    # Three header cells, then (anything), then the three data cells.
    outstandingPattern = (
        r'<td .+?> Years\s*?Outstanding.+?</td>\s*?'
        r'<td .+?> Account\s*?No.+?</td>\s*?'
        r'<td .+?> Amount\s*?Outstanding.+?</td>.+?'
        r'<td .+?>\s*?(?P<yearsOutstanding>\d+)\s*</td>\s*?'
        r'<td .+?>\s*?(?P<accountNo>\d+)\s*</td>\s*?'
        r'<td .+?>\s*?(?P<amountOutstanding>[$,\.\d]+?)\s*</td>'
    )
    # re.S: the cells are spread over several lines
    outstandingMatch = re.search(outstandingPattern, searchRespHtml, re.S)
    print("foundYearAccountAmount= %s" % (outstandingMatch,))
    if not outstandingMatch:
        return ("", "", "")

    outstandingRow = outstandingMatch.group("yearsOutstanding", "accountNo", "amountOutstanding")
    print("yearsOutstanding=%s, accountNo=%s, amountOutstanding=%s" % outstandingRow)
    return outstandingRow
def getHtmlByMapIdAndYear(mapId, year):
    """POST the real-property search form for one map number and one year.

    A captured request body looks like:
        SelectYear=2012&txt_Name=&txt_Street=&txt_MapNo=0230000400502&txt_Subdiv=
        &B1=Submit&txt_Voided_MApNo=&SelectSalesYear=ALL&txt_Sales_SheetNo=

    Args:
        mapId: map number, sent as txt_MapNo.
        year: year, sent as SelectYear.

    Returns:
        Whatever crifanLib.getUrlRespHtml returns for the request (the
        response HTML).
    """
    # Only year and map number vary; the other form fields keep the values
    # seen in the captured request.
    searchFormFields = {
        'SelectYear' : str(year),
        'txt_Name' : "",
        'txt_Street' : "",
        'txt_MapNo' : str(mapId),
        'txt_Subdiv' : "",
        'B1' : "Submit",
        'txt_Voided_MApNo' : "",
        'SelectSalesYear' : "ALL",
        'txt_Sales_SheetNo' : "",
    }
    return crifanLib.getUrlRespHtml(
        "http://www.greenvillecounty.org/vrealpr24/clRealProp.ASP?WCI=tplRealSearch&WCE=Form1&WCU=",
        searchFormFields)
def processEachMapId(mapId):
    """Gather every output field for one map number.

    The 2012 result page supplies the common property fields and the 2012
    outstanding amount; the 2010 and 2011 pages are fetched only for their
    outstanding amounts.

    Args:
        mapId: map number to look up.

    Returns:
        dict with the keys produced by extractCommonInfo plus
        '2012 Outstanding', '2011 Outstanding', '2010 Outstanding' and 'MapID'.
    """
    latestYearHtml = getHtmlByMapIdAndYear(mapId, 2012)
    crifanLib.printCurrentCookies()

    propertyInfo = extractCommonInfo(latestYearHtml)
    # extractOutstandingInfo returns (years, accountNo, amount); only the
    # amount is stored
    propertyInfo['2012 Outstanding'] = extractOutstandingInfo(latestYearHtml)[2]

    for earlierYear in (2010, 2011):
        print("eachYear= %s" % (earlierYear,))
        earlierYearHtml = getHtmlByMapIdAndYear(mapId, earlierYear)
        amountForYear = extractOutstandingInfo(earlierYearHtml)[2]
        crifanLib.printCurrentCookies()
        propertyInfo['%d Outstanding' % earlierYear] = amountForYear

    # finally add the mapId
    propertyInfo["MapID"] = str(mapId)
    print("commonInfoDict= %s" % (propertyInfo,))
    return propertyInfo
def _extractHiddenInputValue(html, inputName):
    """Return the value of the hidden <input> whose name and id are inputName.

    Matches markup of the form
        <input type="hidden" name="X" id="X" value="..." />
    and returns the value text, or "" when html contains no such input with a
    non-empty value attribute.
    """
    escapedName = re.escape(inputName)
    found = re.search(
        r'<input\s*?type="hidden"\s*?name="' + escapedName
        + r'"\s*?id="' + escapedName
        + r'"\s*?value="(?P<value>.+?)"\s*?/>',
        html)
    if found:
        return found.group("value")
    return ""


def downloadMap(mapId, loc):
    """Download the gcgis.org map image for one property location.

    Request sequence (the "[n/77]" tags are kept from the original notes):
      1. GET titleWF.aspx (obtains the ASP.NET_SessionId cookie).
      2. GET PostBack_WebForm.aspx and read its __EVENTVALIDATION/__VIEWSTATE.
      3. POST command "zoomToStartExtent"; the response carries the image URL
         in the hidden input mapImageSrc_hidden. The image is downloaded under
         its own file name.
      4. GET find.aspx and read its __EVENTVALIDATION/__VIEWSTATE.
      5. POST the location search (find_TextBox=loc).
      6. POST command "zoomToFeature", then download the same image URL again
         as "real_<name>".

    Args:
        mapId: map number; only used in the progress output.
        loc: location text sent as the search term, e.g. "707 GORDON ST EXT".

    Returns:
        None. When one of the form pages lacks its __EVENTVALIDATION or
        __VIEWSTATE input, an error is logged and the function returns
        without downloading (previously this ended in a NameError).
    """
    print("mapId=%s, loc=%s" % (mapId, loc))

    # 1. get cookie: ASP.NET_SessionId
    titleWfUrl = "http://www.gcgis.org/webmappub/titleWF.aspx"
    crifanLib.getUrlRespHtml(titleWfUrl)
    crifanLib.printCurrentCookies()

    # 2. [11/77] fetch the post-back form to get its state tokens
    postBackUrl = "http://www.gcgis.org/webmappub/PostBack_WebForm.aspx"
    postBackFormHtml = crifanLib.getUrlRespHtml(postBackUrl)
    eventValidationForGetPic = _extractHiddenInputValue(postBackFormHtml, "__EVENTVALIDATION")
    viewStateForGetPic = _extractHiddenInputValue(postBackFormHtml, "__VIEWSTATE")
    if not (eventValidationForGetPic and viewStateForGetPic):
        logging.error("Can not find __EVENTVALIDATION/__VIEWSTATE in %s, skip map of %s",
                      postBackUrl, loc)
        return
    print("eventValidationForGetPic= %s" % (eventValidationForGetPic,))
    print("viewStateForGetPic= %s" % (viewStateForGetPic,))

    # 3. [59/77] zoom to the start extent; the response contains the pic url.
    # Captured request fields with empty values (accountInUse_hid,
    # mapImageSrc_hidden, layerIds_hid, ...) are simply not sent.
    postData = {
        '__VIEWSTATE' : viewStateForGetPic,
        '__EVENTVALIDATION' : eventValidationForGetPic,
        'govUser_hid' : "false",
        'validUser_hid' : "true",
        'command_hidden' : "zoomToStartExtent",
        'mapImageHeight_hidden' : "431",
        'mapImageWidth_hidden' : "1088",
        'xMinMap_hidden' : "0",
        'yMinMap_hidden' : "0",
        'xMaxMap_hidden' : "0",
        'yMaxMap_hidden' : "0",
        'x1_hidden' : "0",
        'y1_hidden' : "0",
        'x2_hidden' : "0",
        'y2_hidden' : "0",
        'vmlLayerID_hid' : "0",
        'vmlObjectID_hid' : "0",
        'numSelect_hid' : "0",
        'sBuffer_hid' : "0",
    }
    postBackUrlRespHtml = crifanLib.getUrlRespHtml(postBackUrl, postData)
    print("postBackUrlRespHtml= %s" % (postBackUrlRespHtml,))

    # e.g. <input type="hidden" name="mapImageSrc_hidden" id="mapImageSrc_hidden"
    #      value="http://www.gcgis.org/output/webmappub_zs-gisims130202756373.jpg" />
    gisImgUrl = _extractHiddenInputValue(postBackUrlRespHtml, "mapImageSrc_hidden")
    if gisImgUrl:
        print("gisImgUrl= %s" % (gisImgUrl,))
        getImgName = gisImgUrl.split("/")[-1]
        print("getImgName= %s" % (getImgName,))
        crifanLib.manuallyDownloadFile(gisImgUrl, getImgName)

    # 4. [61/77] the find form has its own __EVENTVALIDATION and __VIEWSTATE
    findLocUrl = "http://www.gcgis.org/webmappub/find.aspx?govUser=false&validUser=true"
    getFindLocUrlRespHtml = crifanLib.getUrlRespHtml(findLocUrl)
    eventValidation = _extractHiddenInputValue(getFindLocUrlRespHtml, "__EVENTVALIDATION")
    viewState = _extractHiddenInputValue(getFindLocUrlRespHtml, "__VIEWSTATE")
    if not (eventValidation and viewState):
        logging.error("Can not find __EVENTVALIDATION/__VIEWSTATE in %s, skip map of %s",
                      findLocUrl, loc)
        return
    print("eventValidation= %s" % (eventValidation,))
    print("viewState= %s" % (viewState,))

    # 5. [66/77] do search
    # loc is passed unquoted (the urllib.quote_plus call was disabled in the
    # original) -- presumably crifanLib.getUrlRespHtml url-encodes the post data
    findTextBox = loc
    print("findTextBox= %s" % (findTextBox,))
    postData = {
        '__VIEWSTATE' : viewState,
        '__EVENTVALIDATION' : eventValidation,
        'find_TextBox' : findTextBox, # captured as 707++GORDON+ST+EXT
        'find_Button' : "Search",
        'govUser_hid' : "false",
        'validUser_hid' : "true",
        'activeLayerId_hid' : "25",
    }
    headerDict = {
        "Referer" : "http://www.gcgis.org/webmappub/find.aspx?govUser=false&validUser=true",
    }
    postFindLocUrlRespHtml = crifanLib.getUrlRespHtml(findLocUrl, postData, headerDict)
    print("postFindLocUrlRespHtml= %s" % (postFindLocUrlRespHtml,))

    # 6. [76/77] zoom to the found feature to get the real pic
    # NOTE(review): the extent (xMinMap_hidden ...) and the feature ids
    # (x1/y1/vmlLayerID/vmlObjectID = 2000/19661) are the literal values of the
    # captured request for "707 GORDON ST EXT"; they are not taken from the
    # search response above, so other locations may get the wrong feature --
    # confirm against the site.
    postData = {
        '__VIEWSTATE' : viewStateForGetPic,
        '__EVENTVALIDATION' : eventValidationForGetPic,
        'govUser_hid' : "false",
        'validUser_hid' : "true",
        'command_hidden' : "zoomToFeature",
        'mapImageHeight_hidden' : "431",
        'mapImageWidth_hidden' : "1088",
        'mapImageSrc_hidden' : gisImgUrl,
        'xMinMap_hidden' : "1210744.77958237",
        'yMinMap_hidden' : "958500",
        'xMaxMap_hidden' : "1911255.22041763",
        'yMaxMap_hidden' : "1236000",
        'x1_hidden' : "2000",
        'y1_hidden' : "19661",
        'x2_hidden' : "0",
        'y2_hidden' : "0",
        'activeLayerId_hid' : "25",
        'vmlLayerID_hid' : "2000",
        'vmlObjectID_hid' : "19661",
        'numSelect_hid' : "0",
        'sBuffer_hid' : "0",
        'addBuffer_hid' : "false",
        'compsMapList_hid' : "undefined",
    }
    postBackUrlForGetPicRespHtml = crifanLib.getUrlRespHtml(postBackUrl, postData)
    print("postBackUrlForGetPicRespHtml= %s" % (postBackUrlForGetPicRespHtml,))

    if gisImgUrl:
        # download a second time: per the original notes, this time the
        # picture is the one really wanted
        # NOTE(review): the url from step 3 is reused; the response of step 6
        # is not parsed for a new image url -- verify this is intended
        getImgName = gisImgUrl.split("/")[-1]
        print("getImgName= %s" % (getImgName,))
        realImgName = "real_" + getImgName
        print("realImgName= %s" % (realImgName,))
        crifanLib.manuallyDownloadFile(gisImgUrl, realImgName)
        print("Download real pic OK")

    # the stray debug statement "ddddddddd" that stood here raised NameError
    # on every call and has been removed
    return
def outputInfoDictList(allInfoDictList):
    """Write all extracted records to an .xls file, then download each map.

    The sheet 'realPropertyInfo' of "extractedRealPropertyInfo.xls" gets one
    header row and one row per record; column 0 is a running sequence number
    (written as text), the other columns are listed in infoColumns below.

    The workbook is saved BEFORE any map is downloaded, so that a failing
    download can no longer discard the already extracted data.

    Args:
        allInfoDictList: list of dicts as returned by processEachMapId; every
            dict must contain all keys listed in infoColumns.

    Returns:
        None.
    """
    excelFilename = "extractedRealPropertyInfo.xls"

    # Header styles. A solid fill is set through "fore_colour" of the pattern;
    # see https://groups.google.com/forum/?fromgroups=#!topic/python-excel/8kCUw2y8PrU
    styleBlueBkg = xlwt.easyxf('pattern: pattern solid, fore_colour sky_blue;')
    styleBold = xlwt.easyxf('font: bold on')

    # Columns 1..23 in sheet order; the header text is also the dict key.
    infoColumns = [
        "MapID",
        "Owner1",
        "Owner2",
        "Acreage",
        "Mail Addr",
        "Mail City",
        "Mail Zip",
        "Desc",
        "Loc",
        "Deed Date",
        "Sale Price",
        "LandUse",
        "Bath",
        "Bed",
        "Half Bath",
        "SqFt",
        "Fair Market Val",
        "Tax Val",
        "RollBack",
        "Assmt Class",
        "2012 Outstanding",
        "2011 Outstanding",
        "2010 Outstanding",
    ]

    wb = xlwt.Workbook()
    ws = wb.add_sheet('realPropertyInfo')

    # write header: "Sequence" and "MapID" on blue background, the rest bold
    ws.write(0, 0, "Sequence", styleBlueBkg)
    for columnOffset, columnName in enumerate(infoColumns):
        if columnName == "MapID":
            headerStyle = styleBlueBkg
        else:
            headerStyle = styleBold
        ws.write(0, columnOffset + 1, columnName, headerStyle)

    # output extracted info
    print("Outputing extracted info to excel file  %s" % (excelFilename,))
    for index, eachInfoDict in enumerate(allInfoDictList):
        number = index + 1  # row 0 is the header
        ws.write(number, 0, str(number))
        print("mapId= %s" % (eachInfoDict['MapID'],))
        print("loc= %s" % (eachInfoDict['Loc'],))
        for columnOffset, columnName in enumerate(infoColumns):
            ws.write(number, columnOffset + 1, eachInfoDict[columnName])
    wb.save(excelFilename)

    # fetch the maps only after the spreadsheet is safely on disk
    for eachInfoDict in allInfoDictList:
        downloadMap(eachInfoDict['MapID'], eachInfoDict['Loc'])
    return
def main():
    """Process the hard-coded list of map numbers and output the results.

    Enables crifanLib's automatic cookie handling, extracts the info of every
    map number with processEachMapId and hands the collected records to
    outputInfoDictList.
    """
    crifanLib.initAutoHandleCookies()

    mapIdList = [
        "0230000400502",
        "0230000300400",
        "0230000509400",
    ]
    allInfoDictList = [processEachMapId(eachMapId) for eachMapId in mapIdList]
    outputInfoDictList(allInfoDictList)
###############################################################################
if __name__ == "__main__":
    # Log file: "<script file name>.log", level DEBUG, truncated on each run.
    scriptSelfName = crifanLib.extractFilename(sys.argv[0])
    logFilename = scriptSelfName + ".log"
    logging.basicConfig(
        level=logging.DEBUG,
        format='LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=logFilename,
        filemode='w')

    # Second handler: INFO messages or higher also go to sys.stderr,
    # in a format that is simpler for console use.
    consoleHandler = logging.StreamHandler()
    consoleHandler.setLevel(logging.INFO)
    consoleHandler.setFormatter(
        logging.Formatter('LINE %(lineno)-4d : %(levelname)-8s %(message)s'))
    logging.getLogger('').addHandler(consoleHandler)

    # Record the traceback of anything that escapes main(), then let it
    # propagate unchanged.
    try:
        main()
    except:
        logging.exception("Unknown Error !")
        raise
【总结】
转载请注明:在路上 » 【代码分享】Python代码:download_gcgis_map_pic – 从gcgis.org中的地图图片中提取信息并保存到excel文件