最新消息:20210816 当前crifan.com域名已被污染,为防止失联,请关注(页面右下角的)公众号

【记录】用Python解析网易163博客的心情随笔FeelingCard返回的DWR-REPLY数据

Crawl_EmulateLogin crifan 4657浏览 0评论

【背景】

之前自己弄的BlogsToWordpress,后来希望添加支持,导出网易163博客中的心情随笔的内容。

之前已经通过代码,可以获得返回的DWR-REPLY数据了:

【记录】给BlogsToWordPress添加支持导出网易的心情随笔

现在就是想办法,在Python去分析并解析这些数据了:

LINE 511   DEBUG    getFeelingCardDwrUrl=http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingCards.dwr
LINE 519   DEBUG    feelingCardRespHtml=//#DWR-INSERT

//#DWR-REPLY

var s0={};var s1={};var s2={};var s3={};var s4={};var s5={};var s6={};var s7={};var s8={};var s9={};var s10={};var s11={};var s12={};var s13={};var s14={};var s15={};var s16={};var s17={};var s18={};var s19={};var s20={};var s21={};var s22={};var s23={};var s24={};var s25={};var s26={};var s27={};var s28={};var s29={};var s30={};var s31={};var s32={};var s33={};var s34={};var s35={};var s36={};var s37={};var s38={};var s39={};var s40={};var s41={};var s42={};var s43={};var s44={};var s45={};var s46={};var s47={};var s48={};var s49={};var s50={};var s51={};var s52={};var s53={};var s54={};var s55={};var s56={};var s57={};var s58={};var s59={};var s60={};var s61={};var s62={};var s63={};var s64={};var s65={};var s66={};var s67={};var s68={};var s69={};var s70={};var s71={};var s72={};var s73={};var s74={};var s75={};var s76={};var s77={};var s78={};var s79={};var s80={};var s81={};var s82={};var s83={};var s84={};var s85={};var s86={};var s87={};var s88={};var s89={};var s90={};var s91={};var s92={};var s93={};var s94={};var s95={};var s96={};var s97={};var s98={};var s99={};var s100={};var s101={};var s102={};var s103={};var s104={};var s105={};var s106={};var s107={};var s108={};var s109={};var s110={};var s111={};var s112={};var s113={};var s114={};var s115={};var s116={};var s117={};var s118={};var s119={};var s120={};var s121={};var s122={};var s123={};var s124={};var s125={};var s126={};var s127={};var s128={};var s129={};var s130={};var s131={};var s132={};var s133={};var s134={};var s135={};var s136={};var s137={};var s138={};var s139={};var s140={};var s141={};var s142={};var s143={};var s144={};var s145={};var s146={};var s147={};var s148={};var s149={};var s150={};var s151={};var s152={};var s153={};var s154={};var s155={};var s156={};var s157={};var s158={};var s159={};var s160={};var s161={};var s162={};var s163={};var s164={};var s165={};var s166={};var s167={};var s168={};var s169={};var s170={};var s171={};var s172={};var s173={};var s174={};var 
s175={};var s176={};var s177={};var s178={};var s179={};var s180={};var s181={};var s182={};var s183={};var s184={};var s185={};var s186={};var s187={};var s188={};var s189={};var s190={};var s191={};var s192={};var s193={};var s194={};var s195={};var s196={};var s197={};var s198={};var s199={};var s200={};var s201={};var s202={};var s203={};var s204={};var s205={};var s206={};var s207={};var s208={};var s209={};var s210={};var s211={};var s212={};var s213={};var s214={};var s215={};var s216={};var s217={};var s218={};var s219={};var s220={};var s221={};var s222={};var s223={};var s224={};var s225={};var s226={};var s227={};var s228={};var s229={};var s230={};var s231={};var s232={};var s233={};var s234={};var s235={};var s236={};var s237={};var s238={};var s239={};var s240={};var s241={};var s242={};var s243={};var s244={};var s245={};var s246={};var s247={};var s248={};var s249={};var s250={};var s251={};var s252={};var s253={};var s254={};var s255={};var s256={};var s257={};var s258={};var s259={};var s260={};var s261={};var s262={};var s263={};var s264={};var s265={};var s266={};var s267={};var s268={};var s269={};var s270={};var s271={};var s272={};var s273={};var s274={};var s275={};var s276={};var s277={};var s278={};var s279={};var s280={};var s281={};var s282={};var s283={};var s284={};var s285={};var s286={};var s287={};var s288={};var s289={};var s290={};var s291={};var s292={};var s293={};var s294={};var s295={};var s296={};var s297={};var s298={};var s299={};var s300={};var s301={};var s302={};var s303={};var s304={};var s305={};var s306={};var s307={};var s308={};var s309={};var s310={};var s311={};var s312={};var s313={};var s314={};var s315={};var s316={};var s317={};var s318={};var s319={};var s320={};var s321={};var s322={};var s323={};var s324={};var s325={};var s326={};var s327={};var s328={};var s329={};var s330={};var s331={};var s332={};var s333={};var s334={};var s335={};var s336={};var s337={};var s338={};var s339={};var s340={};var 
s341={};var s342={};var s343={};var s344={};var s345={};var s346={};var s347={};var s348={};var s349={};var s350={};var s351={};var s352={};var s353={};var s354={};var s355={};var s356={};var s357={};var s358={};var s359={};var s360={};var s361={};var s362={};var s363={};var s364={};var s365={};var s366={};var s367={};var s368={};var s369={};var s370={};var s371={};var s372={};var s373={};var s374={};var s375={};var s376={};var s377={};var s378={};var s379={};var s380={};var s381={};var s382={};var s383={};var s384={};var s385={};var s386={};var s387={};var s388={};var s389={};var s390={};var s391={};var s392={};var s393={};var s394={};var s395={};var s396={};var s397={};var s398={};var s399={};var s400={};var s401={};var s402={};var s403={};var s404={};var s405={};var s406={};var s407={};var s408={};var s409={};var s410={};var s411={};var s412={};var s413={};var s414={};var s415={};var s416={};var s417={};var s418={};var s419={};var s420={};var s421={};var s422={};var s423={};var s424={};var s425={};var s426={};var s427={};var s428={};var s429={};var s430={};var s431={};var s432={};var s433={};var s434={};var s435={};var s436={};var s437={};var s438={};var s439={};var s440={};var s441={};var s442={};var s443={};var s444={};var s445={};var s446={};var s447={};var s448={};var s449={};var s450={};var s451={};var s452={};var s453={};var s454={};var s455={};var s456={};var s457={};var s458={};var s459={};var s460={};var s461={};var s462={};var s463={};var s464={};var s465={};var s466={};var s467={};var s468={};var s469={};var s470={};var s471={};var s472={};var s473={};var s474={};var s475={};var s476={};var s477={};var s478={};var s479={};var s480={};var s481={};var s482={};var s483={};var s484={};var s485={};var s486={};var s487={};var s488={};var s489={};var s490={};var s491={};var s492={};var s493={};var s494={};var s495={};var s496={};var s497={};var s498={};var s499={};var s500={};var s501={};var s502={};var s503={};var s504={};var s505={};var s506={};var 
s507={};var s508={};var s509={};var s510={};var s511={};var s512={};var s513={};var s514={};var s515={};var s516={};var s517={};var s518={};var s519={};var s520={};var s521={};var s522={};var s523={};var s524={};var s525={};var s526={};var s527={};var s528={};var s529={};var s530={};var s531={};var s532={};var s533={};var s534={};var s535={};var s536={};var s537={};var s538={};var s539={};var s540={};var s541={};var s542={};var s543={};var s544={};var s545={};var s546={};var s547={};var s548={};var s549={};var s550={};var s551={};var s552={};var s553={};var s554={};var s555={};var s556={};var s557={};var s558={};var s559={};var s560={};var s561={};var s562={};var s563={};var s564={};var s565={};var s566={};var s567={};var s568={};var s569={};var s570={};var s571={};var s572={};var s573={};var s574={};var s575={};var s576={};var s577={};var s578={};var s579={};var s580={};var s581={};var s582={};var s583={};var s584={};var s585={};var s586={};var s587={};var s588={};var s589={};var s590={};var s591={};var s592={};var s593={};var s594={};var s595={};var s596={};var s597={};var s598={};var s599={};var s600={};var s601={};var s602={};var s603={};var s604={};var s605={};var s606={};var s607={};var s608={};var s609={};var s610={};var s611={};var s612={};var s613={};var s614={};var s615={};var s616={};var s617={};var s618={};var s619={};var s620={};var s621={};var s622={};var s623={};var s624={};var s625={};var s626={};var s627={};var s628={};var s629={};var s630={};var s631={};var s632={};var s633={};var s634={};var s635={};var s636={};var s637={};var s638={};var s639={};var s640={};var s641={};var s642={};var s643={};var s644={};var s645={};var s646={};var s647={};var s648={};var s649={};var s650={};var s651={};var s652={};var s653={};var s654={};var s655={};var s656={};var s657={};var s658={};var s659={};var s660={};var s661={};var s662={};var s663={};var s664={};var s665={};var s666={};var s667={};var s668={};var s669={};var s670={};var s671={};var s672={};var 
s673={};var s674={};var s675={};var s676={};var s677={};var s678={};var s679={};var s680={};var s681={};var s682={};var s683={};var s684={};var s685={};var s686={};var s687={};var s688={};var s689={};var s690={};var s691={};var s692={};var s693={};var s694={};var s695={};var s696={};var s697={};var s698={};var s699={};var s700={};var s701={};var s702={};var s703={};var s704={};var s705={};var s706={};var s707={};var s708={};var s709={};var s710={};var s711={};var s712={};var s713={};var s714={};var s715={};var s716={};var s717={};var s718={};var s719={};var s720={};var s721={};var s722={};var s723={};var s724={};var s725={};var s726={};var s727={};var s728={};var s729={};var s730={};var s731={};var s732={};var s733={};var s734={};var s735={};var s736={};var s737={};var s738={};var s739={};var s740={};var s741={};var s742={};var s743={};var s744={};var s745={};var s746={};var s747={};var s748={};var s749={};var s750={};var s751={};var s752={};var s753={};var s754={};var s755={};var s756={};var s757={};var s758={};var s759={};var s760={};var s761={};var s762={};var s763={};var s764={};var s765={};var s766={};var s767={};var s768={};var s769={};var s770={};var s771={};var s772={};var s773={};var s774={};var s775={};var s776={};var s777={};var s778={};var s779={};var s780={};var s781={};var s782={};var s783={};var s784={};var s785={};var s786={};var s787={};var s788={};s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";

s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa";

s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa";

s3.commentCount=0;s3.content="\u7814\u7A76\u5BA4\u5C0F\u5B69\u8981\u53BB\u89C1IU\uFF0C\u8FD8\u9884\u7EA6\u4E86\u76AE\u80A4\u7BA1\u7406\uFF0C\u4E24\u4E2A\u4EBA\u5728\u90A3\u5600\u5495\u201C\u8FD9\u6837\u62FF\u4E0D\u5230IU\u7B7E\u540D\u7684.....\u201D[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s3.id="134892313";s3.mainCommentCount=0;s3.moodType=1;s3.moveFrom=null;s3.publishTime=1350443478140;s3.synchMiniBlog=-1;s3.userAvatar=0;s3.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s3.userId=186541395;s3.userName="ni_chen";s3.userNickname="Neysa";

s4.commentCount=0;s4.content="\u5B66\u957F\u8001\u8BA9\u6211\u5E2E\u4ED6\u4E0B\u7535\u5B50\u4E66\uFF0C\u89C9\u5F97\u6211\u5F88\u5389\u5BB3\uFF0C\u97E9\u56FD\u8FD8\u6CA1\u4E70\u7684\u4E66\u6211\u90FD\u80FD\u4E0B\u6765\uFF0C\u8FD8\u8BF4\u662F\u4E0D\u662F\u7684\u7ED9\u6211\u4E70\u597D\u5403\u7684[P]\u5F00\u6000\u7B11[/P][P]\u5F00\u6000\u7B11[/P]";s4.id="134829323";s4.mainCommentCount=0;s4.moodType=1;s4.moveFrom=null;s4.publishTime=1350366965636;s4.synchMiniBlog=-1;s4.userAvatar=0;s4.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s4.userId=186541395;s4.userName="ni_chen";s4.userNickname="Neysa";

s5.commentCount=0;s5.content="\u6628\u5929\u770B\u5B8C\u300A\u9700\u8981\u6D6A\u6F2B\u300B\uFF0C\u7528\u4E86\u5DEE\u4E0D\u591A\u4E00\u76D2\u7EB8\u5DFE\uFF0C\u773C\u775B\u75BC";s5.id="134829040";s5.mainCommentCount=0;s5.moodType=1;s5.moveFrom=null;s5.publishTime=1350342912629;s5.synchMiniBlog=-1;s5.userAvatar=0;s5.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s5.userId=186541395;s5.userName="ni_chen";s5.userNickname="Neysa";

s6.commentCount=0;s6.content="\"\u751F\u6D3B\u4E2D\u7684\u5899\u4E0D\u662F\u7528\u6765\u649E\u5934\u7684\uFF0C\u662F\u7528\u6765\u8F6C\u5F2F\u7684\". ";s6.id="134829014";s6.mainCommentCount=0;s6.moodType=1;s6.moveFrom=null;s6.publishTime=1350340791901;s6.synchMiniBlog=-1;s6.userAvatar=0;s6.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s6.userId=186541395;s6.userName="ni_chen";s6.userNickname="Neysa";

s7.commentCount=1;s7.content="\u90A3\u5E2E\u4EBA\u53BB\u515C\u98CE\u4E86\uFF0C\u7814\u7A76\u5BA4\u5C31\u5269\u6211\u4E00\u4E2A\u4E86[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s7.id="134768355";s7.mainCommentCount=1;s7.moodType=1;s7.moveFrom=null;s7.publishTime=1350278946278;s7.synchMiniBlog=-1;s7.userAvatar=0;s7.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s7.userId=186541395;s7.userName="ni_chen";s7.userNickname="Neysa";

s8.commentCount=0;s8.content="\u4E09\u5341\u5C81\u524D\u603B\u7B97\u6709\u4E86\u4E2A\u5C0F\u7A9D[IMG]http://img4.ph.126.net/NdQSyqju67sds7xsP9wWVA==/6597146732471627934.jpg[/IMG]";s8.id="134757722";s8.mainCommentCount=0;s8.moodType=0;s8.moveFrom="wap";s8.publishTime=1350211074706;s8.synchMiniBlog=-1;s8.userAvatar=0;s8.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s8.userId=186541395;s8.userName="ni_chen";s8.userNickname="Neysa";

......

s786.commentCount=0;s786.content="\u6211\u8BBE\u8BA1\u7684\u6A21\u7248\uFF0C\u563B\u563B\u3002";s786.id="111039880";s786.mainCommentCount=0;s786.moodType=1;s786.moveFrom="";s786.publishTime=1231545101647;s786.synchMiniBlog=-1;s786.userAvatar=0;s786.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s786.userId=186541395;s786.userName="ni_chen";s786.userNickname="Niya";

s787.commentCount=0;s787.content="\u6211\u542C\u4EBA\u8BF4\uFF0C\u5BC2\u5BDE\u7684\u4EBA\uFF0C\u611F\u5192\u4F1A\u62D6\u5F97\u7279\u522B\u957F\uFF0C\u56E0\u4E3A\u4ED6\u81EA\u5DF1\u4E5F\u4E0D\u60F3\u597D\u3002";s787.id="111039881";s787.mainCommentCount=0;s787.moodType=1;s787.moveFrom="";s787.publishTime=1231329494484;s787.synchMiniBlog=-1;s787.userAvatar=0;s787.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s787.userId=186541395;s787.userName="ni_chen";s787.userNickname="Niya";

s788.commentCount=0;s788.content="\u4E0D\u8981\u8FFD\u6C42\u4EC0\u4E48\u7ED3\u679C\uFF0C\u6BCF\u4E2A\u4EBA\u7ED3\u679C\u90FD\u4E00\u6837\uFF0C\u5C31\u662F\u6B7B\u4EA1\u3002";s788.id="111039882";s788.mainCommentCount=0;s788.moodType=1;s788.moveFrom="";s788.publishTime=1231158155439;s788.synchMiniBlog=-1;s788.userAvatar=0;s788.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s788.userId=186541395;s788.userName="ni_chen";s788.userNickname="Niya";

dwr.engine._remoteHandleCallback('1','0',[s0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23,s24,s25,s26,s27,s28,s29,s30,s31,s32,s33,s34,s35,s36,s37,s38,s39,s40,s41,s42,s43,s44,s45,s46,s47,s48,s49,s50,s51,s52,s53,s54,s55,s56,s57,s58,s59,s60,s61,s62,s63,s64,s65,s66,s67,s68,s69,s70,s71,s72,s73,s74,s75,s76,s77,s78,s79,s80,s81,s82,s83,s84,s85,s86,s87,s88,s89,s90,s91,s92,s93,s94,s95,s96,s97,s98,s99,s100,s101,s102,s103,s104,s105,s106,s107,s108,s109,s110,s111,s112,s113,s114,s115,s116,s117,s118,s119,s120,s121,s122,s123,s124,s125,s126,s127,s128,s129,s130,s131,s132,s133,s134,s135,s136,s137,s138,s139,s140,s141,s142,s143,s144,s145,s146,s147,s148,s149,s150,s151,s152,s153,s154,s155,s156,s157,s158,s159,s160,s161,s162,s163,s164,s165,s166,s167,s168,s169,s170,s171,s172,s173,s174,s175,s176,s177,s178,s179,s180,s181,s182,s183,s184,s185,s186,s187,s188,s189,s190,s191,s192,s193,s194,s195,s196,s197,s198,s199,s200,s201,s202,s203,s204,s205,s206,s207,s208,s209,s210,s211,s212,s213,s214,s215,s216,s217,s218,s219,s220,s221,s222,s223,s224,s225,s226,s227,s228,s229,s230,s231,s232,s233,s234,s235,s236,s237,s238,s239,s240,s241,s242,s243,s244,s245,s246,s247,s248,s249,s250,s251,s252,s253,s254,s255,s256,s257,s258,s259,s260,s261,s262,s263,s264,s265,s266,s267,s268,s269,s270,s271,s272,s273,s274,s275,s276,s277,s278,s279,s280,s281,s282,s283,s284,s285,s286,s287,s288,s289,s290,s291,s292,s293,s294,s295,s296,s297,s298,s299,s300,s301,s302,s303,s304,s305,s306,s307,s308,s309,s310,s311,s312,s313,s314,s315,s316,s317,s318,s319,s320,s321,s322,s323,s324,s325,s326,s327,s328,s329,s330,s331,s332,s333,s334,s335,s336,s337,s338,s339,s340,s341,s342,s343,s344,s345,s346,s347,s348,s349,s350,s351,s352,s353,s354,s355,s356,s357,s358,s359,s360,s361,s362,s363,s364,s365,s366,s367,s368,s369,s370,s371,s372,s373,s374,s375,s376,s377,s378,s379,s380,s381,s382,s383,s384,s385,s386,s387,s388,s389,s390,s391,s392,s393,s394,s395,s396,s397,s398,s399,s400,s401,s402,s403,s404,s405,s406,s407,s408,s409,s410,s411,s412,s41
3,s414,s415,s416,s417,s418,s419,s420,s421,s422,s423,s424,s425,s426,s427,s428,s429,s430,s431,s432,s433,s434,s435,s436,s437,s438,s439,s440,s441,s442,s443,s444,s445,s446,s447,s448,s449,s450,s451,s452,s453,s454,s455,s456,s457,s458,s459,s460,s461,s462,s463,s464,s465,s466,s467,s468,s469,s470,s471,s472,s473,s474,s475,s476,s477,s478,s479,s480,s481,s482,s483,s484,s485,s486,s487,s488,s489,s490,s491,s492,s493,s494,s495,s496,s497,s498,s499,s500,s501,s502,s503,s504,s505,s506,s507,s508,s509,s510,s511,s512,s513,s514,s515,s516,s517,s518,s519,s520,s521,s522,s523,s524,s525,s526,s527,s528,s529,s530,s531,s532,s533,s534,s535,s536,s537,s538,s539,s540,s541,s542,s543,s544,s545,s546,s547,s548,s549,s550,s551,s552,s553,s554,s555,s556,s557,s558,s559,s560,s561,s562,s563,s564,s565,s566,s567,s568,s569,s570,s571,s572,s573,s574,s575,s576,s577,s578,s579,s580,s581,s582,s583,s584,s585,s586,s587,s588,s589,s590,s591,s592,s593,s594,s595,s596,s597,s598,s599,s600,s601,s602,s603,s604,s605,s606,s607,s608,s609,s610,s611,s612,s613,s614,s615,s616,s617,s618,s619,s620,s621,s622,s623,s624,s625,s626,s627,s628,s629,s630,s631,s632,s633,s634,s635,s636,s637,s638,s639,s640,s641,s642,s643,s644,s645,s646,s647,s648,s649,s650,s651,s652,s653,s654,s655,s656,s657,s658,s659,s660,s661,s662,s663,s664,s665,s666,s667,s668,s669,s670,s671,s672,s673,s674,s675,s676,s677,s678,s679,s680,s681,s682,s683,s684,s685,s686,s687,s688,s689,s690,s691,s692,s693,s694,s695,s696,s697,s698,s699,s700,s701,s702,s703,s704,s705,s706,s707,s708,s709,s710,s711,s712,s713,s714,s715,s716,s717,s718,s719,s720,s721,s722,s723,s724,s725,s726,s727,s728,s729,s730,s731,s732,s733,s734,s735,s736,s737,s738,s739,s740,s741,s742,s743,s744,s745,s746,s747,s748,s749,s750,s751,s752,s753,s754,s755,s756,s757,s758,s759,s760,s761,s762,s763,s764,s765,s766,s767,s768,s769,s770,s771,s772,s773,s774,s775,s776,s777,s778,s779,s780,s781,s782,s783,s784,s785,s786,s787,s788]);

【折腾过程】

1.针对第一个获得的数据:

\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86

去分析了一下,结果是:

>>> print '\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86'.decode('unicode-escape')

终于有iphone版的了

即,对应着,第一个评论数据:

finally got iphone version

所以,接下来,就是针对于这样的数据:

s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";

s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa";

s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa";

如何去用python一点点解析出来,成为单个的评论。

2.所以,就是去写代码去解析评论了。

代码如下:

def parseRespDwrToCmtList(respDwrReplyStr, returnDictList=False):
    """
        Parse response DWR-REPLY string, into comment list

        The reply holds one run of JavaScript assignments per comment, like:
            s0.commentCount=0;s0.content="...";s0.id="148749270";s0.mainCommentCount=0;
            s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;
            s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://.../0.jpg";
            s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";
        (all on a single line in the real reply)

        respDwrReplyStr : raw DWR-REPLY text
        returnDictList  : False (default) -> return the raw per-comment strings,
                          exactly as this function always did;
                          True -> return one parsed dict per comment instead

        Return:
            list of str, or list of dict when returnDictList is True.
            Dict keys: cmtIdx (int) plus commentCount, content, id,
            mainCommentCount, moodType, moveFrom, publishTime, synchMiniBlog,
            userAvatar, userAvatarUrl, userId, userName, userNickname (strings).
            'content' has its backslash-u escapes decoded and no surrounding quotes.
            A field whose assignment can not be found keeps "" and a warning is logged.
    """
    # local import: codecs.decode works on both Python 2 and Python 3 strings,
    # while the str.decode() method only exists on Python 2
    import codecs

    # (field name, regex template) in the order the fields appear in the reply.
    # %(sn)s is replaced by the comment's own "sN" prefix, the named group
    # "value" captures the field value without its surrounding quotes.
    fieldPatternList = (
        #s0.commentCount=0;
        ('commentCount',        r'%(sn)s\.commentCount=(?P<value>\d+);'),
        #s0.content="...";s0.id="
        # anchored on the following ";sN.id=" so that a ';' inside the text does not end the value
        ('content',             r'%(sn)s\.content="?(?P<value>.*?)"?;%(sn)s\.id="'),
        #s0.id="148749270";
        ('id',                  r'%(sn)s\.id="(?P<value>\d+)";'),
        #s0.mainCommentCount=0;
        ('mainCommentCount',    r'%(sn)s\.mainCommentCount=(?P<value>\d+);'),
        #s0.moodType=0;
        ('moodType',            r'%(sn)s\.moodType=(?P<value>\d+);'),
        #s0.moveFrom="iphone";
        #s2.moveFrom=null;
        #s8.moveFrom="wap";
        #s699.moveFrom="";
        ('moveFrom',            r'%(sn)s\.moveFrom="?(?P<value>[^"]*?)"?;'),
        #s0.publishTime=1374626867596;
        ('publishTime',         r'%(sn)s\.publishTime=(?P<value>\d+);'),
        #s0.synchMiniBlog=-1;
        ('synchMiniBlog',       r'%(sn)s\.synchMiniBlog=(?P<value>.+?);'),
        #s0.userAvatar=0;
        ('userAvatar',          r'%(sn)s\.userAvatar=(?P<value>\d+);'),
        #s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";
        ('userAvatarUrl',       r'%(sn)s\.userAvatarUrl="?(?P<value>https?://.+?)"?;'),
        #s0.userId=186541395;
        ('userId',              r'%(sn)s\.userId=(?P<value>\d+);'),
        #s0.userName="ni_chen";
        ('userName',            r'%(sn)s\.userName="?(?P<value>.+?)"?;'),
        #s0.userNickname="Neysa";
        ('userNickname',        r'%(sn)s\.userNickname="?(?P<value>.+?)"?;'),
    )

    # one match per comment: from its sN.commentCount up to its sN.userNickname="...";
    commentStrList = re.findall(r's\d+\.commentCount=.+?s\d+\.userNickname=".+?";', respDwrReplyStr)
    logging.info("len(commentStrList)=%d", len(commentStrList))

    commentDictList = []
    for eachCommentStr in commentStrList:
        # first get the comment index
        # (always matches: every findall result starts with sN.commentCount=)
        foundCmtIdx = re.match(r's(?P<cmtIdx>\d+)\.commentCount=', eachCommentStr)
        cmtIdxStr = foundCmtIdx.group("cmtIdx")
        cmtIdx = int(cmtIdxStr)
        logging.info("cmtIdx=%d", cmtIdx)
        singleCmtDict = {'cmtIdx': cmtIdx}

        # use the digits exactly as written in the reply for the field prefix
        strSn = "s" + cmtIdxStr

        for fieldName, patternTemplate in fieldPatternList:
            fieldValue = ""
            foundField = re.search(patternTemplate % {'sn': strSn}, eachCommentStr)
            if foundField:
                fieldValue = foundField.group("value")
                if fieldName == 'content':
                    # turn the backslash-u (and backslash-quote) escapes into real characters
                    fieldValue = codecs.decode(fieldValue, "unicode-escape")
            else:
                logging.warning("Can not find %s for comment %s", fieldName, strSn)
            singleCmtDict[fieldName] = fieldValue
            logging.info("%s=%s", fieldName, fieldValue)

        commentDictList.append(singleCmtDict)

    if returnDictList:
        return commentDictList
    return commentStrList

3.但是,对于

mainCommentCount不是0

比如:

mainCommentCount=1

的评论,说明其下是有对应的子评论的,所以还要想办法抓取出来。

经过分析,对应的发送的post data和response data分别是:

for 134875456 got sub comment

for 134875456 response dwr comment str

然后就去将其中的逻辑,用代码,再模拟出来,获得对应的数据,再去分析出来子评论的内容。

4.最后,完整的相关解析部分的代码为:

def getPlaincallRespDwrStr(c0ScriptName, c0MethodName, c0Param0, c0Param1, c0Param2):
    """
        Send one DWR "plaincall" request and return the response DWR string

        The request goes to:
            gConst['blogApi163']/gVal['blogUser']/dwr/call/plaincall/<c0ScriptName>.<c0MethodName>.dwr
        for example:
            http://api.blog.163.com/againinput4/dwr/call/plaincall/BlogBeanNew.getComments.dwr
            http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingsComment.dwr
            http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingCards.dwr

        Known parameter combinations (callCount=1, c0-id=0 and
        scriptSessionId=${scriptSessionId}187 are the same for all of them):

        type1: comments of a blog post
            (for http://againinput4.blog.163.com/blog/static/172799491201010159650483/)
            c0-scriptName=BlogBeanNew
            c0-methodName=getComments
            c0-param0=string:fks_094067082083086070082083080095085081083068093095082074085
            c0-param1=number:1
            c0-param2=number:0
            batchId=728048

        type2: sub comments of one feeling card
            c0-scriptName=FeelingsBeanNew
            c0-methodName=getRecentFeelingsComment
            c0-param0=string:134875456
            c0-param1=number:1
            c0-param2=number:0
            batchId=705438

        type3: feeling cards of one user
            c0-scriptName=FeelingsBeanNew
            c0-methodName=getRecentFeelingCards
            c0-param0=number:186541395
            c0-param1=number:0
            c0-param2=number:20
            batchId=292545

        c0ScriptName : DWR script (bean) name, BlogBeanNew/FeelingsBeanNew
        c0MethodName : DWR method name, getComments/getRecentFeelingsComment/getRecentFeelingCards
        c0Param0, c0Param1, c0Param2 : already type-prefixed values ("string:..." / "number:...")

        Return: whatever crifanLib.getUrlRespHtml returns for the request
    """

    logging.debug("get FeelingsBeanNew reponse DWR string for c0MethodName=%s, c0Param0=%s, c0Param1=%s, c0Param2=%s", c0MethodName, c0Param0, c0Param1, c0Param2)

    # request fields, filled in the same order as they appear in a captured request
    dwrFieldDict = {}
    dwrFieldDict['callCount'] = '1'
    dwrFieldDict['scriptSessionId'] = '${scriptSessionId}187'
    dwrFieldDict['c0-scriptName'] = c0ScriptName
    dwrFieldDict['c0-methodName'] = c0MethodName
    dwrFieldDict['c0-id'] = '0'
    dwrFieldDict['c0-param0'] = c0Param0
    dwrFieldDict['c0-param1'] = c0Param1
    dwrFieldDict['c0-param2'] = c0Param2
    # fixed value; the captured requests above carry varying numbers here
    # (open question from the author: should this be randomly generated?)
    dwrFieldDict['batchId'] = '1'

    # <api base>/<blog user>/dwr/call/plaincall/<script>.<method>.dwr
    dwrFileName = c0ScriptName + "." + c0MethodName + ".dwr"
    plaincallDwrUrl = "/".join((gConst['blogApi163'], gVal['blogUser'], "dwr/call/plaincall", dwrFileName))
    logging.debug("plaincallDwrUrl=%s", plaincallDwrUrl)

    # same Referer as seen in the captured request
    requestHeaderDict = {}
    requestHeaderDict['Referer'] = "http://api.blog.163.com/crossdomain.html?t=20100205"
    requestHeaderDict['Content-Type'] = "text/plain"

    # the fields are handed over with a CR LF delimiter instead of a default one
    # NOTE(review): presumably getUrlRespHtml joins them as key=value lines of a text/plain POST body - confirm in crifanLib
    plaincallRespDwrStr = crifanLib.getUrlRespHtml(
        plaincallDwrUrl,
        postDict=dwrFieldDict,
        headerDict=requestHeaderDict,
        postDataDelimiter='\r\n')
    logging.debug("plaincallRespDwrStr=%s", plaincallRespDwrStr)

    return plaincallRespDwrStr

def fetchComments_feelingCard():
    """
        Get feeling card items, to use as comments

        Steps:
            1. request the feeling cards (main comments) page by page through
               FeelingsBeanNew.getRecentFeelingCards, until a page comes back
               with fewer items than requested
            2. for every main comment whose 'mainCommentCount' is > 0, request
               and parse its sub comments
            3. number the sub comments after the main comments and fill in each
               sub comment's 'parentCmtNum'

        Returns the list of comment dicts: all main comments first, then all
        sub comments.

        Best effort: any exception is logged (with traceback) and whatever is
        already in the result list at that point is returned.
    """

    totalCmtDictList = []
    totalMainCmtDictList = []
    totalSubCmtDictList = []

    startIdx = 0
    onceGetNum = 1000 # get 1000 items once

    try:
        while True:
            # request sent (sample):
            # callCount=1
            # scriptSessionId=${scriptSessionId}187
            # c0-scriptName=FeelingsBeanNew
            # c0-methodName=getRecentFeelingCards
            # c0-id=0
            # c0-param0=number:186541395
            # c0-param1=number:0
            # c0-param2=number:20
            # batchId=292545
            getRecentFeelingCardsRespDwrStr = getPlaincallRespDwrStr(
                "FeelingsBeanNew",
                "getRecentFeelingCards",
                "number:" + str(gVal['userId']),
                "number:" + str(startIdx),
                "number:" + str(onceGetNum))
            logging.debug("getRecentFeelingCardsRespDwrStr=%s", getRecentFeelingCardsRespDwrStr)
            curMainCmtDictList = parseMainCmtDwrStrToMainCmtDictList(getRecentFeelingCardsRespDwrStr)

            if startIdx > 0:
                # NOTE(review): presumably the sN numbering restarts at s0 in
                # every response, so items of later pages are shifted by the
                # page offset; this keeps main comment numbers unique, which
                # the sub comment numbering below (starting at
                # len(totalMainCmtDictList)) relies on -- confirm against a
                # real multi-page response
                for eachMainCmtDict in curMainCmtDictList:
                    eachMainCmtDict['curCmtIdx'] += startIdx
                    eachMainCmtDict['curCmtNum'] += startIdx

            totalMainCmtDictList.extend(curMainCmtDictList)

            curGotMainCmtNum = len(curMainCmtDictList)
            if curGotMainCmtNum < onceGetNum:
                #has got all comment, so quit
                logging.debug("Request %d comments, but only response %d comments, so no more comments, has got all comments", onceGetNum, curGotMainCmtNum)
                break

            # move on to the next page; without this the very same page would
            # be requested again and again
            startIdx += curGotMainCmtNum

        #add main comment dict list into total comment dict list
        logging.debug("Total got %d main comments dict", len(totalMainCmtDictList))
        totalCmtDictList.extend(totalMainCmtDictList)
        logging.debug("Total comments %d", len(totalCmtDictList))

        #after get all main comment dict, then try to find the sub comments
        for eachMainCmtDict in totalMainCmtDictList:
            mainCommentCountInt = int(eachMainCmtDict['mainCommentCount'])
            if mainCommentCountInt > 0:
                #has sub comment
                logging.debug("[%d] main comment has sub %d comments", eachMainCmtDict['curCmtIdx'], mainCommentCountInt)
                #1. get sub comment dwr string
                subCmtDwrStr = getFeelingCardSubCmtDwrStr(eachMainCmtDict['id'])
                #2. parse sub comment dwr string to sub comment dict
                totalSubCmtDictList.extend(parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr))

        #do some update for sub comment
        logging.debug("Total got %d sub comment dict", len(totalSubCmtDictList))
        if totalSubCmtDictList:
            #update sub comment index: sub comments are numbered after all main comments
            subCmtStartIdx = len(totalMainCmtDictList)
            logging.debug("subCmtStartIdx=%d", subCmtStartIdx)
            for idx, eachSubCmtDict in enumerate(totalSubCmtDictList):
                eachSubCmtDict['curCmtIdx'] = subCmtStartIdx + idx
                eachSubCmtDict['curCmtNum'] = eachSubCmtDict['curCmtIdx'] + 1
            logging.debug("done for update sub comment index")

            #update sub comment's parent relation
            # id -> curCmtNum maps replace the former nested scans; on a
            # duplicated id the last entry wins, same as the scans did
            mainCmtNumById = dict((eachMainCmtDict['id'], eachMainCmtDict['curCmtNum']) for eachMainCmtDict in totalMainCmtDictList)
            subCmtNumById = dict((eachSubCmtDict['id'], eachSubCmtDict['curCmtNum']) for eachSubCmtDict in totalSubCmtDictList)
            for eachSubCmtDict in totalSubCmtDictList:
                # a reply to another sub comment (s3.replyComId="72175292")
                # takes precedence over the owning card (s0.replyComId="-1"
                # never matches a real id)
                parentCmtNum = subCmtNumById.get(eachSubCmtDict['replyComId'])
                if parentCmtNum is None:
                    parentCmtNum = mainCmtNumById.get(eachSubCmtDict['cardId'])
                if parentCmtNum is not None:
                    logging.debug("sub cmt id=%s 's parent curCmtNum=%d", eachSubCmtDict['id'], parentCmtNum)
                    eachSubCmtDict['parentCmtNum'] = parentCmtNum

            logging.debug("done for update sub comment's parent relation")
            totalCmtDictList.extend(totalSubCmtDictList)
    except Exception:
        # keep the best-effort behaviour, but record the real cause
        logging.exception("Fail for fetch the feeling card (index=[%d-%d])", startIdx, startIdx + onceGetNum - 1)

    return totalCmtDictList

def getFeelingCardSubCmtDwrStr(subCmtId):
    """
        Request FeelingsBeanNew.getRecentFeelingsComment for the given id and
        return the raw response DWR string.

        subCmtId is sent as c0-param0 ("string:<id>"); c0-param1 and c0-param2
        are fixed to "number:1" and "number:0".
    """
    # request sent (sample):
    # callCount=1
    # scriptSessionId=${scriptSessionId}187
    # c0-scriptName=FeelingsBeanNew
    # c0-methodName=getRecentFeelingsComment
    # c0-id=0
    # c0-param0=string:134875456
    # c0-param1=number:1
    # c0-param2=number:0
    # batchId=705438

    logging.debug("get sub comment for %s", subCmtId)

    dwrParams = ("string:" + str(subCmtId), "number:1", "number:0")
    respDwrStr = getPlaincallRespDwrStr("FeelingsBeanNew", "getRecentFeelingsComment", *dwrParams)
    logging.debug("getRecentFeelingsCommentRespDwrStr=%s", respDwrStr)

    return respDwrStr

# Value part of one 'sN.<field>=<value>;' assignment. Each fragment is appended
# to r'sN\.<field>=' and captures the value in the group named "value".
_DWR_VAL_DIGITS = r'(?P<value>\d+);'                        # s0.publishTime=1374626867596;
_DWR_VAL_ANY = r'(?P<value>.+?);'                           # s0.popup=false;  s0.synchMiniBlog=-1;
_DWR_VAL_OPT_QUOTED = r'"?(?P<value>.*?)"?;'                # s0.ip="1.2.3.4";  s0.ip=null;  s0.replyToUserName="";
_DWR_VAL_OPT_QUOTED_NONEMPTY = r'"?(?P<value>.+?)"?;'       # s0.userName="ni_chen";

# (field name, value pattern), in the order the fields are extracted

# fields present in both main and sub comments (content is handled separately)
_DWR_COMMON_FIELDS = (
    ('id',                  r'"(?P<value>\d+)";'),          # s0.id="148749270";
    ('moveFrom',            r'"?(?P<value>[^"]*?)"?;'),     # s0.moveFrom="iphone";  s2.moveFrom=null;  s699.moveFrom="";
    ('publishTime',         _DWR_VAL_DIGITS),
    ('synchMiniBlog',       _DWR_VAL_ANY),                  # -1 in main comment, false in sub comment
    ('userId',              _DWR_VAL_DIGITS),
)

# fields only present in sub comments
_DWR_SUB_CMT_FIELDS = (
    ('cardId',              _DWR_VAL_OPT_QUOTED),           # s0.cardId="134875456";
    ('ip',                  _DWR_VAL_OPT_QUOTED),
    ('ipName',              _DWR_VAL_ANY),                  # s0.ipName=null;
    ('lastUpdateTime',      _DWR_VAL_DIGITS),
    ('mainComId',           _DWR_VAL_OPT_QUOTED),           # s0.mainComId="-1";
    ('popup',               _DWR_VAL_ANY),
    ('publisherAvatar',     _DWR_VAL_DIGITS),
    ('publisherAvatarUrl',  _DWR_VAL_OPT_QUOTED),           # "http://...jpg" or null
    ('publisherId',         _DWR_VAL_DIGITS),
    ('publisherName',       _DWR_VAL_OPT_QUOTED),
    ('publisherNickname',   _DWR_VAL_OPT_QUOTED),
    ('publisherUrl',        _DWR_VAL_OPT_QUOTED),           # s0.publisherUrl=null;
    ('replyComId',          _DWR_VAL_OPT_QUOTED),           # "-1" or id of the replied sub comment
    ('replyToUserId',       _DWR_VAL_DIGITS),
    ('replyToUserName',     _DWR_VAL_OPT_QUOTED),           # "ni_chen", "" or null
    ('replyToUserNick',     _DWR_VAL_OPT_QUOTED),
    ('spam',                _DWR_VAL_DIGITS),
    ('subComments',         _DWR_VAL_ANY),                  # s0.subComments=s1;
    ('valid',               _DWR_VAL_DIGITS),
)

# fields only present in main comments
_DWR_MAIN_CMT_FIELDS = (
    ('commentCount',        _DWR_VAL_DIGITS),
    ('mainCommentCount',    _DWR_VAL_DIGITS),
    ('moodType',            _DWR_VAL_DIGITS),
    ('userAvatar',          _DWR_VAL_DIGITS),
    ('userAvatarUrl',       r'"?(?P<value>http://.+?)"?;'),
    ('userName',            _DWR_VAL_OPT_QUOTED_NONEMPTY),
    ('userNickname',        _DWR_VAL_OPT_QUOTED_NONEMPTY),
)

def _extractDwrFieldValue(strSn, fieldName, valuePattern, singleCmtDwrStr):
    """
        Return the captured text of the first '<strSn>.<fieldName>=<value>' in singleCmtDwrStr.

        Raises AttributeError naming the missing field when nothing matches.
        AttributeError is kept on purpose: it is what calling .group() on a
        failed search raised before, so existing handlers keep working.
    """
    foundField = re.search(strSn + r'\.' + fieldName + '=' + valuePattern, singleCmtDwrStr)
    if not foundField:
        raise AttributeError("can not find %s.%s in comment DWR string: %s" % (strSn, fieldName, singleCmtDwrStr))
    return foundField.group("value")

def parseSingleDwrStrToCmtDict(singleCmtDwrStr):
    """
        parse single comment dwr string, main comment or sub comment, to comment dict

        Input is the run of 'sN.field=value;' assignments of one comment, e.g.
            main comment:
                s0.commentCount=0;s0.content="\\u7EC8\\u4E8E";s0.id="148749270";s0.mainCommentCount=0;...;s0.userNickname="Neysa";
            sub comment (recognised by starting with sN.cardId=):
                s0.cardId="134875456";s0.content="\\u81EA\\u5DF1";s0.id="73300019";s0.ip="203.234.215.66";...;s0.valid=0;

        Returns a dict holding every key of the matching kind:
            'isSubComment'  : True/False
            'curCmtIdx'     : N of sN for a main comment; left 0 for a sub comment
            'curCmtNum'     : N + 1 for a main comment; left 0 for a sub comment
            'parentCmtNum'  : always 0 here
            all other keys  : the captured text of the field, as a string
                              (a JavaScript null is stored as the text 'null')
        Special handling:
            'content' and 'publisherNickname' are unicode-escape decoded
            'ip' is set to "" when the value does not contain a dotted quad

        Raises AttributeError when the comment index or any expected field
        can not be found in the string.
    """
    # function-scope import, so this block does not depend on the file's import section
    import codecs

    logging.debug("singleCmtDwrStr=%s", singleCmtDwrStr)

    #1. check is main comment or sub comment
    #start with sN.cardId=, is sub comment
    isSubComment = bool(re.search(r"^s\d+\.cardId=", singleCmtDwrStr))
    if isSubComment:
        logging.debug("------- is sub comments")
        curCmtDict = {
            'curCmtIdx'         : 0,
            'curCmtNum'         : 0,
            'parentCmtNum'      : 0,
            'isSubComment'      : True,

            'cardId'            : "", # is parent ID
            'ip'                : "",
            'ipName'            : "",
            'lastUpdateTime'    : "",
            'mainComId'         : "",
            'popup'             : "",
            'publisherAvatar'   : "",
            'publisherAvatarUrl': "",
            'publisherId'       : "",
            'publisherName'     : "",
            'publisherNickname' : "",
            'publisherUrl'      : "",
            'replyComId'        : "",
            'replyToUserId'     : "",
            'replyToUserName'   : "",
            'replyToUserNick'   : "",
            'spam'              : "",
            'subComments'       : "",
            'valid'             : "",
            #common part
            'content'           : "",
            'id'                : "",
            'moveFrom'          : "",
            'publishTime'       : "",
            'synchMiniBlog'     : "",
            'userId'            : "",
        }
    else:
        logging.debug("======= is main comments")
        curCmtDict = {
            'curCmtIdx'         : 0,
            'curCmtNum'         : 0,
            'parentCmtNum'      : 0,
            'isSubComment'      : False,

            'commentCount'      : "",
            'mainCommentCount'  : "",
            'moodType'          : "",
            'userAvatar'        : "",
            'userAvatarUrl'     : "",
            'userName'          : "",
            'userNickname'      : "",
            #common part
            'content'           : "",
            'id'                : "",
            'moveFrom'          : "",
            'publishTime'       : "",
            'synchMiniBlog'     : "",
            'userId'            : "",
        }

    #2. process common key and value

    #first get the comment index: the N of sN.content="...";sN.id="
    foundCurCmtIdx = re.search(r's(?P<curCmtIdx>\d+)\.content=".+?";s\1\.id="', singleCmtDwrStr)
    logging.debug("foundCurCmtIdx=%s", foundCurCmtIdx)
    if not foundCurCmtIdx:
        raise AttributeError("can not find comment index (sN.content=\"...\";sN.id=\") in comment DWR string: %s" % singleCmtDwrStr)
    curCmtIdx = int(foundCurCmtIdx.group("curCmtIdx"))
    logging.debug("curCmtIdx=%d", curCmtIdx)
    if not isSubComment:
        #only add for main comment
        #the caller is expected to number the sub comments itself
        curCmtDict['curCmtIdx'] = curCmtIdx
        curCmtDict['curCmtNum'] = curCmtIdx + 1

    strSn = "s" + str(curCmtIdx)

    #content
    #s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="
    # the value runs up to ';sN.id="' because the content itself may contain ';'
    # NOTE(review): the surrounding double quotes are part of the captured
    # value, so 'content' is stored with them; left as is in case consumers
    # already strip them -- confirm
    rawContent = _extractDwrFieldValue(strSn, 'content', r'(?P<value>.+?);' + strSn + r'\.id="', singleCmtDwrStr)
    # codecs.decode accepts the same input as str.decode("unicode-escape")
    # did, and also works where str has no decode method
    curCmtDict['content'] = codecs.decode(rawContent, "unicode-escape")
    logging.debug("content=%s", curCmtDict['content'])

    for fieldName, valuePattern in _DWR_COMMON_FIELDS:
        curCmtDict[fieldName] = _extractDwrFieldValue(strSn, fieldName, valuePattern, singleCmtDwrStr)
        logging.debug("%s=%s", fieldName, curCmtDict[fieldName])

    #3. process different key and value
    if isSubComment:
        kindSpecificFields = _DWR_SUB_CMT_FIELDS
    else:
        kindSpecificFields = _DWR_MAIN_CMT_FIELDS
    for fieldName, valuePattern in kindSpecificFields:
        curCmtDict[fieldName] = _extractDwrFieldValue(strSn, fieldName, valuePattern, singleCmtDwrStr)
        logging.debug("%s=%s", fieldName, curCmtDict[fieldName])

    if isSubComment:
        #s0.ip=null; -> no dotted quad -> store empty string
        if not re.search(r"\d+\.\d+\.\d+\.\d+", curCmtDict['ip']):
            curCmtDict['ip'] = ""
        #s0.publisherNickname="\u6C89\u7F18\u6E90";
        # NOTE(review): replyToUserNick and userNickname can carry \uXXXX
        # escapes too but are stored undecoded, as before -- confirm whether
        # they should be decoded as well
        curCmtDict['publisherNickname'] = codecs.decode(curCmtDict['publisherNickname'], "unicode-escape")

    return curCmtDict

def parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr):
    """
        Turn a sub comment response DWR string into a list of sub comment dicts:
            1. cut out every single sub comment, i.e. each run from
               'sN.cardId=' up to 'sN.valid=<digits>;' followed by one whitespace
            2. hand each run to parseSingleDwrStrToCmtDict
        Returns an empty list when no sub comment is found.
    """
    # response sample:
    # //#DWR-INSERT
    # //#DWR-REPLY
    # var s0={};var s1=[];s0.cardId="134875456";s0.content="\u81EA\u5DF1\u4E70\u70B9\u6C34\u679C\u5403\u3002";s0.id="73300019";...;s0.synchMiniBlog=false;s0.userId=186541395;s0.valid=0;
    # dwr.engine._remoteHandleCallback('1','0',[s0]);

    singleSubCmtPattern = r's\d+\.cardId=.+?s\d+\.valid=\d+;(?:\s)'
    matchedSubCmtStrs = re.findall(singleSubCmtPattern, subCmtDwrStr)
    logging.debug("len(subCmtStrList)=%d", len(matchedSubCmtStrs))

    return [parseSingleDwrStrToCmtDict(eachSubCmtStr) for eachSubCmtStr in matchedSubCmtStrs]

def parseMainCmtDwrStrToMainCmtDictList(respDwrReplyStr):
    """
        Convert a main comment DWR-REPLY string into a list of comment dicts.

        The reply is cut into one chunk per main comment: everything from
        "sN.commentCount=" up to the quoted "sN.userNickname" value, its
        terminating semicolon and the single whitespace character after it.
        Each chunk is then parsed by parseSingleDwrStrToCmtDict.

        Returns the parsed dicts in reply order; an empty list when the reply
        contains no such chunk.
    """
    # Sample chunks (content shortened), one per line in the reply:
    #   s0.commentCount=0;s0.content="...";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;...;s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa";
    #   s1.commentCount=1;s1.content="...";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;...;s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa";
    singleMainCmtPattern = r's\d+\.commentCount=.+?s\d+\.userNickname=".+?";(?:\s)'
    matchedChunkList = re.findall(singleMainCmtPattern, respDwrReplyStr)
    logging.debug("len(mainCmtDwrStrList)=%d", len(matchedChunkList))

    # one comment dict per matched chunk; no match gives an empty list
    return [parseSingleDwrStrToCmtDict(singleChunk) for singleChunk in matchedChunkList]

#------------------------------------------------------------------------------
def fillComments_fellingCard(destCmtDict, srcCmtDict):
    """
        Fill the destination comment dict from one parsed feeling card comment.

        Parameters:
            destCmtDict: dict to fill; it is modified in place
            srcCmtDict: source comment dict, either a main comment or a sub
                comment. Keys always read: 'curCmtIdx', 'curCmtNum',
                'isSubComment', 'publishTime', 'content', 'parentCmtNum'.
                A sub comment additionally needs 'publisherNickname',
                'publisherName' and 'ip'; a main comment needs 'userNickname'.

        Returns:
            destCmtDict, with these keys set: 'id', 'author', 'author_email',
            'author_url', 'author_IP', 'date', 'date_gmt', 'content',
            'approved', 'type', 'parent', 'user_id'.

        Raises:
            KeyError when a required key is missing from srcCmtDict.
    """
    logging.debug("--------- source comment: idx=%d, num=%d ---------", srcCmtDict['curCmtIdx'], srcCmtDict['curCmtNum'])
    destCmtDict['id'] = srcCmtDict['curCmtNum']

    if srcCmtDict['isSubComment']:
        # a sub comment carries its own publisher fields
        destCmtDict['author'] = srcCmtDict['publisherNickname']
        destCmtDict['author_email'] = srcCmtDict['publisherName'] # e.g. s0.publisherName="chenlin198412@126"
        destCmtDict['author_url'] = saxutils.escape(genNeteaseUserUrl(srcCmtDict['publisherName']))
        destCmtDict['author_IP'] = srcCmtDict['ip']
    else:
        # a main comment: no email/IP is taken from it, the url is the blog entry url
        destCmtDict['author'] = srcCmtDict['userNickname']
        destCmtDict['author_email'] = ""
        destCmtDict['author_url'] = saxutils.escape(gVal['blogEntryUrl'])
        destCmtDict['author_IP'] = ""

    # publishTime looks like: s0.publishTime=1374626867596
    # it is divided by 1000 before being used as a timestamp
    # (presumably milliseconds -> seconds)
    publishTimeInt = int(srcCmtDict['publishTime'])
    # explicit floor division: '/' only truncates on Python 2 without
    # "from __future__ import division"; '//' gives an int everywhere, so the
    # string handed to crifanLib never contains a fractional part
    publishTimeSec = publishTimeInt // 1000
    localTime = crifanLib.timestampToDatetime(str(publishTimeSec))
    gmtTime = crifanLib.convertLocalToGmt(localTime)
    destCmtDict['date'] = localTime.strftime("%Y-%m-%d %H:%M:%S")
    destCmtDict['date_gmt'] = gmtTime.strftime("%Y-%m-%d %H:%M:%S")

    # content is copied as-is; no unicode-escape decoding is done here
    destCmtDict['content'] = srcCmtDict['content']

    destCmtDict['approved'] = 1
    destCmtDict['type'] = ''
    destCmtDict['parent'] = srcCmtDict['parentCmtNum']
    destCmtDict['user_id'] = 0

    logging.debug("author=%s", destCmtDict['author'])
    logging.debug("author_email=%s", destCmtDict['author_email'])
    logging.debug("author_IP=%s", destCmtDict['author_IP'])
    logging.debug("author_url=%s", destCmtDict['author_url'])
    logging.debug("date=%s", destCmtDict['date'])
    logging.debug("date_gmt=%s", destCmtDict['date_gmt'])
    logging.debug("content=%s", destCmtDict['content'])
    logging.debug("parent=%s", destCmtDict['parent'])

    return destCmtDict

#------------------------------------------------------------------------------
# fetch and parse comments 
# return the parsed dict value
def fetchAndParseComments(url, html):
    """
        Fetch the comments of one page and convert each into a filled comment dict.

        Parameters:
            url: page url. When it equals gVal['special']['feelingCard']['url']
                the comments come from fetchComments_feelingCard and are filled
                by fillComments_fellingCard; otherwise they come from
                fetchComments and are filled by fillComments.
            html: page html; only used for a normal (non feeling card) page,
                where it is converted by htmlToSoup and passed to fetchComments.

        Returns:
            list of filled comment dicts; an empty list when nothing was fetched.
    """
    isFeelingCard = (url == gVal['special']['feelingCard']['url'])

    if isFeelingCard:
        fetchedCmtDictList = fetchComments_feelingCard()
    else:
        # normal post: comments are extracted from the page itself
        pageSoup = htmlToSoup(html)
        fetchedCmtDictList = fetchComments(url, pageSoup)

    if not fetchedCmtDictList:
        # no valid comments, nothing to process
        return []

    # choose the fill function that matches where the comments came from
    fillSingleComment = fillComments_fellingCard if isFeelingCard else fillComments

    # every comment is filled into its own fresh dict
    return [fillSingleComment({}, fetchedCmtDict) for fetchedCmtDict in fetchedCmtDictList]

 

【总结】

其实分析逻辑,获取评论数据,不是很难。

麻烦的是写代码,去提取评论数据,比较琐碎,是个体力活。。。


关于抓取网易163博客的FeelingCard心情随笔的完整的代码,可以去看:

BlogNetease.py

 

转载请注明:在路上 » 【记录】用Python解析网易163博客的心情随笔FeelingCard返回的DWR-REPLY数据

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

网友最新评论 (2)

  1. 无意中朋友发给我一份这样的数据,然后花了一个半小时写了一个解析。我的做法是:挨个按它的调用顺序,把代码转换成json,然后就可以直接按json去解析数据了
    alan3年前 (2020-11-19)回复
  2. 这个确实是体力活,我也是用正则解析它的心情随笔的
    bkdwei5年前 (2019-02-17)回复
92 queries in 0.177 seconds, using 22.31MB memory