ASP.NET - Different view-source content in the browser and from HttpWebRequest in C#
I am trying to scrape the content of this page: https://www.google.com/search?hl=en&biw=1920&bih=956&tbm=shop&q=xenon+12640&oq=xenon+12640&aq=f&gs_l=serp.3...3743.3743.0.3905.1.1.0.0.0.0.0.0..0.0.ekh..0.0.hq3xs7axfdu&sei=dr_mt_wom6no2awe25mtca&gbv=2
The problem I am experiencing is that when I open the URL in a browser I see everything I need to scrape, but when I scrape the same link in code, two (important) pieces are missing: the number of reviews and the ratings, which appear below the price and the seller info. Here is a screenshot from the internal web client in C#: http://gyazo.com/908a37c7f70712fba1f82ec90a604d4d.png?1338822369
Here is the code I am using to try to get the content:
/// <summary>
/// Issues an HTTP GET for <paramref name="inurl"/> and returns the response body as text.
/// </summary>
/// <remarks>
/// Identifiers that belong to the surrounding class (currenturl, timeoutsetting,
/// currentaddress, setviewstate) and the string literals are written as they appear
/// in the listing. On any failure the exception message is returned in place of
/// the page content, so callers cannot tell an error from a body by type alone.
/// </remarks>
/// <param name="inurl">Address to request.</param>
/// <param name="incookiecontainer">Cookie store attached to the request and updated from the response; may be null.</param>
/// <param name="gzip">When true, advertises gzip/deflate support to the server.</param>
/// <param name="proxyaddress">Proxy host, or "0" to connect without an explicit proxy.</param>
/// <param name="proxyport">Proxy port, or 0 to connect without an explicit proxy.</param>
/// <param name="proxyusername">User name for the proxy.</param>
/// <param name="proxypassword">Password for the proxy.</param>
/// <returns>The decoded response body, or the exception message if the request failed.</returns>
public string navget(string inurl, CookieContainer incookiecontainer, bool gzip, string proxyaddress, int proxyport, string proxyusername, string proxypassword)
{
    try
    {
        this.currenturl = inurl;

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(inurl);
        request.Timeout = this.timeoutsetting;
        request.CookieContainer = incookiecontainer;

        // A proxy is configured only when both an address and a port were supplied.
        if (proxyaddress != "0" && proxyport != 0)
        {
            request.Proxy = new WebProxy(proxyaddress, proxyport);
            // Use the login credentials to access the proxy.
            request.Proxy.Credentials = new NetworkCredential(proxyusername, proxypassword);
        }

        Uri destination = request.Address;
        request.KeepAlive = true;
        // HTTP method tokens are case-sensitive, so the verb is sent in upper case.
        request.Method = "GET";
        request.Accept = "*/*";
        request.Headers.Add("accept-language", "en-us");
        if (gzip)
        {
            request.Headers.Add("accept-encoding", "gzip, deflate");
        }
        request.AllowAutoRedirect = true;
        request.UserAgent = "mozilla/4.0 (compatible; msie 6.0; windows nt 5.1; sv1; funwebproducts; .net clr 1.1.4322; .net clr 2.0.50727)";
        request.ContentType = "text/xml";

        // Forward the session cookie only when one exists. The previous version
        // dereferenced a possibly missing cookie and hid the resulting exception
        // in an empty catch block.
        if (incookiecontainer != null)
        {
            Cookie sessionCookie = incookiecontainer.GetCookies(destination)["phpsessid"];
            if (sessionCookie != null)
            {
                request.Headers.Add("cookie", "user_ok=1;phpsessid=" + sessionCookie.Value);
            }
        }

        // Dispose the response on every path so the underlying connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (!request.HaveResponse)
            {
                throw new Exception("no response received host.");
            }

            // First handle cookies: update values of known cookies, add new ones.
            if (incookiecontainer != null)
            {
                foreach (Cookie returned in response.Cookies)
                {
                    bool cookieFound = false;
                    foreach (Cookie existing in incookiecontainer.GetCookies(destination))
                    {
                        if (returned.Name.Equals(existing.Name))
                        {
                            existing.Value = returned.Value;
                            cookieFound = true;
                        }
                    }
                    if (!cookieFound)
                    {
                        incookiecontainer.Add(returned);
                    }
                }
            }

            // Read the response, unwrapping the compression the server reports.
            string contentEncoding = response.ContentEncoding.ToLower();
            Stream body = response.GetResponseStream();
            if (contentEncoding.Contains("gzip"))
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }
            else if (contentEncoding.Contains("deflate"))
            {
                body = new DeflateStream(body, CompressionMode.Decompress);
            }

            string responsestring;
            using (StreamReader reader = new StreamReader(body, System.Text.Encoding.Default))
            {
                responsestring = reader.ReadToEnd();
            }

            this.currenturl = response.ResponseUri.ToString();
            this.currentaddress = request.Address.ToString();
            setviewstate(responsestring);
            return responsestring;
        }
    }
    catch (Exception ex)
    {
        //messagebox.show("navget:" + ex.message);
        return ex.Message;
    }
}
thanks in advance help. laziale
It looks like this happens because the number of reviews and the ratings are generated dynamically using JavaScript (probably AJAX or something similar). In this case you need to analyze the additional traffic that takes place when the page is loaded in a browser and find where the data is transferred, or analyze the JavaScript code to see how it is generated.
Comments
Post a Comment