以文本方式查看主题 - Foxtable(狐表) (http://foxtable.com/bbs/index.asp) -- 专家坐堂 (http://foxtable.com/bbs/list.asp?boardid=2) ---- [求助]能否抓取网页隐藏在源码中的文字? (http://foxtable.com/bbs/dispbbs.asp?boardid=2&id=81037) |
-- 作者:dqlgood -- 发布时间:2016/2/17 10:14:00 -- [求助]能否抓取网页隐藏在源码中的文字? 我抓取东方财富“千股千评”网页表格数据,网址:http://data.eastmoney.com/stockcomment/ 应用代码
' Load the page in an off-screen WebBrowser and pump messages until it has finished loading.
Dim web As New System.Windows.Forms.WebBrowser
web.Navigate("http://data.eastmoney.com/stockcomment/")
Do Until web.ReadyState = 4 ' 4 = WebBrowserReadyState.Complete
    Application.DoEvents
Loop

' Put "1" into the "gopage" element, then click the first link under "PageCont" whose class is "btn_link".
Dim pageBox = web.Document.GetElementById("gopage")
pageBox.SetAttribute("Value", "1")
For Each link As Object In web.Document.GetElementById("PageCont").GetElementsByTagName("a")
    If link.GetAttribute("ClassName") = "btn_link" Then
        link.InvokeMember("click")
        Exit For
    End If
Next

' Keep pumping messages until the first <span> inside "PageCont" reads 1.
Do Until web.Document.GetElementById("PageCont").GetElementsByTagName("span")(0).InnerText = 1
    Application.DoEvents
Loop

' Print every cell of table "dt_1"; row 0 is skipped (NOTE(review): presumably the header row).
Dim rows = web.Document.GetElementById("dt_1").GetElementsByTagName("tr")
For rowIndex As Integer = 1 To rows.Count - 1
    Dim cells = rows(rowIndex).GetElementsByTagName("td")
    For cellIndex As Integer = 0 To cells.Count - 1
        Output.Show(cells(cellIndex).InnerText)
    Next
Next
如愿获得表格数据,但检查网页源码时发现有一部分文字隐藏在源码中不在网页中显示出来,如图: 我想获取如图,以下文字中的红色文字部分,能实现吗?
600000,浦发银行,1,停牌,-,-,-,6.95,18.15,46.34","600004,白云机场,1,走势形态良好‚建议介入。,12.69,2.84%,0.46,11.75,12.59,15.54","600005,武钢股份,1,横向整理‚不跟为主。,2.81,3.69%,0.43,-,2.79,25.69","600006,东风汽车,1,继续震荡格局‚不入为宜。,6.05,4.31%,1.26,27.33,5.98,16.37","600007,中国国贸,1,强势特征明显‚跟进为主。,14.49,3.06%,0.28,23.63,14.44,11.42","600008,首创股份,1,震荡盘整态势‚观望。,7.25,4.32%,0.73,53.78,7.18,15.36","600009,上海机场,1,仍有上行空间‚近期加仓。………… |
-- 作者:大红袍 -- 发布时间:2016/2/17 10:16:00 -- 不可以 |
-- 作者:大红袍 -- 发布时间:2016/2/17 10:39:00 -- 要换一种方法处理,红色代码自己变通
' Download the raw page source over HTTP and split it on commas.
Dim url As String = "http://data.eastmoney.com/stockcomment/"
Dim rqst As System.Net.HttpWebRequest = System.Net.HttpWebRequest.Create(url)
Dim str As String = ""
' Using blocks release the response, its stream and the reader even when reading throws;
' the original only closed the response on the success path.
Using rsps As System.Net.HttpWebResponse = rqst.GetResponse
    Using stm As System.IO.Stream = rsps.GetResponseStream()
        ' Encoding.Default is the system ANSI code page.
        ' NOTE(review): assumes that matches the page's encoding - confirm.
        Using reader As New System.IO.StreamReader(stm, Encoding.Default)
            str = reader.ReadToEnd
        End Using
    End Using
End Using
' NOTE(review): s1 is declared but not used below; presumably it is the marker that
' precedes the data in the page source, and the post leaves that step to the reader.
Dim s1 As String = "defjson:{data:"
Dim ary() As String = str.split(",")
|
-- 作者:dqlgood -- 发布时间:2016/2/17 11:07:00 -- 这种方法能换页吗? 谢谢!
|
-- 作者:大红袍 -- 发布时间:2016/2/17 11:15:00 -- 呃,不能...... |
-- 作者:大红袍 -- 发布时间:2016/2/17 11:20:00 -- 换一种方法
' Load the page in a WebBrowser control, then read its source text from the document stream.
Dim web As New System.Windows.Forms.WebBrowser
web.Navigate("http://data.eastmoney.com/stockcomment/")
' Pump messages until loading completes (4 = WebBrowserReadyState.Complete).
' The original snippet had no loop body or closing Loop.
Do Until web.ReadyState = 4
    Application.DoEvents
Loop
' Read the whole document into str, decoding with the encoding the document reports.
' The original created the reader but never read from it, leaving str undefined.
Dim str As String = ""
Using sr As New IO.StreamReader(web.DocumentStream, Encoding.GetEncoding(web.Document.Encoding))
    str = sr.ReadToEnd
End Using
' NOTE(review): s1 is declared but not used below; presumably it is the marker that
' precedes the data in the page source - confirm before relying on it.
Dim s1 As String = "defjson:{data:"
Dim ary() As String = str.split(",")
|