以文本方式查看主题 - Foxtable(狐表) (http://foxtable.com/bbs/index.asp) -- 专家坐堂 (http://foxtable.com/bbs/list.asp?boardid=2) ---- 求助: lsspan(0).InnerHtml这个位置有两个不同的值,阅读数与播放数,怎么写代码让这个位置的值,对应分别都识别为阅读数与播放数呢? (http://foxtable.com/bbs/dispbbs.asp?boardid=2&id=92571) |
||||
-- 作者:李孝春 -- 发布时间:2016/11/7 15:02:00 -- 求助: lsspan(0).InnerHtml这个位置有两个不同的值,阅读数与播放数,怎么写代码让这个位置的值,对应分别都识别为阅读数与播放数呢? 求助: lsspan(0).InnerHtml这个位置有两个不同的值,阅读数与播放数,怎么写代码让这个位置的值,对应分别都识别为阅读数与播放数呢? 如果是阅读数,就将该值的数据写入阅读数列 如果是播放数,就将该值的数据写入播放数列
lsspan(1).InnerHtml lsspan(2).InnerHtml的值及位置不变化。 Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts() As String = 阅读数.Split("阅读 or 播放") 这样写对不对? 【下面代码红色部分怎么更改呢?怎么判断呢?】 Dim lsspan = lis(i).GetElementsByTagName("span") str &= lsspan(0).InnerHtml & " " & lsspan(1).InnerHtml & " " & lsspan(2).InnerHtml & vbcrlf & vbcrlf Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts() As String = 阅读数.Split("阅读") Dim 播放数 As String = lsspan(0).InnerHtml Dim Parts2() As String = 播放数.Split("播放") Dim 评论数 As String = lsspan(1).InnerHtml Dim Parts1() As String = 评论数.Split("评论") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 str &= Parts(0) & vbcrlf str &= Parts1(0) & vbcrlf Dim abc As String= lsspan(2).InnerHtml Dim Parts2() As String = abc.Split(" ") str &= Parts2(0) & vbcrlf \'Output.Show(bt.innerText & " " & bt.GetAttribute("content")) dr("单位") = bt.innerText & " " & bt.GetAttribute("content") \'msgbox(bt.innerText & " " & bt.GetAttribute("content")) dr("文章标题") = a.InnerHtml dr("发布时间") = Parts2(0) & vbcrlf dr("阅读数") = Parts(0) dr("评论数") = Parts1(0) \'Dim lsa1 = lis(i).GetElementsByTagName("p") \'For Each p As object In lsa1 \'If p.GetAttribute("className") = "abstract" Then \'dr("正文") = p.InnerHtml \'End If \'Next dr("网址") = a.GetAttribute("href") End If Next Next output.Show(str) [此贴子已经被作者于2016/11/7 16:59:43编辑过]
|
||||
-- 作者:有点色 -- 发布时间:2016/11/7 18:22:00 -- 呃,这个是基础问题
If lsspan(0).InnerHtml.contains("阅读") Then
Else If lsspan(0).InnerHtml.contains("播放") Then
End If |
||||
-- 作者:李孝春 -- 发布时间:2016/11/7 20:46:00 -- 回复:(有点色)呃,这个是基础问题... 按照上面的代码完善之后,出现了下面两种错误,都不是想要达到的效果,求解啊!文章、网址、阅读数、评论数、发布时间都要能够完整显示出来。
运行效果1: ![]() ![]() 代码如下:
DataTables("头条文章").DataRows.Clear Dim web As new System.Windows.Forms.WebBrowser web.Navigate("http://toutiao.com/m6192786832/") Do Until web.ReadyState = 4 Application.DoEvents Loop Do Until web.DocumentText.contains("没有更多啦") web.Document.Window.ScrollTo(0, 0) web.Document.Window.ScrollTo(0, web.Document.Body.ScrollRectangle.Height) Application.DoEvents Loop Dim lis = web.Document.GetElementById("content-left").GetElementsByTagName("li") Dim str As String = "" For i As Integer = 0 To lis.count-1 Dim lsa = lis(i).GetElementsByTagName("a") For Each a As object In lsa Dim dr As DataRow = DataTables("头条文章").AddNew() If a.GetAttribute("className") = "title-box link" Then str &= a.InnerHtml & vbcrlf str &= a.GetAttribute("href") & vbcrlf dr("网址") = a.GetAttribute("href") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 dr("单位") = bt.innerText & " " & bt.GetAttribute("content") dr("文章标题") = a.InnerHtml Else If a.GetAttribute("className") = "y-left" Then Dim lsspan = lis(i).GetElementsByTagName("span") If lsspan(0).InnerHtml.contains("阅读") Then str &= lsspan(0).InnerHtml & " " & lsspan(1).InnerHtml & " " & lsspan(2).InnerHtml & vbcrlf & vbcrlf Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts() As String = 阅读数.Split("阅读") dr("阅读数") = Parts(0) Dim 评论数 As String = lsspan(1).InnerHtml Dim Parts1() As String = 评论数.Split("评论") dr("评论数") = Parts1(0) dr("发布时间") = lsspan(2).InnerHtml \' Else If lsspan(0).InnerHtml.contains("播放") Then str &= lsspan(0).InnerHtml & " " & lsspan(1).InnerHtml & " " & lsspan(2).InnerHtml & vbcrlf & vbcrlf Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts2() As String = 阅读数.Split("播放") dr("阅读数") = Parts2(0) output.Show(Parts2(0)) Dim 评论数 As String = lsspan(1).InnerHtml Dim Parts1() As String = 评论数.Split("评论")\' dr("评论数") = Parts1(0) dr("发布时间") = lsspan(2).InnerHtml End If End If End If End If Next Next output.Show(str) 运行效果2:
运行代码2: DataTables("头条文章").DataRows.Clear Dim web As new System.Windows.Forms.WebBrowser web.Navigate("http://toutiao.com/m6192786832/") Do Until web.ReadyState = 4 Application.DoEvents Loop Do Until web.DocumentText.contains("没有更多啦") web.Document.Window.ScrollTo(0, 0) web.Document.Window.ScrollTo(0, web.Document.Body.ScrollRectangle.Height) Application.DoEvents Loop Dim lis = web.Document.GetElementById("content-left").GetElementsByTagName("li") Dim str As String = "" For i As Integer = 0 To lis.count-1 Dim lsa = lis(i).GetElementsByTagName("a") For Each a As object In lsa If a.GetAttribute("className") = "title-box link" Then Dim dr As DataRow = DataTables("头条文章").AddNew() str &= a.InnerHtml & vbcrlf str &= a.GetAttribute("href") & vbcrlf If a.GetAttribute("className") = "y-left" Then Dim lsspan = lis(i).GetElementsByTagName("span") If lsspan(0).InnerHtml.contains("阅读") Then str &= lsspan(0).InnerHtml & " " & lsspan(1).InnerHtml & " " & lsspan(2).InnerHtml & vbcrlf & vbcrlf Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts() As String = 阅读数.Split("阅读") dr("阅读数") = Parts(0) MessageBox.Show(Parts(0)) Dim 评论数 As String = lsspan(1).InnerHtml Dim Parts1() As String = 评论数.Split("评论") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 dr("单位") = bt.innerText & " " & bt.GetAttribute("content") dr("文章标题") = a.InnerHtml dr("发布时间") = lsspan(2).InnerHtml dr("评论数") = Parts1(0) \' dr("网址") = a.GetAttribute("href") Else If lsspan(0).InnerHtml.contains("播放") Then str &= lsspan(0).InnerHtml & " " & lsspan(1).InnerHtml & " " & lsspan(2).InnerHtml & vbcrlf & vbcrlf Dim 阅读数 As String = lsspan(0).InnerHtml Dim Parts2() As String = 阅读数.Split("播放") dr("阅读数") = Parts2(0) MessageBox.Show(Parts2(0)) Dim 评论数 As String = lsspan(1).InnerHtml Dim Parts1() As String = 评论数.Split("评论") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 \'Output.Show(bt.innerText & " " & bt.GetAttribute("content")) 
dr("单位") = bt.innerText & " " & bt.GetAttribute("content") \'msgbox(bt.innerText & " " & bt.GetAttribute("content")) dr("文章标题") = a.InnerHtml dr("发布时间") = lsspan(2).InnerHtml dr("评论数") = Parts1(0) \'Dim lsa1 = lis(i).GetElementsByTagName("p") \'For Each p As object In lsa1 \'If p.GetAttribute("className") = "abstract" Then \'dr("正文") = p.InnerHtml \'End If \'Next \' dr("网址") = a.GetAttribute("href") End If End If End If End If Next Next output.Show(str) [此贴子已经被作者于2016/11/7 20:55:54编辑过]
|
||||
-- 作者:有点蓝 -- 发布时间:2016/11/7 21:49:00 -- 第一步,输出网页内容进行分析 DataTables("头条文章").DataRows.Clear Dim web As new System.Windows.Forms.WebBrowser web.Navigate("http://toutiao.com/m6192786832/") Do Until web.ReadyState = 4 Application.DoEvents Loop Do Until web.DocumentText.contains("没有更多啦") web.Document.Window.ScrollTo(0, 0) web.Document.Window.ScrollTo(0, web.Document.Body.ScrollRectangle.Height) Application.DoEvents Loop Output.Show(web.Document.body.Innerhtml) 可以把输出结果放到一些html编辑器中格式化方便分析,我使用的是vs的自动格式化功能,如图
[此贴子已经被作者于2016/12/7 10:11:00编辑过]
|
||||
-- 作者:李孝春 -- 发布时间:2016/11/7 21:57:00 -- 回复:(有点蓝)第一步,输出网页内容进行分析DataTa... 嗯,我是直接使用 Chrome 浏览器的开发者工具来分析的。第二步呢?
|
||||
-- 作者:有点蓝 -- 发布时间:2016/11/7 21:57:00 -- Dim lis = web.Document.GetElementById("content-left").GetElementsByTagName("li") Dim str As String = "" For i As Integer = 0 To lis.count-1 Dim dr As DataRow = DataTables("头条文章").AddNew() Dim lsa = lis(i).GetElementsByTagName("a") |
||||
-- 作者:李孝春 -- 发布时间:2016/11/7 22:15:00 -- 回复:(有点蓝)[upload=png,1.png]UploadFile/2016-... 嗯,遍历到了 LI 下面的元素之后,怎么判断“阅读”与“播放”呢?span 在视频图文下有四个值,在普通图文下有三个值。 ![]() ![]() 我用 class="y-left" 判断过,但是结果没有达到理想效果。
|
||||
-- 作者:有点蓝 -- 发布时间:2016/11/7 22:25:00 -- 第三步,从6楼图片结合4楼网页源代码可以看出,有图片的新闻a标签不止一个,有几个图片就多出几个a标签,标题就在第一个a标签里,所以只要获取第一个a标签即可,为了保险起见,观察标题的标签都是以css类“title-box link”设置的,所以就从这个类名称取值安全一点 代码如下 Dim lsa = lis(i).GetElementsByTagName("a") For Each a As object In lsa If a.GetAttribute("className") = "title-box link" Then str &= a.InnerHtml & vbcrlf str &= a.GetAttribute("href") & vbcrlf dr("网址") = a.GetAttribute("href") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 dr("单位") = bt.innerText & " " & bt.GetAttribute("content") dr("文章标题") = a.InnerHtml Exit For End If Next 同理,分析阅读量位于class名称为“y-left”的DIV标签里,把网页字符去掉,并分割,可以取出阅读/评论和时间3块内容,代码如下: Dim divs = lis(i).GetElementsByTagName("div") For Each div As object In divs If div.GetAttribute("className") = "y-left" Then Dim arr() As String = div.InnerText.Replace(" ","").split("?") Dim lsspan As String = arr(0) dr("阅读数") = lsspan.Replace("阅读","").Replace("播放","") dr("评论数") = arr(1).Replace("评论","") dr("发布时间") = arr(2) Exit For \' End If Next |
||||
-- 作者:有点蓝 -- 发布时间:2016/11/7 22:27:00 -- 完整代码看: \'\'\' ‘注意这里的三个单引号 DataTables("头条文章").DataRows.Clear Dim web As new System.Windows.Forms.WebBrowser web.Navigate("http://toutiao.com/m6192786832/") Do Until web.ReadyState = 4 Application.DoEvents Loop Do Until web.DocumentText.contains("没有更多啦") web.Document.Window.ScrollTo(0, 0) web.Document.Window.ScrollTo(0, web.Document.Body.ScrollRectangle.Height) Application.DoEvents Loop Dim lis = web.Document.GetElementById("content-left").GetElementsByTagName("li") Dim str As String = "" For i As Integer = 0 To lis.count-1 Dim dr As DataRow = DataTables("头条文章").AddNew() Dim lsa = lis(i).GetElementsByTagName("a") For Each a As object In lsa If a.GetAttribute("className") = "title-box link" Then str &= a.InnerHtml & vbcrlf str &= a.GetAttribute("href") & vbcrlf dr("网址") = a.GetAttribute("href") Dim bt As System.Windows.Forms.HtmlElement bt = web.Document.GetElementByID("keywords") \'今日头条单位标题 dr("单位") = bt.innerText & " " & bt.GetAttribute("content") dr("文章标题") = a.InnerHtml Exit For End If Next Dim divs = lis(i).GetElementsByTagName("div") For Each div As object In divs If div.GetAttribute("className") = "y-left" Then Dim arr() As String = div.InnerText.Replace(" ","").split("?") ’论坛无法显示这种特殊符号,所以显示成?号了,到网页源码中拷贝这个特殊符号(见下图)到代码编辑器中即可 Dim lsspan As String = arr(0) dr("阅读数") = lsspan.Replace("阅读","").Replace("播放","") dr("评论数") = arr(1).Replace("评论","") dr("发布时间") = arr(2) Exit For \' End If Next Next [此贴子已经被作者于2016/11/7 22:55:12编辑过]
|
||||
-- 作者:有点蓝 -- 发布时间:2016/11/7 22:31:00 -- 一定的编程基础知识+网页基础+一定的耐心分析,代码其实并不复杂。 以后类似的网页分析未必会再给你处理,请自己理解
|