2007-04-18 14:50:58| 分类: Visual Basic | 标签: |举报 |字号大中小 订阅
Dim X As New MSXML2.XMLHTTP
Dim Y As New MSXML2.XMLHTTP
Dim i As Integer
Dim body
Dim strBegin
Dim strEnd
Dim strOther, strChange
Dim struse
Dim j As Integer
Dim url
Dim Art_ID
Dim Art_Content
Function GetInfo(body, strBegin, strEnd, strMsg)
Dim m, n, struse
m = InStr(body, strBegin)
n = InStr(body, strEnd)
'MsgBox m & " " & n
'MsgBox strMsg
GetInfo = Mid(body, m, n - m)
GetInfo = Replace(GetInfo, strBegin, "")
GetInfo = Replace(GetInfo, "<BR>", vbCrLf)
End Function
Function GetInfo2(body, strBegin, strEnd, strOther)
Dim m, n, m2, n2, struse
m = InStr(body, strBegin)
n = InStr(body, strEnd)
'MsgBox m & " " & n
GetInfo2 = Mid(body, m, n - m)
GetInfo2 = Replace(GetInfo2, strBegin, "")
GetInfo2 = Replace(GetInfo2, strOther, "")
GetInfo2 = Replace(GetInfo2, vbCrLf, "")
GetInfo2 = Replace(GetInfo2, Chr(9), "")
GetInfo2 = Replace(GetInfo2, "<BR>", vbCrLf)
End Function
Function GetInfo3(body, strBegin, strEnd)
Dim m, n, struse
m = InStr(j, body, strBegin)
n = InStr(j, body, strEnd)
j = n + Len(strEnd)
GetInfo3 = Mid(body, m, n - m)
GetInfo3 = Replace(GetInfo3, strBegin, "")
End Function
'未找到尾部字符/字符串时候的处理函数
Function GetInfo4(body, strBegin, strEnd, strOther)
Dim m, n, m2, n2, struse
m = InStr(body, strBegin)
n = InStr(body, strEnd)
If n < 1 Then
strEnd = "人以上</TD></TR>"
n = InStr(body, strEnd)
End If
'MsgBox m & " " & n
GetInfo4 = Mid(body, m, n - m)
GetInfo4 = Replace(GetInfo4, strBegin, "")
GetInfo4 = Replace(GetInfo4, strOther, "")
End Function
Public Function StrTotal(body, struse) As Integer
Dim count
Dim pos1, pos2
count = 0
pos2 = 1
Do
If StrTotal Then
pos1 = InStr(pos2, body, struse, vbBinaryCompare)
Else
pos1 = InStr(pos2, body, struse, vbTextCompare)
End If
If pos1 > 0 Then
count = count + 1
pos2 = pos1 + 1
End If
Loop Until pos1 < 1
StrTotal = count
End Function
Private Sub CID_KeyPress(KeyAscii As Integer)
If (KeyAscii < vbKey0 Or KeyAscii > vbKey9) Then KeyAscii = 1
End Sub
Private Sub Command1_Click()
If Trim(pageStart.Text) = 0 Or Trim(pageNum.Text) = 0 Or Trim(CID.Text) = 0 Then
MsgBox "警告:请正确输入页码和类别!", vbOKOnly + vbInformation, "信息"
Exit Sub
Else
If Int(pageNum.Text) < Int(pageStart.Text) Then
MsgBox "警告:结束页不能小于开始页!", vbOKOnly + vbInformation, "信息"
Exit Sub
End If
End If
Command1.Caption = "正在分析数据..."
Command1.Enabled = False
CID.Enabled = False
pageStart.Enabled = False
pageNum.Enabled = False
Dim conn As New ADODB.Connection
Dim rs As New ADODB.Recordset
Dim rs2 As New ADODB.Recordset
'使用数据源来连接数据库
conn.open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + App.Path & "\data\article.mdb" + ";Persist Security Info=False"
rs.open "select * from Info", conn, adOpenDynamic, adLockPessimistic
Dim Index
Index = CInt(CInt(pageNum.Text) - CInt(pageStart.Text) + 1)
ccrpProgressBar2.Min = 1
ccrpProgressBar2.Max = Index
ccrpProgressBar2.Visible = True
Dim gindex
gindex = 1
For gi = pageStart.Text To pageNum.Text
Dim ClassID
ClassID = CID.Text
'定义抓取的对象
url = "http://www.sellgreat.com/index.htm?module=article&action=article_list&type_id=" & ClassID & "&custom_id=" & ClassID & "&page="
url = url & gi
X.open "get", url, False
X.send
If StrConv(X.responseBody, vbUnicode) <> "" Then
Dim strInfo, e, f
Dim totalArticle
Dim urlSource, urlSource2
Dim articleID
Dim articleUrl
urlSource = StrConv(X.responseBody, vbUnicode)
totalArticle = StrTotal(StrConv(X.responseBody, vbUnicode), ".html"" target=""_blank"">")
ccrpProgressBar1.Min = 1
ccrpProgressBar1.Max = totalArticle
ccrpProgressBar1.Visible = True
j = 1
For i = 1 To totalArticle
'取详细页ID
e = "link=html%2Farticle%2F"
f = ".html"" target=""_blank"
articleID = GetInfo3(urlSource, e, f)
articleID = Replace(articleID, "%2F", "/")
Art_ID = articleID
articleUrl = "http://www.sellgreat.com/html/article/" & articleID & ".html"
Y.open "get", articleUrl, False
Y.send
urlSource2 = StrConv(Y.responseBody, vbUnicode)
'标题
e = "<TITLE>"
f = "</TITLE>"
Art_Title = GetInfo(urlSource2, e, f, "名称")
'内容
e = "<font article_content"">"
f = "</font></div>"
Art_Content = GetInfo(urlSource2, e, f, "内容")
'判断是否重复
Set rs2 = conn.Execute("select Art_ID from Info where Art_ID='" & Art_ID & "'")
If rs2.EOF Then
'开始入库
rs.AddNew
rs.fields(1) = Art_Title
rs.fields(2) = Art_Content
rs.fields(3) = Art_ID
rs.fields(4) = ClassID
rs.Update
End If
'进度条
ccrpProgressBar1.Value = i
Next
ccrpProgressBar1.Visible = False
ccrpProgressBar1.Value = 1
End If
'进度条
ccrpProgressBar2.Value = gindex
gindex = CInt(CInt(gindex) + 1)
Next
ccrpProgressBar2.Visible = False
ccrpProgressBar2.Value = 1
Label2.Caption = "信息存取成功!"
Command1.Caption = "开始"
Command1.Enabled = True
CID.Enabled = True
pageStart.Enabled = True
pageNum.Enabled = True
rs.Close
rs2.Close
Set rs = Nothing
Set rs2 = Nothing
conn.Close
Set conn = Nothing
End Sub
Private Sub Command2_Click()
End
End Sub
Private Sub Form_Load()
ccrpProgressBar1.Visible = False
ccrpProgressBar2.Visible = False
End Sub
Private Sub pageNum_KeyPress(KeyAscii As Integer)
If (KeyAscii < vbKey0 Or KeyAscii > vbKey9) Then KeyAscii = 1
End Sub
Private Sub pageStart_KeyPress(KeyAscii As Integer)
If (KeyAscii < vbKey0 Or KeyAscii > vbKey9) Then KeyAscii = 1
End Sub
评论