发新话题
打印

[讨论]用ASP针对星星中文网写的采集入READ8数据库

同题,只支持单本采集
现在的问题是想  用ASP 返回 UNLIX 时间戳 写入数据库


还有一个问题  是     书籍表 和 书籍章节表 最后的一个字段  MD5值是怎么来的???

知道的朋友请帮忙...


附上源码


/xinxinadmin/

jason.asp
<%
'检测URL地址
function isurl(url1)
  Set xmlHttp = CreateObject("Msxml2.XMLHTTP") '创建对象
  On Error Resume Next               '错误处理
  xmlHttp.Open "POST", url1, False '用 "POST" 方法异步打开连接
  xmlHttp.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"  '发送表单数据
  xmlHttp.Send  '向服务器发送 HTTP 请求
  If Err.Number = 0 Then   '如果成功
    isurl=url1
  Else
    response.write"<script>alert('URL地址不合法!');history.go(-1);</script>"
        response.end
  End If
  Set xmlHttp = nothing   '释放对象
end function

'================================================
'作  用:检查组件是否已经安装
'参  数:strClassString ----组件名
'返回值:True  ----已经安装
'        False ----没有安装
'================================================
Function IsObjInstalled(objName)
        On Error Resume Next
        IsObjInstalled = False
        Err = 0
        Dim testObj
        SET testObj = Server.CreateObject(objName)
        IF(0 = Err)THEN IsObjInstalled = True
        SET testObj = NOTHING
        Err = 0
End Function

'生成随机数字
function show()
dim id
id=CStr(Int((99999 * Rnd) + 1))
show=id
end function

'================================================
'作  用                :替换字符串中的远程文件为本地文件并保存远程文件
'sHTML                : 要替换的字符串
'sSavePath        : 保存文件的路径
'sExt                : 执行替换的扩展名
'================================================
Function ReplaceRemoteUrl(sHTML, sSavePath, sExt)
        Dim s_Content
        s_Content = sHTML
        IF(IsObjInstalled("Microsoft.XMLHTTP") = False)THEN
                ReplaceRemoteUrl = s_Content
                Exit Function
        END IF
       
        Dim re, RemoteFile, RemoteFileurl, SaveFileName, SaveFileType
        SET re = new RegExp
        re.IgnoreCase  = True
        re.Global = True
        re.Pattern = "((http|https|ftp|rtsp|mms):(\/\/|\\\\){1}((\w)+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(" & sExt & ")))"
        SET RemoteFile = re.Execute(s_Content)
        IF(Err<>0)THEN
                Exit Function
        END IF
        For Each RemoteFileurl in RemoteFile
                SaveFileType = Mid(RemoteFileurl, InstrRev(RemoteFileurl, ".") + 1)
                SaveFileName = sSavePath&Replace(Replace(Replace(Now(),"-",""),":","")," ","")&show()&"."&SaveFileType
                Call SaveRemoteFile(SaveFileName, RemoteFileurl)
                s_Content = Replace(s_Content,RemoteFileurl,SaveFileName)
        Next
        ReplaceRemoteUrl = s_Content
End Function

'================================================
'作  用:保存远程的文件到本地
'参  数:LocalFileName ------ 本地文件名
'                 RemoteFileUrl ------ 远程文件URL
'返回值:True  ----成功
'        False ----失败
'================================================
Sub SaveRemoteFile(s_LocalFileName,s_RemoteFileUrl)
        Dim Ads, Retrieval, GetRemoteData
        On Error Resume Next
        SET Retrieval = Server.CreateObject("Microsoft.XMLHTTP")
        With Retrieval
                .Open "Get", s_RemoteFileUrl, False, "", ""
                .Send
                GetRemoteData = .ResponseBody
        End With
        SET Retrieval = NOTHING
        SET Ads = Server.CreateObject("Adodb.Stream")
        With Ads
                .Type = 1
                .Open
                .Write GetRemoteData
                .SaveToFile Server.MapPath(s_LocalFileName), 2
                .Cancel()
                .Close()
        End With
        SET Ads=NOTHING
End Sub
'=================================================
'过程名:getHTTPPage
'作  用:获取页面内容
'参  数:url ----绝对地址
'=================================================

function getHTTPPage(url)
        on error resume next
        dim http
        set http=Server.createobject("Microsoft.XMLHTTP")
        Http.open "GET",url,0
        Http.send()
        if Http.readystate<>4 then
                exit function
        end if
        getHTTPPage=bytes2BSTR(Http.responseBody)
        set http=nothing
        if err.number<>0 then err.Clear  
end function

Function bytes2BSTR(vIn)
        dim strReturn
        dim i,ThisCharCode,NextCharCode
        strReturn = ""
        For i = 1 To LenB(vIn)
                ThisCharCode = AscB(MidB(vIn,i,1))
                If ThisCharCode < &H80 Then
                        strReturn = strReturn & Chr(ThisCharCode)
                Else
                        NextCharCode = AscB(MidB(vIn,i+1,1))
                        strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode))
                        i = i + 1
                End If
        Next
        bytes2BSTR = strReturn
End Function


Function NewString(wstr,strng)
        NewString=Instr(wstr,strng)
End Function

'-------------------------------------------------
'创建多级目录,可以创建不存在的根目录
'参数:要创建的目录名称,可以是多级
'返回逻辑值,True成功,False失败
'创建目录的根目录从当前目录开始
'---------------------------------------------------
Function CreateMultiFolder(ByVal CFolder)
Dim objFSO,PhCreateFolder,CreateFolderArray,CreateFolder
Dim i,ii,CreateFolderSub,PhCreateFolderSub,BlInfo
BlInfo = False
CreateFolder = CFolder
On Error Resume Next
Set objFSO = Server.CreateObject("Scripting.FileSystemObject")
If Err Then
Err.Clear()
Exit Function
End If
CreateFolder = Replace(CreateFolder,"\","/")
If Left(CreateFolder,1)="/" Then
CreateFolder = Right(CreateFolder,Len(CreateFolder)-1)
End If
If Right(CreateFolder,1)="/" Then
CreateFolder = Left(CreateFolder,Len(CreateFolder)-1)
End If
CreateFolderArray = Split(CreateFolder,"/")
For i = 0 to UBound(CreateFolderArray)
CreateFolderSub = ""
For ii = 0 to i
CreateFolderSub = CreateFolderSub & CreateFolderArray(ii) & "/"
Next
PhCreateFolderSub = Server.MapPath(CreateFolderSub)
If Not objFSO.FolderExists(PhCreateFolderSub) Then
objFSO.CreateFolder(PhCreateFolderSub)
End If
Next
If Err Then
Err.Clear()
Else
BlInfo = True
End If
CreateMultiFolder = BlInfo
End Function

%>

xin.asp
<form id="form1" name="form1" method="post" action="xinxin.asp">
  <table width="550" border="0" align="center" cellpadding="0" cellspacing="1" bgcolor="#00CC66">
    <tr>
      <td height="25" colspan="2" align="center" bgcolor="#FFFFFF">书籍采集(本采集程序只针对'星星中文网')请输入书籍信息</td>
    </tr>
    <tr>
      <td width="118" height="25" align="right" bgcolor="#FFFFFF">小说分类:</td>
      <td width="426" height="25" bgcolor="#FFFFFF">
        <select name="type_id" id="type_id">
          <option value="1">玄幻魔法</option>
          <option value="2">武侠仙侠</option>
          <option value="3">历史军事</option>
          <option value="4">都市言情</option>
          <option value="5">网络游戏</option>
          <option value="6">推理灵异</option>
          <option value="7">散文诗词</option>
          <option value="8">科幻动漫</option>
        </select>
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">书名:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_title" type="text" id="book_title" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">录入者:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="user_name" type="text" id="user_name" value="admin" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">作者:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_author" type="text" id="book_author" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">作品简介:</td>
      <td height="25" bgcolor="#FFFFFF"><textarea name="book_intro" rows="5" id="book_intro"></textarea></td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">作品类别:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_attrib" type="text" id="book_attrib" value="公开作品" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">书名拼音首字母:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_initial" type="text" id="book_initial" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">章节总数:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_newchapterid" type="text" id="book_newchapterid" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">书籍字数:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="book_size" type="text" id="book_size" />
      </td>
    </tr>
    <tr>
      <td height="25" align="right" bgcolor="#FFFFFF">书籍采集地址:</td>
      <td height="25" bgcolor="#FFFFFF">
        <input name="url" type="text" id="url" size="50" />
      </td>
    </tr>
    <tr>
      <td height="25" colspan="2" align="center" bgcolor="#FFFFFF">
        <input type="submit" name="Submit" value="提交" />
      
        
          
        <input type="reset" name="Submit2" value="重置" />
      </td>
    </tr>
  </table>

xinxin.asp
<!--#include file="jason.asp"-->
<%   
book_title=request.Form("book_title")
user_name=request.Form("user_name")
book_author=request.Form("book_author")
type_id=request.Form("type_id")
book_intro=request.Form("book_intro")
book_attrib=request.Form("book_attrib")
book_initial=request.Form("book_initial")
book_newchapterid=request.Form("book_newchapterid")
book_size=request.Form("book_size")
url=request.Form("url")
if book_title="" or user_name="" or book_author="" or type_id="" or book_intro="" or book_attrib="" or book_initial="" or book_newchapterid="" or book_size="" or url="" then
response.Write "<script>alert('请确保各信息填写正确');history.back();</script>"
response.End()
end if
isurl(url)
strconnection  =  "dsn=mysqldsn;driver={myodbd  driver};server=localhost;uid=root;pwd=123456;database=read8"   
set  adodataconn  =  server.createobject("adodb.connection")   
adodataconn.open  strconnection  
set gb2312=adodataconn.Execute("SET NAMES 'gb2312'")  '就是这句起的作用
strquery  =  "insert into rd8_read8_book(book_title,user_name,book_author,type_id,book_actor,book_intro,book_time,book_updatetime,book_attrib,book_initial,book_newchapterid,book_size,book_state,book_recommend,book_md5) value('"&book_title&"','"&user_name&"','"&book_author&"','"&type_id&"','','"&book_intro&"','"&time()&"','"&time()&"','"&book_attrib&"','"&book_initial&"','"&book_newchapterid&"','"&book_size&"',1,0,'8b89c32fa193efe7bbe925a98ebf99c1')"   
adodataconn.execute(strquery)   
sql="select * from rd8_read8_book where book_title = '"&book_title&"' "
set rs = adodataconn.execute(sql)
response.Write rs("book_id")   
%>   
<%
Server.ScriptTimeOut=36000 '设定操作超时的时间
wstr = getHTTPPage(url) '取得页面内容
if err.number=0 then '如果获取成功
        start=newstring(wstr,"<div id=""title"">") '要获取的内容在网页中的开始位置(通过唯一的标志寻找)
        over=newstring(wstr,"<div id=""copyright"">") '要获取的内容在网页中结束位置(通过唯一的标志寻找)
        wstr=mid(wstr,start,over-start) '获取想要的内容
        wstr=replace(wstr,"</script></div>","</script></div><form action=xinxin_1.asp method=post>")
        wstr=replace(wstr,"<a href=""","<input type=checkbox name=list checked value=")
        wstr=replace(wstr,".html"">","><input type=text name=list_name value=""")
        wstr=replace(wstr,"</a>",""">")
end if
%>
<div align=center>
<%response.write wstr%><br>
<input name="list_url" type="hidden" id="list_url" value="<%=url%>">
<input name="book_id" type="hidden" id="book_id" value="<%=rs("book_id")%>">
<input name="book_title" type="hidden" id="book_title" value="<%=rs("book_title")%>">
<input name="type_id" type="hidden" id="type_id" value="<%=rs("type_id")%>">
<input name="book_author" type="hidden" id="book_author" value="<%=rs("book_author")%>">
<input class="button" type=button value="全部选定" onclick="this.value=check(this.form.list)">
<input class="button" type="submit" value=" 下一步 ">
<%
rs.close
set rs = nothing
adodataconn.close
set adodataconn = nothing
%>

xinxin_1.asp

<!--#include file="jason.asp"-->
<%
Server.ScriptTimeOut=36000 '设定操作超时的时间
'设定/获取各参数值
book_id = Request.form("book_id")
title=request.Form("book_title")
username=request.Form("book_author")
type_id=request.Form("type_id")
if book_id = "" then
response.Write "<script>alert('book_id为空,请检测源程序!');history.back();</script>"
end if

SaveFileTypeExt = "jpg|gif|bmp|png"        '需要保存的图片文件扩展名,当SaveToLocal=False是此项无效
SavePath = "/upload/"        '图片等文件保存路径,最后要带 /,当SaveToLocal=False是此项无效
list=request("list")
list=split(list,",")
list_url=request("list_url")
list_url=replace(list_url,"/index.html","")
list_name=request("list_name")
list_name=split(list_name,",")
list_pos=Ubound(list)
view_id2=request("view_id2")
set fso=server.CreateObject("Scripting.FileSystemObject")
response.Write "<div align=center><Br><span style='font-size: 16px;'>正在获取章节名称(采集站:星星中文网)......</span><Br><Br></div>"
'on error resume next

'连接数据库
strconnection  =  "dsn=mysqldsn;driver={myodbd  driver};server=localhost;uid=root;pwd=123456;database=read8"   
set  adodataconn  =  server.createobject("adodb.connection")   
adodataconn.open  strconnection  
set gb2312=adodataconn.Execute("SET NAMES 'gb2312'")  '就是这句起的作用

'创建目录
folder="../html/" & type_id & "/"&book_id&"/"
if CreateMultiFolder(folder)=true then
response.Write folder & "目录创建成功<br>"
else
response.Write folder & "目录已存在<br>"
response.End()
end if

'循环获取开始
for i = 0 to list_pos

        '获取章节URL地址
        url = ""&list_url&"/"&trim(list(i))&".html" '新闻来源的页面

        '获取内容
        wstr = getHTTPPage(url) '取得页面内容
        if err.number=0 then '如果获取成功
                start=newstring(wstr,"<div id=""content"">")+18 '要获取的内容在网页中的开始位置(通过唯一的标志寻找)
                over=newstring(wstr,"<div id=""footlink"">") '要获取的内容在网页中结束位置(通过唯一的标志寻找)
                wstr=mid(wstr,start,over-start) '获取想要的内容
                wstr=replace(wstr,"<br />","<br>")
                wstr=replace(wstr,"<div id=""adbottom""><script type=""text/javascript"" src=""/configs/article/adfoot.js""></script>","")
                wstr=replace(wstr,"</div>","<br>")
                wstr=replace(wstr,"<img src=""","<img src=""http://www.ixxw.net/")
        end if
       
        '下载保存内容中的图片
        wstr = ReplaceRemoteUrl(wstr, SavePath, SaveFileTypeExt)
       
        '返回内容长度
        chapter_words=len(wstr)
       
        strquery  =  "insert into rd8_read8_chapter(chapter_title,book_id,chapter_orderid,chapter_time,chapter_updatetime,chapter_new,chapter_words,volume_id,chapter_md5) value('"&list_name(i)&"','"&book_id&"','"&i&"','"&time()&"','"&time()&"',0,'"&chapter_words&"',1,'28be7c320937ce778120351941112e96')"   
        adodataconn.execute(strquery)
         
        '生成静态
        set fout=fso.createtextfile(Server.MapPath("/HTML/"&type_id&"/"&book_id&"/"&i&".html"))
        fout.writeline wstr

        Response.Write list_name(i)&"成功生成<br>"
        fout.close

next
set fso=nothing
response.Write "成功获取全部章节<br>"

'生成章节列表页
sql="SELECT * FROM `rd8_read8_chapter` WHERE `book_id` = '"&book_id&"' ORDER BY `chapter_orderid` ASC"
set rs = adodataconn.execute(sql)
dim content
do while not rs.eof
content=content & "<div class=""chapter""><a href="""&rs("chapter_orderid")&".html"" title="""&rs("chapter_title")&""">"&rs("chapter_title")&"</a></div>"
rs.movenext
loop
rs.close
set rs=nothing

'生成章节列表页
'读出模版
Set fso3=Server.CreateObject("Scripting.FileSystemObject")
Set htmlwrite=fso3.OpenTextFile(Server.MapPath("/xinxinadmin/index.html"))
do while not htmlwrite.atendofstream                                                                        '如果不到文件结束,一直循环
showindex=showindex & htmlwrite.readline
loop
'替换模换中内容
showindex=replace(showindex,"$title$",title)
showindex=replace(showindex,"$book_id$",book_id)
showindex=replace(showindex,"$username$",username)
showindex=replace(showindex,"$content$",content)
'删除原文件
'set fso1=createobject("scripting.filesystemobject")
'fso1.deletefile(server.MapPath("/html/"&book_id&"/1/index.html"))
'set fso1=nothing
'生成
set fso=server.CreateObject("Scripting.FileSystemObject")
set fout=fso.createtextfile(Server.MapPath("/html/"&type_id&"/"&book_id&"/index.html"))
fout.writeline showindex
fout.close
response.write("<font color='ff0000'>成功生成章节列表页</font><br>")
response.write("<font color='ff0000'>采集完成</font><br>")

adodataconn.close
set adodataconn = nothing
%>

index.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Language" content="zh-CN" />
<meta http-equiv="Widow-target" content="_top" />
<meta name="Keywords" content="$title$-小说频道" />
<meta name="Description" content="$title$-小说频道" />
<meta name="Author" content="$username$-$title$-小说频道" />
<title>$username$-$title$-小说频道-最新章节</title>
<link href="/include/book.css" rel="stylesheet" type="text/css" />
<script src="/include/js/common.js" type="text/javascript"></script>
<script src="/include/js/read.js" type="text/javascript"></script>
<script type="text/javascript"><!--//--><![CDATA[//><!--
        var click_num=0;
        var vote_num=0;
//--><!]]></script>
<script src="/bookstat.php?bid=$book_id$" type="text/javascript"></script>
</head>
<body onload="javascript:externallinks();">
<div id="page">
        <div id="topann"><iframe scrolling="no" frameborder="0" src="/data/ann/top.html"></iframe></div>
        <div id="head">
                <div id="na"><a href="">小说频道首页</a> -> <a href="/bookroom.php">总书库</a> -><a href="/bookinfo.php?bid=$book_id$" title="$title$">《$title$》</a></div>
                <div id="font_lang"><a href="javascript:changeFontLang(1)">简->繁</a></div>
                <div id="pagelink"><a href="/bookreview.php?bid=$book_id$" rel="external">评论</a> <a href="/bookstat.php?vbid=$book_id$" target="voteiframe">投票</a></div>
                <div id="fave"><a href="/bookstat.php?mbid=$book_id$&mcid=" title="$title$">收藏书签</a></div>
        </div>
        <hr />
        <div id="title" title="$title$">$title$</div>
        <div id="author">作者: <a href="/authorinfo.php?target=$username$" rel="external">$username$</a></div>
        <div id="info">点击: <script type="text/javascript">document.write(click_num);</script>  投票: <script type="text/javascript">document.write(vote_num);</script></div>
        <div id="chapterlist">
                        <div class="vol">正文</div>
                        $content$               
                        </div>
        <div id="vote"><iframe name="voteiframe"></iframe></div>
        <hr />
        <div id="footann"><iframe scrolling="no" frameborder="0" src="/data/ann/foot.html"></iframe></div>
        <div id="copyright">
                Copyright © 2006 小说频道 All Rights Reserved<br />
                本作品系网友发表,仅代表作者本人观点,与本站立场无关。<br />
                如有章节错误、排版不齐或版权疑问、作品内容有违相关法律等情况,请联系管理员。
        </div>
</div>
<script src="/include/js/lang.js" type="text/javascript"></script>
<script src="/include/js/f_lang.js" type="text/javascript"></script>
</body>
</html>



注:  只对星星中文网有效.运行文件是  xin.asp   index.html只是章节列表页的模版  (章节阅读页的模版还没做!)

TOP

对了.运行上面的程序还有一个问题..

需要安装MYODBC3.51驱动    并且在数据源里建立MYSQLDSN连接

水平很烂.写的东东也烂...有能力的朋友帮我看看要怎么加批量采集  (星星中文网http://www.ixxw.net/的小说列表页有两个变量)   

TOP

路过

TOP

晕...这都有的写

TOP

发新话题