同题,只支持单本采集
现在的问题是想 用ASP 返回 UNLIX 时间戳 写入数据库
还有一个问题 是 书籍表 和 书籍章节表 最后的一个字段 MD5值是怎么来的???
知道的朋友请帮忙...
附上源码
/xinxinadmin/
jason.asp
<%
'检测URL地址
function isurl(url1)
Set xmlHttp = CreateObject("Msxml2.XMLHTTP") '创建对象
On Error Resume Next '错误处理
xmlHttp.Open "POST", url1, False '用 "POST" 方法异步打开连接
xmlHttp.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" '发送表单数据
xmlHttp.Send '向服务器发送 HTTP 请求
If Err.Number = 0 Then '如果成功
isurl=url1
Else
response.write"<script>alert('URL地址不合法!');history.go(-1);</script>"
response.end
End If
Set xmlHttp = nothing '释放对象
end function
'================================================
'作 用:检查组件是否已经安装
'参 数:strClassString ----组件名
'返回值:True ----已经安装
' False ----没有安装
'================================================
Function IsObjInstalled(objName)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim testObj
SET testObj = Server.CreateObject(objName)
IF(0 = Err)THEN IsObjInstalled = True
SET testObj = NOTHING
Err = 0
End Function
'生成随机数字
function show()
dim id
id=CStr(Int((99999 * Rnd) + 1))
show=id
end function
'================================================
'作 用 :替换字符串中的远程文件为本地文件并保存远程文件
'sHTML : 要替换的字符串
'sSavePath : 保存文件的路径
'sExt : 执行替换的扩展名
'================================================
Function ReplaceRemoteUrl(sHTML, sSavePath, sExt)
Dim s_Content
s_Content = sHTML
IF(IsObjInstalled("Microsoft.XMLHTTP") = False)THEN
ReplaceRemoteUrl = s_Content
Exit Function
END IF
Dim re, RemoteFile, RemoteFileurl, SaveFileName, SaveFileType
SET re = new RegExp
re.IgnoreCase = True
re.Global = True
re.Pattern = "((http|https|ftp|rtsp|mms):(\/\/|\\\\){1}((\w)+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(" & sExt & ")))"
SET RemoteFile = re.Execute(s_Content)
IF(Err<>0)THEN
Exit Function
END IF
For Each RemoteFileurl in RemoteFile
SaveFileType = Mid(RemoteFileurl, InstrRev(RemoteFileurl, ".") + 1)
SaveFileName = sSavePath&Replace(Replace(Replace(Now(),"-",""),":","")," ","")&show()&"."&SaveFileType
Call SaveRemoteFile(SaveFileName, RemoteFileurl)
s_Content = Replace(s_Content,RemoteFileurl,SaveFileName)
Next
ReplaceRemoteUrl = s_Content
End Function
'================================================
'作 用:保存远程的文件到本地
'参 数:LocalFileName ------ 本地文件名
' RemoteFileUrl ------ 远程文件URL
'返回值:True ----成功
' False ----失败
'================================================
Sub SaveRemoteFile(s_LocalFileName,s_RemoteFileUrl)
Dim Ads, Retrieval, GetRemoteData
On Error Resume Next
SET Retrieval = Server.CreateObject("Microsoft.XMLHTTP")
With Retrieval
.Open "Get", s_RemoteFileUrl, False, "", ""
.Send
GetRemoteData = .ResponseBody
End With
SET Retrieval = NOTHING
SET Ads = Server.CreateObject("Adodb.Stream")
With Ads
.Type = 1
.Open
.Write GetRemoteData
.SaveToFile Server.MapPath(s_LocalFileName), 2
.Cancel()
.Close()
End With
SET Ads=NOTHING
End Sub
'=================================================
'过程名:getHTTPPage
'作 用:获取页面内容
'参 数:url ----绝对地址
'=================================================
function getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject("Microsoft.XMLHTTP")
Http.open "GET",url,0
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
end function
Function bytes2BSTR(vIn)
dim strReturn
dim i,ThisCharCode,NextCharCode
strReturn = ""
For i = 1 To LenB(vIn)
ThisCharCode = AscB(MidB(vIn,i,1))
If ThisCharCode < &H80 Then
strReturn = strReturn & Chr(ThisCharCode)
Else
NextCharCode = AscB(MidB(vIn,i+1,1))
strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode))
i = i + 1
End If
Next
bytes2BSTR = strReturn
End Function
Function NewString(wstr,strng)
NewString=Instr(wstr,strng)
End Function
'-------------------------------------------------
'创建多级目录,可以创建不存在的根目录
'参数:要创建的目录名称,可以是多级
'返回逻辑值,True成功,False失败
'创建目录的根目录从当前目录开始
'---------------------------------------------------
Function CreateMultiFolder(ByVal CFolder)
Dim objFSO,PhCreateFolder,CreateFolderArray,CreateFolder
Dim i,ii,CreateFolderSub,PhCreateFolderSub,BlInfo
BlInfo = False
CreateFolder = CFolder
On Error Resume Next
Set objFSO = Server.CreateObject("Scripting.FileSystemObject")
If Err Then
Err.Clear()
Exit Function
End If
CreateFolder = Replace(CreateFolder,"\","/")
If Left(CreateFolder,1)="/" Then
CreateFolder = Right(CreateFolder,Len(CreateFolder)-1)
End If
If Right(CreateFolder,1)="/" Then
CreateFolder = Left(CreateFolder,Len(CreateFolder)-1)
End If
CreateFolderArray = Split(CreateFolder,"/")
For i = 0 to UBound(CreateFolderArray)
CreateFolderSub = ""
For ii = 0 to i
CreateFolderSub = CreateFolderSub & CreateFolderArray(ii) & "/"
Next
PhCreateFolderSub = Server.MapPath(CreateFolderSub)
If Not objFSO.FolderExists(PhCreateFolderSub) Then
objFSO.CreateFolder(PhCreateFolderSub)
End If
Next
If Err Then
Err.Clear()
Else
BlInfo = True
End If
CreateMultiFolder = BlInfo
End Function
%>
xin.asp
<form id="form1" name="form1" method="post" action="xinxin.asp">
<table width="550" border="0" align="center" cellpadding="0" cellspacing="1" bgcolor="#00CC66">
<tr>
<td height="25" colspan="2" align="center" bgcolor="#FFFFFF">书籍采集(本采集程序只针对'星星中文网')请输入书籍信息</td>
</tr>
<tr>
<td width="118" height="25" align="right" bgcolor="#FFFFFF">小说分类:</td>
<td width="426" height="25" bgcolor="#FFFFFF">
<select name="type_id" id="type_id">
<option value="1">玄幻魔法</option>
<option value="2">武侠仙侠</option>
<option value="3">历史军事</option>
<option value="4">都市言情</option>
<option value="5">网络游戏</option>
<option value="6">推理灵异</option>
<option value="7">散文诗词</option>
<option value="8">科幻动漫</option>
</select>
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">书名:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_title" type="text" id="book_title" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">录入者:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="user_name" type="text" id="user_name" value="admin" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">作者:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_author" type="text" id="book_author" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">作品简介:</td>
<td height="25" bgcolor="#FFFFFF"><textarea name="book_intro" rows="5" id="book_intro"></textarea></td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">作品类别:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_attrib" type="text" id="book_attrib" value="公开作品" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">书名拼音首字母:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_initial" type="text" id="book_initial" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">章节总数:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_newchapterid" type="text" id="book_newchapterid" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">书籍字数:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="book_size" type="text" id="book_size" />
</td>
</tr>
<tr>
<td height="25" align="right" bgcolor="#FFFFFF">书籍采集地址:</td>
<td height="25" bgcolor="#FFFFFF">
<input name="url" type="text" id="url" size="50" />
</td>
</tr>
<tr>
<td height="25" colspan="2" align="center" bgcolor="#FFFFFF">
<input type="submit" name="Submit" value="提交" />
<input type="reset" name="Submit2" value="重置" />
</td>
</tr>
</table>
xinxin.asp
<!--#include file="jason.asp"-->
<%
book_title=request.Form("book_title")
user_name=request.Form("user_name")
book_author=request.Form("book_author")
type_id=request.Form("type_id")
book_intro=request.Form("book_intro")
book_attrib=request.Form("book_attrib")
book_initial=request.Form("book_initial")
book_newchapterid=request.Form("book_newchapterid")
book_size=request.Form("book_size")
url=request.Form("url")
if book_title="" or user_name="" or book_author="" or type_id="" or book_intro="" or book_attrib="" or book_initial="" or book_newchapterid="" or book_size="" or url="" then
response.Write "<script>alert('请确保各信息填写正确');history.back();</script>"
response.End()
end if
isurl(url)
strconnection = "dsn=mysqldsn;driver={myodbd driver};server=localhost;uid=root;pwd=123456;database=read8"
set adodataconn = server.createobject("adodb.connection")
adodataconn.open strconnection
set gb2312=adodataconn.Execute("SET NAMES 'gb2312'") '就是这句起的作用
strquery = "insert into rd8_read8_book(book_title,user_name,book_author,type_id,book_actor,book_intro,book_time,book_updatetime,book_attrib,book_initial,book_newchapterid,book_size,book_state,book_recommend,book_md5) value('"&book_title&"','"&user_name&"','"&book_author&"','"&type_id&"','','"&book_intro&"','"&time()&"','"&time()&"','"&book_attrib&"','"&book_initial&"','"&book_newchapterid&"','"&book_size&"',1,0,'8b89c32fa193efe7bbe925a98ebf99c1')"
adodataconn.execute(strquery)
sql="select * from rd8_read8_book where book_title = '"&book_title&"' "
set rs = adodataconn.execute(sql)
response.Write rs("book_id")
%>
<%
Server.ScriptTimeOut=36000 '设定操作超时的时间
wstr = getHTTPPage(url) '取得页面内容
if err.number=0 then '如果获取成功
start=newstring(wstr,"<div id=""title"">") '要获取的内容在网页中的开始位置(通过唯一的标志寻找)
over=newstring(wstr,"<div id=""copyright"">") '要获取的内容在网页中结束位置(通过唯一的标志寻找)
wstr=mid(wstr,start,over-start) '获取想要的内容
wstr=replace(wstr,"</script></div>","</script></div><form action=xinxin_1.asp method=post>")
wstr=replace(wstr,"<a href=""","<input type=checkbox name=list checked value=")
wstr=replace(wstr,".html"">","><input type=text name=list_name value=""")
wstr=replace(wstr,"</a>",""">")
end if
%>
<div align=center>
<%response.write wstr%><br>
<input name="list_url" type="hidden" id="list_url" value="<%=url%>">
<input name="book_id" type="hidden" id="book_id" value="<%=rs("book_id")%>">
<input name="book_title" type="hidden" id="book_title" value="<%=rs("book_title")%>">
<input name="type_id" type="hidden" id="type_id" value="<%=rs("type_id")%>">
<input name="book_author" type="hidden" id="book_author" value="<%=rs("book_author")%>">
<input class="button" type=button value="全部选定" onclick="this.value=check(this.form.list)">
<input class="button" type="submit" value=" 下一步 ">
<%
rs.close
set rs = nothing
adodataconn.close
set adodataconn = nothing
%>
xinxin_1.asp
<!--#include file="jason.asp"-->
<%
Server.ScriptTimeOut=36000 '设定操作超时的时间
'设定/获取各参数值
book_id = Request.form("book_id")
title=request.Form("book_title")
username=request.Form("book_author")
type_id=request.Form("type_id")
if book_id = "" then
response.Write "<script>alert('book_id为空,请检测源程序!');history.back();</script>"
end if
SaveFileTypeExt = "jpg|gif|bmp|png" '需要保存的图片文件扩展名,当SaveToLocal=False是此项无效
SavePath = "/upload/" '图片等文件保存路径,最后要带 /,当SaveToLocal=False是此项无效
list=request("list")
list=split(list,",")
list_url=request("list_url")
list_url=replace(list_url,"/index.html","")
list_name=request("list_name")
list_name=split(list_name,",")
list_pos=Ubound(list)
view_id2=request("view_id2")
set fso=server.CreateObject("Scripting.FileSystemObject")
response.Write "<div align=center><Br><span style='font-size: 16px;'>正在获取章节名称(采集站:星星中文网)......</span><Br><Br></div>"
'on error resume next
'连接数据库
strconnection = "dsn=mysqldsn;driver={myodbd driver};server=localhost;uid=root;pwd=123456;database=read8"
set adodataconn = server.createobject("adodb.connection")
adodataconn.open strconnection
set gb2312=adodataconn.Execute("SET NAMES 'gb2312'") '就是这句起的作用
'创建目录
folder="../html/" & type_id & "/"&book_id&"/"
if CreateMultiFolder(folder)=true then
response.Write folder & "目录创建成功<br>"
else
response.Write folder & "目录已存在<br>"
response.End()
end if
'循环获取开始
for i = 0 to list_pos
'获取章节URL地址
url = ""&list_url&"/"&trim(list(i))&".html" '新闻来源的页面
'获取内容
wstr = getHTTPPage(url) '取得页面内容
if err.number=0 then '如果获取成功
start=newstring(wstr,"<div id=""content"">")+18 '要获取的内容在网页中的开始位置(通过唯一的标志寻找)
over=newstring(wstr,"<div id=""footlink"">") '要获取的内容在网页中结束位置(通过唯一的标志寻找)
wstr=mid(wstr,start,over-start) '获取想要的内容
wstr=replace(wstr,"<br />","<br>")
wstr=replace(wstr,"<div id=""adbottom""><script type=""text/javascript"" src=""/configs/article/adfoot.js""></script>","")
wstr=replace(wstr,"</div>","<br>")
wstr=replace(wstr,"<img src=""","<img src=""http://www.ixxw.net/")
end if
'下载保存内容中的图片
wstr = ReplaceRemoteUrl(wstr, SavePath, SaveFileTypeExt)
'返回内容长度
chapter_words=len(wstr)
strquery = "insert into rd8_read8_chapter(chapter_title,book_id,chapter_orderid,chapter_time,chapter_updatetime,chapter_new,chapter_words,volume_id,chapter_md5) value('"&list_name(i)&"','"&book_id&"','"&i&"','"&time()&"','"&time()&"',0,'"&chapter_words&"',1,'28be7c320937ce778120351941112e96')"
adodataconn.execute(strquery)
'生成静态
set fout=fso.createtextfile(Server.MapPath("/HTML/"&type_id&"/"&book_id&"/"&i&".html"))
fout.writeline wstr
Response.Write list_name(i)&"成功生成<br>"
fout.close
next
set fso=nothing
response.Write "成功获取全部章节<br>"
'生成章节列表页
sql="SELECT * FROM `rd8_read8_chapter` WHERE `book_id` = '"&book_id&"' ORDER BY `chapter_orderid` ASC"
set rs = adodataconn.execute(sql)
dim content
do while not rs.eof
content=content & "<div class=""chapter""><a href="""&rs("chapter_orderid")&".html"" title="""&rs("chapter_title")&""">"&rs("chapter_title")&"</a></div>"
rs.movenext
loop
rs.close
set rs=nothing
'生成章节列表页
'读出模版
Set fso3=Server.CreateObject("Scripting.FileSystemObject")
Set htmlwrite=fso3.OpenTextFile(Server.MapPath("/xinxinadmin/index.html"))
do while not htmlwrite.atendofstream '如果不到文件结束,一直循环
showindex=showindex & htmlwrite.readline
loop
'替换模换中内容
showindex=replace(showindex,"$title$",title)
showindex=replace(showindex,"$book_id$",book_id)
showindex=replace(showindex,"$username$",username)
showindex=replace(showindex,"$content$",content)
'删除原文件
'set fso1=createobject("scripting.filesystemobject")
'fso1.deletefile(server.MapPath("/html/"&book_id&"/1/index.html"))
'set fso1=nothing
'生成
set fso=server.CreateObject("Scripting.FileSystemObject")
set fout=fso.createtextfile(Server.MapPath("/html/"&type_id&"/"&book_id&"/index.html"))
fout.writeline showindex
fout.close
response.write("<font color='ff0000'>成功生成章节列表页</font><br>")
response.write("<font color='ff0000'>采集完成</font><br>")
adodataconn.close
set adodataconn = nothing
%>
index.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Language" content="zh-CN" />
<meta http-equiv="Widow-target" content="_top" />
<meta name="Keywords" content="$title$-小说频道" />
<meta name="Description" content="$title$-小说频道" />
<meta name="Author" content="$username$-$title$-小说频道" />
<title>$username$-$title$-小说频道-最新章节</title>
<link href="/include/book.css" rel="stylesheet" type="text/css" />
<script src="/include/js/common.js" type="text/javascript"></script>
<script src="/include/js/read.js" type="text/javascript"></script>
<script type="text/javascript"><!--//--><![CDATA[//><!--
var click_num=0;
var vote_num=0;
//--><!]]></script>
<script src="/bookstat.php?bid=$book_id$" type="text/javascript"></script>
</head>
<body onload="javascript:externallinks();">
<div id="page">
<div id="topann"><iframe scrolling="no" frameborder="0" src="/data/ann/top.html"></iframe></div>
<div id="head">
<div id="na"><a href="">小说频道首页</a> -> <a href="/bookroom.php">总书库</a> -><a href="/bookinfo.php?bid=$book_id$" title="$title$">《$title$》</a></div>
<div id="font_lang"><a href="javascript:changeFontLang(1)">简->繁</a></div>
<div id="pagelink"><a href="/bookreview.php?bid=$book_id$" rel="external">评论</a> <a href="/bookstat.php?vbid=$book_id$" target="voteiframe">投票</a></div>
<div id="fave"><a href="/bookstat.php?mbid=$book_id$&mcid=" title="$title$">收藏书签</a></div>
</div>
<hr />
<div id="title" title="$title$">$title$</div>
<div id="author">作者: <a href="/authorinfo.php?target=$username$" rel="external">$username$</a></div>
<div id="info">点击: <script type="text/javascript">document.write(click_num);</script> 投票: <script type="text/javascript">document.write(vote_num);</script></div>
<div id="chapterlist">
<div class="vol">正文</div>
$content$
</div>
<div id="vote"><iframe name="voteiframe"></iframe></div>
<hr />
<div id="footann"><iframe scrolling="no" frameborder="0" src="/data/ann/foot.html"></iframe></div>
<div id="copyright">
Copyright © 2006 小说频道 All Rights Reserved<br />
本作品系网友发表,仅代表作者本人观点,与本站立场无关。<br />
如有章节错误、排版不齐或版权疑问、作品内容有违相关法律等情况,请联系管理员。
</div>
</div>
<script src="/include/js/lang.js" type="text/javascript"></script>
<script src="/include/js/f_lang.js" type="text/javascript"></script>
</body>
</html>
注: 只对星星中文网有效.运行文件是 xin.asp index.html只是章节列表页的模版 (章节阅读页的模版还没做!)