Google SiteMap 生成工具 (PHP & ASP) - IM286 - 重庆森林

[ 2005-06-08 08:49:04 | 作者: Admin ] 字号: | |
ASP
<%
' sitemap_gen.asp
' A simple script to automatically produce sitemaps for a webserver, in the Google Sitemap Protocol (GSP)
' by Francesco Passantino
' www.iteam5.net/francesco/sitemap
' v0.2 released 5 june 2005 (Listing a directory tree recursively improvement)
'
' BSD 2.0 license,
' http://www.opensource.org/licenses/bsd-license.php
' 收集整理:  重庆森林@im286.com

' ---- Configuration ---------------------------------------------------------
' Base URL of the site (scheme + host). It is stored in the Session because
' getfilelink reads it back from there when it builds each <loc> value.
session("server")="http://www.xxx.com" ' your domain
' Virtual directory to build the sitemap for, relative to the web root.
vDir = "/blueidea"

set objfso = CreateObject("Scripting.FileSystemObject")
' Physical path of vDir; getfilelink strips this prefix from every file path.
root = Server.MapPath(vDir)

' ---- Emit the sitemap ------------------------------------------------------
response.ContentType = "text/xml"
response.write "<?xml version='1.0' encoding='UTF-8'?>"
response.write "<urlset xmlns='http://www.google.com/schemas/sitemap/0.84'>"

Set objFolder = objFSO.GetFolder(root)
'response.write getfilelink(objFolder.Path,objFolder.dateLastModified)
' Files that sit directly in the start folder ...
Set colFiles = objFolder.Files
For Each objFile In colFiles
  response.write getfilelink(objFile.Path,objfile.dateLastModified)
Next
' ... then every permitted sub-folder, recursively.
Call ShowSubFolders(objFolder)

response.write "</urlset>"

' Release the COM objects. The original released "fso", a name that was never
' assigned, so the FileSystemObject reference itself was never cleared.
Set colFiles = Nothing
Set objFolder = Nothing
Set objfso = Nothing

' Writes a <url> entry for every permitted sub-folder of objFolder and for the
' files inside it, then recurses into that sub-folder.
'
' objFolder - a Scripting.FileSystemObject Folder object.
'
' Sub-folders rejected by Folderpermission are skipped together with their
' whole subtree. getfilelink returns nothing for paths whose extension is not
' accepted, so such entries produce no output.
Sub ShowSubFolders(objFolder)
  ' Declared locally so each recursion level has its own state and the
  ' script-level colFiles / objFile variables are not overwritten.
  Dim colFolders, objSubFolder, colFiles, objFile

  Set colFolders = objFolder.SubFolders
  For Each objSubFolder In colFolders
    If Folderpermission(objSubFolder.Path) Then
      Response.Write getfilelink(objSubFolder.Path, objSubFolder.DateLastModified)

      Set colFiles = objSubFolder.Files
      For Each objFile In colFiles
        Response.Write getfilelink(objFile.Path, objFile.DateLastModified)
      Next

      Call ShowSubFolders(objSubFolder)
    End If
  Next
End Sub

' Builds the <url> element for one file-system path.
'
' file     - physical path of the file or folder.
' datafile - its last-modified date; only year, month and day are used.
'
' Returns the <url>...</url> string, or Empty when FileExtensionIsBad rejects
' the path. Reads the script-level values root, vDir and session("server").
Function getfilelink(file,datafile)
  Dim relPath, fileDate

  ' Work on a local copy: VBScript passes arguments ByRef by default, so
  ' assigning to "file" would also change a variable supplied by the caller.
  relPath = Replace(file, root, "")
  relPath = Replace(relPath, "\", "/")
  If FileExtensionIsBad(relPath) Then Exit Function

  ' lastmod as YYYY-MM-DD with zero-padded month and day.
  fileDate = Year(datafile) & "-" & Right("0" & Month(datafile), 2) & "-" & Right("0" & Day(datafile), 2)

  getfilelink = "<url><loc>" & Server.HTMLEncode(session("server") & vDir & relPath) & "</loc><lastmod>" & fileDate & "</lastmod><changefreq>daily</changefreq><priority>1.0</priority></url>"

  ' Send whatever is buffered so far to the client before the next entry.
  Response.Flush
End Function

' Decides whether a folder may appear in the sitemap.
'
' pathName - path of the folder ("\" or "/" separated).
'
' Returns False when any segment of the path equals, ignoring case, one of the
' excluded folder names; True otherwise.
Function Folderpermission(pathName)
  Dim PathExclusion, PathExcluded, segments, segment

  ' Folder names to filter out (they are not listed in the sitemap).
  ' Names are compared against whole path segments, so "temp" excludes
  ' "\temp" but no longer excludes "\templates" or "\temporary", which the
  ' former substring test did.
  PathExclusion = Array("temp","_vti_cnf","_vti_pvt","_vti_log","cgi-bin")

  Folderpermission = True
  segments = Split(Replace(pathName, "/", "\"), "\")
  For Each segment In segments
    For Each PathExcluded In PathExclusion
      If StrComp(segment, PathExcluded, vbTextCompare) = 0 Then
        Folderpermission = False
        Exit Function
      End If
    Next
  Next
End Function

' Tells whether a file name must be left out of the sitemap because of its
' extension.
'
' sFileName - file name or path; the extension is the text after the last ".".
'
' Returns True when the name is empty, has no "." at all, or has an extension
' that is not in the list below (compared ignoring case); False otherwise.
Function FileExtensionIsBad(sFileName)
  Dim Extensions, sFileExtension, sFileExt, dotPos

  ' Modify for your file extensions (http://www.googleguide.com/file_type.html).
  ' Only files whose extension is in this list are included in the sitemap.
  Extensions = Array("png","gif","jpg","jpeg","zip","pdf","ps","html","htm","asp","php","wk1","wk2","wk3","wk4","wk5","wki","wks","wku","lwp","mw","xls","ppt","doc","swf","wps","wdb","wri","rtf","ans","txt")

  FileExtensionIsBad = True   ' assume the extension is bad

  If Len(Trim(sFileName)) = 0 Then Exit Function

  ' Without a dot there is no extension. Previously the whole name was then
  ' treated as the extension, so a bare name such as "txt" was accepted.
  dotPos = InStrRev(sFileName, ".")
  If dotPos = 0 Then Exit Function

  sFileExtension = UCase(Mid(sFileName, dotPos + 1))
  For Each sFileExt In Extensions
    If UCase(sFileExt) = sFileExtension Then
      FileExtensionIsBad = False
      Exit Function
    End If
  Next
End Function
%>

PHP
<?php
/*
 * Google Sitemap generator.
 *
 * @author  Tobias Kluge, enarion.net
 * @version 0.2, 2005-06-05 17:40 PT
 * @status  working
 * @update  Aditya Naik, so1o@so1o.net
 * @licence LGPL
 *
 * editor:  MildSeven@im286.com
 *
 * These credits are a PHP comment rather than an HTML comment on purpose:
 * an HTML comment would be sent to the client ahead of the XML declaration
 * that this script echoes, and an XML declaration is only allowed at the very
 * start of a document.
 */

// The script declares encoding="UTF-8" in its XML declaration and runs the
// values through utf8_encode(), so the HTTP header must announce UTF-8 too.
header('Content-Type: application/xml; charset=UTF-8', true);

/* ---- Settings you must adapt ------------------------------------------- */

/* Public base URL of the site; each file's relative path is appended to it. */
$website = "http://my.xxxxx.com"; /* change this */
/* Directory on disk where the scan starts. */
$page_root = "/usr/local/psa/home/vhosts/subdomains/my/httpdocs";    /* change this */

/* ---- Settings you may want to adapt ------------------------------------ */

/* One of "always", "hourly", "daily", "weekly", "monthly", "yearly", "never". */
$changefreq = "weekly";
$priority = 0.8;
/* Current date and time with a "+hh:mm" style timezone offset. */
$last_modification = date("Y-m-d\TH:i:s") . substr(date("O"),0,3) . ":" . substr(date("O"),3);

/* Sub-directories that getFiles() is allowed to descend into. */
$allow_dir = array("web");

/* Sub-directories that are never scanned. */
$disallow_dir = array("admin", "_notes");

/* A file is skipped when its name contains any of these fragments. */
$disallow_file = array(
    ".inc", ".old", ".save", ".txt", ".js", "~",
    ".LCK", ".zip", ".ZIP", ".CSV", ".csv", ".css",
    ".class", ".jar", ".mno", ".bak", ".lck", ".BAK",
);

/**
 * Exact-match lookup: tells whether $key is loosely equal (==) to any
 * element of $array.
 *
 * @param mixed $key   value to look for
 * @param array $array candidates
 * @return bool true when at least one element equals $key
 */
function ar_contains($key, $array) {
    $found = false;
    foreach ($array as $candidate) {
        if ($candidate == $key) {
            $found = true;
            break;
        }
    }
    return $found;
}

/**
 * Substring lookup: tells whether $key contains any element of $array.
 *
 * @param string $key   text to search in
 * @param array  $array fragments to search for
 * @return bool true when at least one fragment occurs anywhere in $key
 */
function fl_contains($key, $array) {
    foreach ($array as $fragment) {
        // strpos() yields 0 for a match at the very start, hence the strict test.
        if (strpos($key, $fragment) !== FALSE) {
            return true;
        }
    }

    return false;
}

/**
 * Returns a new, sequentially indexed array in which every occurrence of
 * $old_offset inside each element of $array is replaced by $offset.
 *
 * @param array  $array      strings to rewrite
 * @param string $old_offset text to replace
 * @param string $offset     replacement text
 * @return array rewritten strings, in the original order
 */
function changeOffset($array, $old_offset, $offset) {
    $rewritten = array();
    foreach ($array as $path) {
        array_push($rewritten, str_replace($old_offset, $offset, $path));
    }
    return $rewritten;
}

/**
 * Walks recursively through the directories below $directory and collects
 * every file that passes the filters.
 *
 * Based on code by Lasse Dalegaard, http://php.net/opendir
 *
 * Filters, read from the globals $disallow_dir, $allow_dir and $disallow_file:
 *  - entries whose name starts with "." are ignored;
 *  - a sub-directory is skipped when its name equals, or its path contains,
 *    an entry of $disallow_dir;
 *  - a sub-directory is only descended into when its name equals, or its
 *    path contains, an entry of $allow_dir;
 *  - a file is skipped when its name contains an entry of $disallow_file.
 *
 * @param string $directory        directory to scan
 * @param string $directory_orig   directory the scan started at; defaults to
 *                                 $directory on the first call
 * @param string $directory_offset text that replaces the leading
 *                                 $directory_orig in each returned path
 * @return array list of file paths; empty when $directory cannot be opened
 */
function getFiles($directory, $directory_orig = "", $directory_offset="") {
    global $disallow_dir, $disallow_file, $allow_dir;

    if ($directory_orig == "") $directory_orig = $directory;

    $found = array();

    $dir = opendir($directory);
    if (!$dir) {
        // Always hand back an array so callers can iterate the result.
        return $found;
    }

    $prefix_len = strlen($directory_orig);

    // Compare against false explicitly: an entry named "0" is falsy and would
    // otherwise end the loop early.
    while (false !== ($file = readdir($dir))) {
        // Skips ".", ".." and every other dot-entry.
        if ($file[0] == '.') continue;

        $path = $directory . "/" . $file;

        if (is_dir($path)) {
            $disallowed = ar_contains($file, $disallow_dir)   // bare directory name
                       || fl_contains($path, $disallow_dir);  // name given with a path
            if ($disallowed) continue;

            $allowed = ar_contains($file, $allow_dir)
                    || fl_contains($path, $allow_dir);
            if ($allowed) {
                // Recursive results are already rebased onto $directory_offset.
                $found = array_merge($found, getFiles($path, $directory_orig, $directory_offset));
            }
        } else { // files
            if (fl_contains($file, $disallow_file)) continue;

            // Swap only the leading $directory_orig for the offset. A global
            // str_replace() would also remove that text from deeper parts of
            // the path.
            if (strncmp($path, $directory_orig, $prefix_len) == 0) {
                $found[] = $directory_offset . substr($path, $prefix_len);
            } else {
                $found[] = $path;
            }
        }
    }

    closedir($dir);
    return $found;
}

/* Collect the files and emit the sitemap document. */
$a = getFiles($page_root);
if (!is_array($a)) {
    // Guard against a non-array result so the loop below always has a list.
    $a = array();
}

echo '<?xml version="1.0" encoding="UTF-8"?>';
?>

<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
<?php
foreach ($a as $file) {
    // NOTE(review): utf8_encode() assumes the file names are ISO-8859-1 - confirm
    // for your server. The result is then escaped so that characters such as
    // "&" or "<" in a file name cannot break the XML.
    $loc = htmlspecialchars(utf8_encode($website.$file), ENT_QUOTES, 'UTF-8');

    // lastmod is the content modification time (filemtime), not the inode
    // change time, with the timezone offset that applied at that moment.
    $mtime = filemtime($page_root.$file);
    $tz = date("O", $mtime);
    $lastmod = date("Y-m-d\TH:i:s", $mtime) . substr($tz,0,3) . ":" . substr($tz,3);
?>
 <url>
 <loc><?php echo $loc; ?></loc>
 <lastmod><?php echo $lastmod; ?></lastmod>
 <changefreq><?php echo $changefreq; ?></changefreq>
 <priority><?php echo $priority; ?></priority>
 </url>
<?php
}
?>
</urlset>
Share
评论Feed 评论Feed: http://www.85815.com/feed.asp?q=comment&id=285
UTF-8 Encoding 引用链接: http://www.85815.com/trackback.asp?id=285&key=
这篇日志没有评论.
发表
表情图标
[smile] [confused] [cool] [cry]
[eek] [angry] [wink] [sweat]
[lol] [stun] [razz] [redface]
[rolleyes] [sad] [yes] [no]
[heart] [star] [music] [idea]
UBB代码
转换链接
表情图标
悄悄话
用户名:   密码:   注册?
验证码 * 请输入验证码