C#个人常用标准库 > SiteMapTool


一个对sitemap网站地图的序列化工具


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ZmjTool
{
    /// <summary>
    /// Helpers that extract page URLs and nested sitemap URLs from sitemap content.
    /// </summary>
    public class SiteMapTool : XmlTool
    {
        /// <summary>
        /// Extracts URLs from sitemap content that is organised line by line instead of
        /// being one well-formed XML document (for example http://www.ljhonggan.com/sitemap.xml).
        /// Each line is parsed on its own with <see cref="System.Security.SecurityElement.FromString(string)"/>
        /// and the text of the resulting element is treated as a URL.
        /// </summary>
        /// <param name="srcurl">Address the sitemap was downloaded from; used to detect same-host entries and to resolve relative ones.</param>
        /// <param name="strs">Raw sitemap content. <c>null</c> or empty content yields an empty result.</param>
        /// <param name="urls">Receives the absolute URLs that could be built; never <c>null</c>, possibly empty.</param>
        /// <returns>Always <c>true</c>; inspect <paramref name="urls"/> to find out whether anything was extracted.</returns>
        public static bool TryGetUrl(Uri srcurl, string strs, out string[] urls)
        {
            List<string> found = new List<string>();
            if (!string.IsNullOrEmpty(strs))
            {
                // Drop carriage returns first so that both "\r\n" and "\n" line endings split cleanly.
                foreach (var line in strs.Replace("\r", string.Empty).Split('\n'))
                {
                    // NOTE(review): only UriFormatException is handled below. Anything thrown while
                    // parsing the line itself propagates to the caller - confirm whether malformed
                    // lines should be skipped instead.
                    try
                    {
                        var src = System.Security.SecurityElement.FromString(line)?.Text;
                        if (src == null)
                        {
                            continue;
                        }

                        Uri ui;
                        if (src.Contains(srcurl.Host))
                        {
                            // Same-host entry: it is absolute already, or is only missing its scheme.
                            // The scheme check is ordinal and case-insensitive so "HTTP://..." is not prefixed again.
                            ui = new Uri(src.StartsWith("http", StringComparison.OrdinalIgnoreCase) ? src : $"https://{src}");
                        }
                        else
                        {
                            // Anything else is resolved against the sitemap's own address.
                            ui = new Uri(srcurl, src);
                        }

                        found.Add(ui.AbsoluteUri);
                    }
                    catch (UriFormatException)
                    {
                        // The line's text is not a usable URL; skip it.
                        continue;
                    }
                }
            }

            urls = found.ToArray();
            return true;
        }

        /// <summary>
        /// Collects the <c>loc</c> values of every <c>url</c> element found below <paramref name="ele"/>.
        /// </summary>
        /// <param name="array">List to append to; a new list is created when this is <c>null</c>.</param>
        /// <param name="srcurl">Address of the sitemap, passed to <c>HttpTool.TryGetAbsoluteUrl</c> for each location.</param>
        /// <param name="ele">Element whose descendants are searched. <c>null</c> or a childless element adds nothing.</param>
        /// <returns>The list that received the URLs (the instance passed in, or the newly created one).</returns>
        public static List<string> GetAllUrl(List<string> array, Uri srcurl, System.Security.SecurityElement ele)
        {
            return CollectLocUrls(array, srcurl, ele, "url");
        }

        /// <summary>
        /// Collects the <c>loc</c> values of every <c>sitemap</c> element found below <paramref name="ele"/>.
        /// </summary>
        /// <param name="array">List to append to; a new list is created when this is <c>null</c>.</param>
        /// <param name="srcurl">Address of the sitemap, passed to <c>HttpTool.TryGetAbsoluteUrl</c> for each location.</param>
        /// <param name="ele">Element whose descendants are searched. <c>null</c> or a childless element adds nothing.</param>
        /// <returns>The list that received the URLs (the instance passed in, or the newly created one).</returns>
        public static List<string> GetAllSiteMap(List<string> array, Uri srcurl, System.Security.SecurityElement ele)
        {
            return CollectLocUrls(array, srcurl, ele, "sitemap");
        }

        /// <summary>
        /// Shared traversal behind <see cref="GetAllUrl"/> and <see cref="GetAllSiteMap"/>: visits every
        /// descendant of <paramref name="ele"/> and, for each element whose tag equals
        /// <paramref name="tagName"/> (ignoring case), appends its non-empty <c>loc</c> URLs to <paramref name="array"/>.
        /// </summary>
        private static List<string> CollectLocUrls(List<string> array, Uri srcurl, System.Security.SecurityElement ele, string tagName)
        {
            array = array ?? new List<string>();
            if (ele?.Children == null)
            {
                return array;
            }

            foreach (System.Security.SecurityElement item in ele.Children)
            {
                // Ordinal comparison: tag names are identifiers, so matching must not depend on the current culture.
                if (string.Equals(item.Tag, tagName, StringComparison.OrdinalIgnoreCase))
                {
                    array.AddRange(GetElements(item, "loc").Select(x => HttpTool.TryGetAbsoluteUrl(x.Text, srcurl)?.AbsoluteUri ?? string.Empty).Where(x => x != string.Empty));
                }

                // Descend into the child (not the current element again) so nested entries are reached
                // and the recursion terminates at the leaves.
                CollectLocUrls(array, srcurl, item, tagName);
            }

            return array;
        }
    }
}