// A serialization tool for sitemap (website map) files.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ZmjTool
{
/// <summary>
/// Helpers for pulling page URLs and nested sitemap URLs out of sitemap content.
/// </summary>
public class SiteMapTool : XmlTool
{
    /// <summary>
    /// Extracts URLs from a sitemap whose entries are laid out one per line instead of
    /// forming a single XML document (example: http://www.ljhonggan.com/sitemap.xml).
    /// Each non-blank line is parsed on its own with
    /// <see cref="System.Security.SecurityElement.FromString(string)"/> and the text of the
    /// resulting element is turned into an absolute URL.
    /// </summary>
    /// <param name="srcurl">URL the sitemap was fetched from; used as the base for relative entries.</param>
    /// <param name="strs">Raw sitemap text, entries separated by line breaks.</param>
    /// <param name="urls">Receives the absolute URLs that could be built; never null.</param>
    /// <returns>
    /// <c>false</c> when <paramref name="srcurl"/> or <paramref name="strs"/> is null;
    /// otherwise <c>true</c>, even when no URL was found (check <paramref name="urls"/> length).
    /// </returns>
    public static bool TryGetUrl(Uri srcurl, string strs, out string[] urls)
    {
        if (srcurl == null || strs == null)
        {
            urls = new string[0];
            return false;
        }

        var found = new List<string>();
        var lines = strs.Replace("\r", string.Empty).Split('\n');
        foreach (var line in lines)
        {
            // Blank lines (e.g. the one after a trailing newline) carry no element to parse.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            try
            {
                // NOTE(review): only UriFormatException is handled below; anything else the
                // element parser raises for a malformed line still propagates to the caller.
                var text = System.Security.SecurityElement.FromString(line)?.Text;
                if (text == null)
                {
                    continue;
                }

                Uri resolved;
                if (text.Contains(srcurl.Host))
                {
                    // Entry already names the host; only add a scheme when it has none.
                    // Ordinal, case-insensitive so "HTTP://..." is not prefixed a second time.
                    var hasScheme = text.StartsWith("http", StringComparison.OrdinalIgnoreCase);
                    resolved = new Uri(hasScheme ? text : $"https://{text}");
                }
                else
                {
                    // Anything else is treated as relative to the sitemap's own URL.
                    resolved = new Uri(srcurl, text);
                }

                found.Add(resolved.AbsoluteUri);
            }
            catch (UriFormatException)
            {
                // Best effort: skip entries that cannot be turned into a URI.
                continue;
            }
        }

        urls = found.ToArray();
        return true;
    }

    /// <summary>
    /// Walks the element tree below <paramref name="ele"/> and collects the <c>loc</c> values
    /// of every element whose tag is <c>url</c> (case-insensitive).
    /// </summary>
    /// <param name="array">List to append to; a new list is created when null.</param>
    /// <param name="srcurl">Source URL handed to <c>HttpTool.TryGetAbsoluteUrl</c> for each <c>loc</c> value.</param>
    /// <param name="ele">Root element to search; null or childless elements contribute nothing.</param>
    /// <returns>The list that received the URLs (the same instance as <paramref name="array"/> when it was non-null).</returns>
    public static List<string> GetAllUrl(List<string> array, Uri srcurl, System.Security.SecurityElement ele)
    {
        array = array ?? new List<string>();
        CollectLocUrlsByTag(array, srcurl, ele, "url");
        return array;
    }

    /// <summary>
    /// Walks the element tree below <paramref name="ele"/> and collects the <c>loc</c> values
    /// of every element whose tag is <c>sitemap</c> (case-insensitive), i.e. nested sitemap references.
    /// </summary>
    /// <param name="array">List to append to; a new list is created when null.</param>
    /// <param name="srcurl">Source URL handed to <c>HttpTool.TryGetAbsoluteUrl</c> for each <c>loc</c> value.</param>
    /// <param name="ele">Root element to search; null or childless elements contribute nothing.</param>
    /// <returns>The list that received the URLs (the same instance as <paramref name="array"/> when it was non-null).</returns>
    public static List<string> GetAllSiteMap(List<string> array, Uri srcurl, System.Security.SecurityElement ele)
    {
        array = array ?? new List<string>();
        CollectLocUrlsByTag(array, srcurl, ele, "sitemap");
        return array;
    }

    /// <summary>
    /// Shared depth-first traversal behind <see cref="GetAllUrl"/> and <see cref="GetAllSiteMap"/>:
    /// appends the absolute <c>loc</c> URLs of every descendant whose tag equals <paramref name="tag"/>.
    /// </summary>
    private static void CollectLocUrlsByTag(List<string> array, Uri srcurl, System.Security.SecurityElement ele, string tag)
    {
        if (ele == null || ele.Children == null)
        {
            return;
        }

        foreach (System.Security.SecurityElement item in ele.Children)
        {
            // Ordinal comparison: ToLower() is culture-sensitive and can fail to match "sitemap".
            if (string.Equals(item.Tag, tag, StringComparison.OrdinalIgnoreCase))
            {
                // Entries for which TryGetAbsoluteUrl yields no URI are mapped to empty and dropped.
                array.AddRange(GetElements(item, "loc")
                    .Select(x => HttpTool.TryGetAbsoluteUrl(x.Text, srcurl)?.AbsoluteUri ?? string.Empty)
                    .Where(x => x != string.Empty));
            }

            // Descend into the child; recursing on the parent again would never terminate.
            CollectLocUrlsByTag(array, srcurl, item, tag);
        }
    }
}
}