// SiteMapElement
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using ZmjTool;
namespace ZmjConvert
{
/// <summary>
/// Element type for sitemap documents. Adds helpers for parsing a sitemap
/// string and for collecting the <c>loc</c> entries found under
/// <c>url</c> and <c>sitemap</c> elements.
/// </summary>
public class SiteMapElement : CHtmlElement<SiteMapElement>
{
    /// <summary>
    /// Matches form-feed, line-feed, carriage-return, tab and vertical-tab
    /// characters, which are stripped from the document before parsing.
    /// </summary>
    private static readonly Regex ControlWhitespace = new Regex(@"[\f\n\r\t\v]");

    /// <summary>
    /// Attempts to parse a sitemap document into elements.
    /// </summary>
    /// <param name="htmlstr">Raw document text.</param>
    /// <param name="eles">
    /// Receives the parsed elements on success; set to <c>null</c> on failure.
    /// </param>
    /// <param name="message">
    /// Set to an empty string on success, or to an error description
    /// (including the exception message) on failure.
    /// </param>
    /// <returns><c>true</c> if parsing completed without an exception; otherwise <c>false</c>.</returns>
    public static bool TryParse(string htmlstr, ref IEnumerable<SiteMapElement> eles, ref string message)
    {
        try
        {
            // Strip line breaks and tab characters before handing the text to the parser.
            string flattened = ControlWhitespace.Replace(htmlstr, string.Empty);

            // Parse is not defined in this class (presumably inherited from CHtmlElement).
            // No check is made that the document actually is a sitemap: any input that
            // Parse accepts without throwing is reported as success.
            eles = Parse(flattened);
            message = string.Empty;
            return true;
        }
        catch (Exception e)
        {
            // Any failure (including a null input) is reported through the
            // return value and message rather than propagated to the caller.
            eles = null;
            message = "sitemap文档解析异常:" + e.Message;
            return false;
        }
    }

    /// <summary>
    /// Recursively collects the <c>loc</c> values of every <c>url</c> element
    /// in the given element trees.
    /// </summary>
    /// <param name="array">
    /// List to append to. When <c>null</c>, a new list is created.
    /// </param>
    /// <param name="srcurl">Base URI against which each <c>loc</c> value is resolved.</param>
    /// <param name="eles">Elements to search; <c>null</c> is treated as an empty sequence.</param>
    /// <returns>
    /// The list that was appended to: <paramref name="array"/> itself when it
    /// was not <c>null</c>, otherwise the newly created list.
    /// </returns>
    public static List<Uri> GetAllUrl(List<Uri> array, Uri srcurl, IEnumerable<SiteMapElement> eles)
    {
        return CollectLocations(array, srcurl, eles, "url");
    }

    /// <summary>
    /// Recursively collects the <c>loc</c> values of every <c>sitemap</c> element
    /// in the given element trees.
    /// </summary>
    /// <param name="array">
    /// List to append to. When <c>null</c>, a new list is created.
    /// </param>
    /// <param name="srcurl">Base URI against which each <c>loc</c> value is resolved.</param>
    /// <param name="eles">Elements to search; <c>null</c> is treated as an empty sequence.</param>
    /// <returns>
    /// The list that was appended to: <paramref name="array"/> itself when it
    /// was not <c>null</c>, otherwise the newly created list.
    /// </returns>
    public static List<Uri> GetAllSiteMap(List<Uri> array, Uri srcurl, IEnumerable<SiteMapElement> eles)
    {
        return CollectLocations(array, srcurl, eles, "sitemap");
    }

    /// <summary>
    /// Walks the element trees depth-first and appends the resolved <c>loc</c>
    /// URIs of every element whose tag name equals <paramref name="tagName"/>.
    /// </summary>
    private static List<Uri> CollectLocations(List<Uri> array, Uri srcurl, IEnumerable<SiteMapElement> eles, string tagName)
    {
        array = array ?? new List<Uri>();

        // TryParse hands back null on failure; tolerate that instead of
        // throwing a NullReferenceException from the loop below.
        if (eles == null)
        {
            return array;
        }

        foreach (SiteMapElement item in eles)
        {
            // Comparing from the literal side keeps this safe if TagName is null.
            if (tagName.Equals(item.TagName))
            {
                // NOTE(review): SecurityElement.FromString(...).Text is presumably used
                // to decode XML entities in the loc text; verify it behaves as expected
                // on the target runtime.
                array.AddRange(item["loc"].Select(x => new Uri(srcurl, System.Security.SecurityElement.FromString(x.InnerText).Text)));
            }

            // Matching elements are still descended into, as are non-matching ones.
            CollectLocations(array, srcurl, item.Childrens.ToArray(), tagName);
        }

        return array;
    }
}
}