Simple Batch Html Validation
After struggling to write a suitable HTML parser, I began to feel uncomfortable that I was solving a problem that had already been solved by someone else, more than once, on the internet. So rather than give in to "Not Invented Here" syndrome, I went looking and discovered HtmlAgilityPack, which seems a nice compromise between a lashed-together home-brew solution and root-and-branch linting with HtmlTidy.
using HtmlAgilityPack;
using System;
using System.IO;
using System.Linq;
namespace HtmlAgilityTest
{
/// <summary>
/// Console entry point: loads every file matching <c>*.htm?</c> in a directory with
/// HtmlAgilityPack and writes any reported parse errors to a text log beside the file.
/// </summary>
internal class Program
{
    /// <summary>Search pattern passed to <see cref="Directory.GetFiles(string, string)"/>.</summary>
    private const string HtmlFilePattern = "*.htm?";

    /// <summary>
    /// Validates each matching file in the directory named by the first argument.
    /// </summary>
    /// <param name="args">
    /// Command-line arguments. <c>args[0]</c> is the directory to scan; any further
    /// arguments are ignored.
    /// </param>
    private static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            // Previously the program exited without a word; tell the user what is expected.
            Console.WriteLine("Usage: specify the directory containing the HTML files to validate");
            return;
        }

        string directory = args[0];
        if (!Directory.Exists(directory))
        {
            Console.WriteLine("Directory does not exist");
            return;
        }

        foreach (string htmlFile in Directory.GetFiles(directory, HtmlFilePattern))
        {
            // One unreadable or locked file must not abort the rest of the batch.
            try
            {
                ValidateFile(directory, htmlFile);
            }
            catch (IOException ex)
            {
                ReportFailure(htmlFile, ex);
            }
            catch (UnauthorizedAccessException ex)
            {
                ReportFailure(htmlFile, ex);
            }
        }
    }

    /// <summary>
    /// Parses a single file and, if the parser reports errors, writes them to
    /// <c>&lt;file name without extension&gt;.txt</c> in <paramref name="directory"/>.
    /// Each log line has the form <c>line,position: reason</c>.
    /// </summary>
    /// <param name="directory">Directory in which the log file is created.</param>
    /// <param name="htmlFile">Path of the file to parse.</param>
    private static void ValidateFile(string directory, string htmlFile)
    {
        // NOTE(review): two files that differ only by extension (foo.htm / foo.html) would
        // share the same foo.txt log - confirm this cannot occur in the target directories.
        string logFile = Path.Combine(directory, Path.GetFileNameWithoutExtension(htmlFile) + ".txt");

        // Remove the log left by an earlier run so a file without errors leaves no stale report.
        if (File.Exists(logFile))
        {
            File.Delete(logFile);
        }

        HtmlDocument doc = new HtmlDocument
        {
            OptionFixNestedTags = true
        };
        doc.Load(htmlFile);

        if (!doc.ParseErrors.Any())
        {
            return;
        }

        using (TextWriter writer = new StreamWriter(logFile, false))
        {
            foreach (var error in doc.ParseErrors)
            {
                // WriteLine formats directly; wrapping the arguments in string.Format is redundant.
                writer.WriteLine("{0},{1}: {2}", error.Line, error.LinePosition, error.Reason);
            }
        }
    }

    /// <summary>
    /// Reports a file that could not be processed on standard error and sets a non-zero
    /// process exit code so that calling scripts can still detect the failure.
    /// </summary>
    /// <param name="htmlFile">Path of the file that failed.</param>
    /// <param name="ex">The exception raised while processing it.</param>
    private static void ReportFailure(string htmlFile, Exception ex)
    {
        Console.Error.WriteLine("{0}: {1}", htmlFile, ex.Message);
        Environment.ExitCode = 1;
    }
}
}