1. <?php
  2. /**
  3. * HTML Validator for Local Utility (Memory-Optimized)
  4. *
  5. * This script validates HTML files, focusing on common errors.
  6. * Designed for low memory usage, suitable for local use.
  7. */
  /**
   * Validate an HTML file against a small set of heuristic checks.
   *
   * The whole file is read into memory and scanned with plain string and
   * regular-expression tests; no DOM is built. Each failed check appends one
   * message to the returned list, in the order the checks are run.
   *
   * @param string $filename The path to the HTML file.
   * @return array An array of error messages. Empty array if no errors.
   */
  function validateHTML(string $filename): array
  {
      // file_exists() is also true for directories, so require a regular file.
      if (!is_file($filename)) {
          return ["File not found: " . $filename];
      }

      if (!is_readable($filename)) {
          return ["Error reading file: " . $filename];
      }

      $html = file_get_contents($filename);
      if ($html === false) {
          return ["Error reading file: " . $filename];
      }

      $errors = [];

      // Basic HTML structure check. A UTF-8 BOM and leading whitespace may
      // legitimately precede the doctype, so they are skipped before matching.
      if (preg_match('/^(?:\xEF\xBB\xBF)?\s*<!DOCTYPE\s+html\s*>/i', $html) !== 1) {
          $errors[] = "Missing or incorrect doctype declaration.";
      }

      // Tag names are case-insensitive in HTML, so </HTML> must be accepted too.
      if (stripos($html, '</html>') === false) {
          $errors[] = "Missing closing </html> tag.";
      }

      // URL schemes are case-insensitive ("JavaScript:" works in browsers).
      if (stripos($html, 'javascript:') !== false) {
          $errors[] = "Inline JavaScript detected. Consider external files.";
      }

      // JavaScript identifiers are case-sensitive, so this stays an exact match.
      if (strpos($html, 'eval(') !== false) {
          $errors[] = "eval() detected. Security risk. Remove or reconsider.";
      }

      // Legacy type attribute on <script>; accepts double, single or no quotes.
      if (preg_match('/<script\b[^>]*\btype\s*=\s*["\']?text\/javascript["\']?[^>]*>/i', $html) === 1) {
          $errors[] = "Script tag with type='text/javascript' detected. Consider using a more modern approach.";
      }

      // NOTE(review): this pattern flags every href that starts with "#",
      // including ordinary in-page links such as href="#top". Kept unchanged
      // because the intended rule is unclear - confirm before narrowing it.
      if (preg_match('/(\s+href=["\']?)\s*#([\s\S]*?)\s*(["\'])/i', $html) === 1) {
          $errors[] = "Invalid anchor link usage detected.";
      }

      // Potential XSS: an <img> whose src uses a script-executing URL scheme.
      // (The previous pattern had an unterminated character class, so
      // preg_match() failed to compile it and this check never ran.)
      if (preg_match('/<img\b[^>]*\bsrc\s*=\s*["\']?\s*(?:javascript|vbscript):/i', $html) === 1) {
          $errors[] = "Potential XSS vulnerability: Image source not properly sanitized.";
      }

      return $errors;
  }
  // Example usage: request this script with ?file=path/to/file.html and it
  // prints the validation result as HTML.
  //
  // NOTE(security): the path comes straight from the query string, so anyone
  // who can reach this script can probe arbitrary files readable by PHP.
  // Keep this to local use, or restrict the path to a known directory.
  //
  // The is_string() guard matters because ?file[]=x makes $_GET['file'] an
  // array, which would raise a TypeError at the string-typed validateHTML().
  if (isset($_GET['file']) && is_string($_GET['file'])) {
      $filename = $_GET['file'];
      $errors = validateHTML($filename);

      // Explicit flags/charset so escaping does not depend on the PHP version's defaults.
      $safeName = htmlspecialchars($filename, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

      if (!empty($errors)) {
          echo "<h2>HTML Validation Errors for " . $safeName . ":</h2>";
          echo "<ul>";
          foreach ($errors as $error) {
              echo "<li>" . htmlspecialchars($error, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</li>";
          }
          echo "</ul>";
      } else {
          echo "<h2>HTML Validation for " . $safeName . " successful!</h2>";
      }
  } else {
      echo "Usage: ?file=path/to/your/html/file.html";
  }
  72. ?>

Add your comment