1. import org.jsoup.Jsoup;
  2. import org.jsoup.nodes.Document;
  3. import org.jsoup.nodes.Element;
  4. import org.jsoup.select.Elements;
  5. public class GracefulHTMLParser {
  6. public static String extractData(String html) {
  7. try {
  8. // Attempt to parse the HTML
  9. Document doc = Jsoup.parse(html);
  10. // Example: Extract all paragraph text
  11. Elements paragraphs = doc.select("p");
  12. StringBuilder data = new StringBuilder();
  13. for (Element paragraph : paragraphs) {
  14. data.append(paragraph.text()).append(" ");
  15. }
  16. return data.toString().trim();
  17. } catch (Exception e) {
  18. // Handle parsing errors gracefully
  19. System.err.println("Error parsing HTML: " + e.getMessage());
  20. return "ERROR: HTML parsing failed."; // Return an error message
  21. }
  22. }
  23. public static void main(String[] args) {
  24. // Example usage
  25. String html1 = "<p>This is the first paragraph.</p><p>And this is the second.</p>";
  26. String html2 = "<p>This is some HTML.</p><script>alert('Error!');</script>"; //Example of problematic HTML
  27. String html3 = null; //Example of null HTML
  28. String data1 = extractData(html1);
  29. System.out.println("Data from html1: " + data1);
  30. String data2 = extractData(html2);
  31. System.out.println("Data from html2: " + data2);
  32. String data3 = extractData(html3);
  33. System.out.println("Data from html3: " + data3);
  34. }
  35. }

Add your comment