import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Extracts paragraph text from an HTML string using jsoup, returning a
 * sentinel error string instead of throwing when the input cannot be handled.
 */
public class GracefulHTMLParser {

    /** Value returned by {@link #extractData(String)} when the input cannot be parsed. */
    private static final String PARSE_ERROR_MESSAGE = "ERROR: HTML parsing failed.";

    /**
     * Collects the text of every {@code <p>} element in the given HTML.
     *
     * <p>The text of each matched paragraph is appended in document order,
     * separated by a single space, and the combined result is trimmed.
     *
     * @param html the HTML markup to parse; may be {@code null}
     * @return the space-separated paragraph text (empty if no {@code <p>}
     *         elements are matched), or {@code "ERROR: HTML parsing failed."}
     *         when {@code html} is {@code null} or parsing throws
     */
    public static String extractData(String html) {
        // Check for null up front rather than relying on the parser to throw:
        // the outcome for callers is the same, but the diagnostic is explicit.
        if (html == null) {
            System.err.println("Error parsing HTML: input was null");
            return PARSE_ERROR_MESSAGE;
        }

        try {
            Document doc = Jsoup.parse(html);
            Elements paragraphs = doc.select("p");

            StringBuilder data = new StringBuilder();
            for (Element paragraph : paragraphs) {
                data.append(paragraph.text()).append(' ');
            }
            // trim() drops the separator appended after the last paragraph.
            return data.toString().trim();
        } catch (RuntimeException e) {
            // Report the exception itself (type and message); getMessage()
            // alone may be null and hides what kind of failure occurred.
            System.err.println("Error parsing HTML: " + e);
            return PARSE_ERROR_MESSAGE;
        }
    }

    /**
     * Demonstrates {@link #extractData(String)} on three sample inputs and
     * prints each result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String html1 = "<p>This is the first paragraph.</p><p>And this is the second.</p>";
        // Contains a <script> element; only "p" elements are selected, so the
        // script content is not part of the extracted text.
        String html2 = "<p>This is some HTML.</p><script>alert('Error!');</script>";
        // Null input exercises the error path.
        String html3 = null;

        String data1 = extractData(html1);
        System.out.println("Data from html1: " + data1);
        String data2 = extractData(html2);
        System.out.println("Data from html2: " + data2);
        String data3 = extractData(html3);
        System.out.println("Data from html3: " + data3);
    }
}
// Add your comment