import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts {@code key=value} tokens for a fixed set of metadata fields
 * (name, description, author, date, version, tags) from free-form text.
 */
public class MetadataTokenizer {

    /**
     * One pre-compiled pattern per recognised field, in the order their
     * tokens are emitted.
     *
     * <p>Notes on the expressions:
     * <ul>
     *   <li>{@code \b} before each key stops {@code name=} from matching
     *       inside a longer key such as {@code filename=}.</li>
     *   <li>Values allow only horizontal whitespace (space, tab) so that a
     *       value never runs across a line break into the next entry.</li>
     *   <li>Keys are matched case-insensitively, so {@code Name=} and
     *       {@code name=} are both accepted.</li>
     * </ul>
     */
    private static final Pattern[] FIELD_PATTERNS = {
        // Name: word characters and spaces/tabs
        Pattern.compile("\\bname=(?<name>[\\w \\t]+)", Pattern.CASE_INSENSITIVE),
        // Description: word characters, spaces/tabs and basic punctuation
        Pattern.compile("\\bdescription=(?<description>[\\w \\t.,;:'\"]+)", Pattern.CASE_INSENSITIVE),
        // Author: word characters and spaces/tabs
        Pattern.compile("\\bauthor=(?<author>[\\w \\t]+)", Pattern.CASE_INSENSITIVE),
        // Date: YYYY-MM-DD
        Pattern.compile("\\bdate=(?<date>\\d{4}-\\d{2}-\\d{2})", Pattern.CASE_INSENSITIVE),
        // Version: digits and dots
        Pattern.compile("\\bversion=(?<version>[\\d.]+)", Pattern.CASE_INSENSITIVE),
        // Tags: word characters, spaces/tabs, commas and hyphens
        Pattern.compile("\\btags=(?<tags>[\\w \\t,-]+)", Pattern.CASE_INSENSITIVE)
    };

    /**
     * Finds every recognised {@code key=value} entry in the given text.
     *
     * <p>Each token is the full matched text including the key, exactly as
     * it appears in the input (e.g. {@code "Name=My Application"}). Tokens
     * are grouped by field in the order name, description, author, date,
     * version, tags; within one field they follow input order.
     *
     * @param metadata the text to scan; may be {@code null}
     * @return a mutable list of matched tokens, empty if {@code metadata} is
     *         {@code null} or contains no recognised entries
     */
    public static List<String> tokenizeMetadata(String metadata) {
        List<String> tokens = new ArrayList<>();
        if (metadata == null) {
            // Nothing to scan; avoid the NullPointerException from Pattern.matcher.
            return tokens;
        }
        for (Pattern fieldPattern : FIELD_PATTERNS) {
            Matcher matcher = fieldPattern.matcher(metadata);
            while (matcher.find()) {
                tokens.add(matcher.group()); // whole match: key, '=', and value
            }
        }
        return tokens;
    }

    /**
     * Demo entry point: tokenizes a sample metadata string and prints one
     * token per line to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String metadata = "Name=My Application\n" +
                "Description=This is a sample application, for testing.\n" +
                "Author=John Doe\n" +
                "Date=2023-10-27\n" +
                "Version=1.2.3\n" +
                "Tags=sample, testing, automation";
        List<String> tokens = tokenizeMetadata(metadata);
        for (String token : tokens) {
            System.out.println(token);
        }
    }
}
// Add your comment