function tokenizeLogStream(logStream, patterns) {
  const tokens = [];
  for (const line of logStream.split('\n')) {
    for (const { type, regex } of patterns) {
      const match = line.match(regex);
      if (match) {
        tokens.push({
          type,                   // Named token type, e.g. 'error'
          value: match[0],        // Full matched text
          groups: match.slice(1), // Captured groups, if any
        });
      }
    }
  }
  return tokens;
}
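// The function above takes the whole log as a single string. For logs that
// arrive as an actual stream, the same per-line matching can be driven by
// Node's built-in readline module. A minimal sketch, assuming Node.js and a
// hypothetical 'app.log' file path:
const fs = require('node:fs');
const readline = require('node:readline');

async function tokenizeLogFile(path, patterns) {
  const tokens = [];
  const rl = readline.createInterface({
    input: fs.createReadStream(path),
    crlfDelay: Infinity, // Treat \r\n as a single line break
  });
  for await (const line of rl) { // Yields one line at a time, no full buffering
    for (const { type, regex } of patterns) {
      const match = line.match(regex);
      if (match) tokens.push({ type, value: match[0], groups: match.slice(1) });
    }
  }
  return tokens;
}
// e.g. tokenizeLogFile('app.log', regexPatterns).then(console.log);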
// Example usage:
// Define the token patterns once up front. Regex literals are compiled a
// single time when the script loads, so reusing them across lines costs
// nothing extra. The `g` flag is deliberately omitted: String.prototype.match
// with `g` returns every match but discards capture groups.
const regexPatterns = [
  { type: 'timestamp', regex: /(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})/ }, // Date/time
  { type: 'error',     regex: /ERROR: (.*)/ },   // Error messages
  { type: 'warn',      regex: /\[WARN\] (.*)/ }, // Warning messages
  { type: 'info',      regex: /\[INFO\] (.*)/ }, // Info messages
  { type: 'keyValue',  regex: /(\w+)=(\w+)/ },   // Key-value pairs
];
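// One caveat with per-line match(): it returns only the first hit, so a line
// such as 'user=john role=admin' would yield a single keyValue token. If
// multiple pairs per line matter, matchAll() enumerates them all while still
// exposing capture groups. A sketch, reusing the token shape from above:
function tokenizeKeyValues(line) {
  const tokens = [];
  // matchAll requires the `g` flag and yields one match object per hit
  for (const match of line.matchAll(/(\w+)=(\w+)/g)) {
    tokens.push({ type: 'keyValue', value: match[0], groups: match.slice(1) });
  }
  return tokens;
}
// tokenizeKeyValues('user=john role=admin') → two keyValue tokens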
// Simulate a log stream (replace with your actual log data)
const logStream = `
2023-10-27 10:00:00 [INFO] Application started
2023-10-27 10:00:05 ERROR: Failed to connect to database
user=john
password=secret
2023-10-27 10:00:10 [WARN] Low disk space
`;
const tokens = tokenizeLogStream(logStream, regexPatterns);
console.log(JSON.stringify(tokens, null, 2)); // Output tokens as JSON
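// Run under Node, the output starts roughly like this (an excerpt; within a
// line, tokens appear in pattern order):
// [
//   {
//     "type": "timestamp",
//     "value": "2023-10-27 10:00:00",
//     "groups": ["2023-10-27 10:00:00"]
//   },
//   {
//     "type": "info",
//     "value": "[INFO] Application started",
//     "groups": ["Application started"]
//   },
//   ...
// ]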
