1. import argparse
  2. import re
  3. from datetime import datetime
  4. def tokenize_date(date_string):
  5. """
  6. Tokenizes a date string into year, month, and day components.
  7. Handles various date formats.
  8. """
  9. # Define regular expressions for common date formats
  10. patterns = [
  11. r"(\d{4})-(\d{2})-(\d{2})", # YYYY-MM-DD
  12. r"(\d{2})/(\d{2})/(\d{4})", # MM/DD/YYYY
  13. r"(\d{2})-(\d{2})-(\d{4})", # MM-DD-YYYY
  14. r"(\d{4})/(\d{2})/(\d{2})", # YYYY/MM/DD
  15. r"(\d{2} months? ago)", #e.g., 2 months ago
  16. r"(\d+) months? ago", #e.g., 2 months ago
  17. r"today",
  18. r"yesterday",
  19. r"tomorrow"
  20. ]
  21. for pattern in patterns:
  22. match = re.search(pattern, date_string, re.IGNORECASE)
  23. if match:
  24. try:
  25. if pattern == r"(\d{4})-(\d{2})-(\d{2})":
  26. year, month, day = map(int, match.groups())
  27. return year, month, day
  28. elif pattern == r"(\d{2})/(\d{2})/(\d{4})":
  29. month, day, year = map(int, match.groups())
  30. return year, month, day
  31. elif pattern == r"(\d{2})-(\d{2})-(\d{4})":
  32. month, day, year = map(int, match.groups())
  33. return year, month, day
  34. elif pattern == r"(\d{4})/(\d{2})/(\d{2})":
  35. year, month, day = map(int, match.groups())
  36. return year, month, day
  37. elif pattern == r"(\d{2} months? ago)":
  38. num_months = int(match.group(1))
  39. today = datetime.now()
  40. past_date = today - datetime.timedelta(days=num_months * 30) #Approximate
  41. return past_date.year, past_date.month, past_date.day
  42. elif pattern == r"(\d+) months? ago":
  43. num_months = int(match.group(1))
  44. today = datetime.now()
  45. past_date = today - datetime.timedelta(days=num_months * 30) #Approximate
  46. return past_date.year, past_date.month, past_date.day
  47. elif pattern == r"today":
  48. return datetime.now().year, datetime.now().month, datetime.now().day
  49. elif pattern == r"yesterday":
  50. yesterday = datetime.now() - datetime.timedelta(days=1)
  51. return yesterday.year, yesterday.month, yesterday.day
  52. elif pattern == r"tomorrow":
  53. tomorrow = datetime.now() + datetime.timedelta(days=1)
  54. return tomorrow.year, tomorrow.month, tomorrow.day
  55. except ValueError:
  56. return None # Handle cases where conversion to int fails
  57. return None # No match found
  58. def main():
  59. """
  60. CLI interface for date tokenization.
  61. """
  62. parser = argparse.ArgumentParser(description="Tokenize date strings.")
  63. parser.add_argument("date_string", help="The date string to tokenize.")
  64. args = parser.parse_args()
  65. token = tokenize_date(args.date_string)
  66. if token:
  67. print(f"Year: {token[0]}, Month: {token[1]}, Day: {token[2]}")
  68. else:
  69. print("Invalid date string.")
  70. if __name__ == "__main__":
  71. main()

Add your comment