import argparse
import re
from datetime import datetime, timedelta
# Numeric formats, tried in this order. The second item names the order in
# which the captured groups appear (y = year, m = month, d = day).
_NUMERIC_DATE_FORMATS = (
    (re.compile(r"(\d{4})-(\d{2})-(\d{2})"), "ymd"),  # YYYY-MM-DD
    (re.compile(r"(\d{2})/(\d{2})/(\d{4})"), "mdy"),  # MM/DD/YYYY
    (re.compile(r"(\d{2})-(\d{2})-(\d{4})"), "mdy"),  # MM-DD-YYYY
    (re.compile(r"(\d{4})/(\d{2})/(\d{2})"), "ymd"),  # YYYY/MM/DD
)

# Relative phrase such as "1 month ago" or "12 months ago".
_MONTHS_AGO = re.compile(r"(\d+) months? ago", re.IGNORECASE)

# Keywords and their offset from today in days, checked in this order.
_RELATIVE_DAY_OFFSETS = (("today", 0), ("yesterday", -1), ("tomorrow", 1))

# A "month" in "N months ago" is approximated as this many days.
_DAYS_PER_MONTH = 30


def _shift_today(days):
    """Return (year, month, day) for today shifted by *days*, or None if out of range."""
    try:
        shifted = datetime.now() + timedelta(days=days)
    except OverflowError:
        # The offset is too large for timedelta or lands outside datetime's range.
        return None
    return shifted.year, shifted.month, shifted.day


def tokenize_date(date_string):
    """
    Tokenizes a date string into year, month, and day components.

    The string is searched (not anchored), so the date may be embedded in
    longer text. Formats are tried in this order and the first usable one wins:
    YYYY-MM-DD, MM/DD/YYYY, MM-DD-YYYY, YYYY/MM/DD, "N month(s) ago",
    "today", "yesterday", "tomorrow" (the words are case-insensitive).

    Args:
        date_string: The text to tokenize.

    Returns:
        A ``(year, month, day)`` tuple of ints, or ``None`` when no supported
        format is found, when the numeric components do not form a real
        calendar date, or when a relative offset falls outside the range
        ``datetime`` can represent.

    Note:
        "N months ago" is approximate: each month is counted as 30 days.
        Relative dates are computed from the local clock (``datetime.now()``).
    """
    for pattern, order in _NUMERIC_DATE_FORMATS:
        match = pattern.search(date_string)
        if match is None:
            continue
        parts = dict(zip(order, map(int, match.groups())))
        try:
            # Constructing a datetime rejects impossible dates (month 13, Feb 30, ...).
            datetime(parts["y"], parts["m"], parts["d"])
        except ValueError:
            continue
        return parts["y"], parts["m"], parts["d"]

    match = _MONTHS_AGO.search(date_string)
    if match is not None:
        try:
            months = int(match.group(1))
        except ValueError:
            # Digit run longer than the interpreter's int-parsing limit.
            return None
        return _shift_today(-_DAYS_PER_MONTH * months)

    for keyword, offset in _RELATIVE_DAY_OFFSETS:
        if re.search(keyword, date_string, re.IGNORECASE):
            return _shift_today(offset)

    return None  # No supported format found
def main():
    """
    Command-line entry point: tokenize one date string and print the result.

    Reads a single positional argument, ``date_string``, passes it to
    ``tokenize_date`` and prints either the year/month/day components or
    an "invalid" message when nothing usable was returned.
    """
    cli = argparse.ArgumentParser(description="Tokenize date strings.")
    cli.add_argument("date_string", help="The date string to tokenize.")
    parsed = cli.parse_args()

    components = tokenize_date(parsed.date_string)
    if not components:
        # A falsy result (None) means the tokenizer found nothing usable.
        print("Invalid date string.")
        return

    print(f"Year: {components[0]}, Month: {components[1]}, Day: {components[2]}")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
# Add your comment