#!/usr/bin/env python3
"""
Modified j3.py for joblegi.xyz
Features: 
1. Automatic daily date updates (via script execution)
2. SEO Slugs (wfh.hstn.me style)
3. Advanced Global Job Schema
"""

import os
import json
import re
import datetime
import argparse
from jinja2 import Template
from bs4 import BeautifulSoup

# Site origin (scheme + host, no trailing slash). process_job() prefixes it to
# "/job/<file name>" to build each page's absolute URL and also uses it as the
# hiring organization's "sameAs" value in the job schema.
MAIN_DOMAIN = "https://joblegi.xyz"

def slugify(title):
    """Create a URL-friendly slug from a job title.

    The title is lower-cased, every character other than ``a-z``, ``0-9``,
    whitespace and ``-`` is dropped, and each run of whitespace and/or
    hyphens is collapsed into a single ``-``. Leading and trailing hyphens
    are stripped.

    Args:
        title: Job title text.

    Returns:
        The slug, or an empty string when no ASCII letter or digit survives.
    """
    slug = title.lower()
    slug = re.sub(r'[^a-z0-9\s-]', '', slug)
    # Collapse whitespace *and* existing hyphens together so that a title such
    # as "Dev - Remote" yields "dev-remote" rather than "dev---remote".
    slug = re.sub(r'[\s-]+', '-', slug).strip('-')
    return slug

def clean_visible_body(html):
    """Return *html* simplified for display as the job description.

    Every ``<a>`` and ``<span>`` element is replaced by its own contents, and
    the ``class`` attribute is removed from all remaining tags.
    """
    document = BeautifulSoup(html, "html.parser")
    # Anchors first, then spans: the wrapper element goes, its children stay.
    for wrapper_name in ('a', 'span'):
        for wrapper in document.find_all(wrapper_name):
            wrapper.unwrap()
    for element in document.find_all(True):
        if 'class' in element.attrs:
            del element.attrs['class']
    return str(document)

# UPDATED TEMPLATE: Matching wfh.hstn.me style
template_html = r"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ job_title }} | Remote Job</title>
    <style>
        :root { --blue: #2563eb; --dark: #111827; --gray: #4b5563; }
        body { font-family: system-ui, -apple-system, sans-serif; background: #f9fafb; color: var(--dark); line-height: 1.6; margin: 0; }
        .nav { background: white; padding: 1rem; border-bottom: 1px solid #e5e7eb; text-align: center; font-weight: bold; font-size: 1.2rem; color: var(--blue); }
        .container { max-width: 750px; margin: 40px auto; padding: 0 20px; }
        .card { background: white; padding: 40px; border-radius: 12px; border: 1px solid #e5e7eb; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        .company { color: var(--blue); font-weight: 700; text-transform: uppercase; font-size: 0.85rem; letter-spacing: 1px; }
        h1 { font-size: 2rem; margin: 10px 0; line-height: 1.2; }
        .meta { display: flex; gap: 15px; color: var(--gray); font-size: 0.9rem; margin-bottom: 25px; }
        .badge { background: #dbeafe; color: #1e40af; padding: 3px 10px; border-radius: 5px; font-weight: 600; font-size: 0.75rem; }
        .description { font-size: 1.05rem; border-top: 1px solid #f3f4f6; padding-top: 25px; }
        .apply-box { text-align: center; margin-top: 40px; position: sticky; bottom: 20px; }
        .apply-btn { 
            display: inline-block; background: var(--blue); color: white; padding: 16px 45px; 
            border-radius: 8px; text-decoration: none; font-weight: 700; font-size: 1.2rem;
            box-shadow: 0 10px 15px -3px rgba(37, 99, 235, 0.3);
        }
    </style>
    
    <script type="application/ld+json">
    {{ schema_json | safe }}
    </script>
</head>
<body>
    <div class="nav">Remote Jobs USA</div>
    <main class="container">
        <div class="card">
            <div class="company">{{ company_name }}</div>
            <h1>{{ job_title }}</h1>
            <div class="meta">
                <span class="badge">REMOTE</span>
                <span>📍 {{ location }}</span>
                <span>🕒 Posted: {{ date_posted }}</span>
            </div>
            <div class="description">
                {{ job_body | safe }}
            </div>
            <div class="apply-box">
                <a href="{{ apply_url }}" class="apply-btn">Apply Now</a>
            </div>
        </div>
    </main>
</body>
</html>
"""

# Countries listed under "applicantLocationRequirements" in the JobPosting
# schema. Kept at module level so the tuple is built once, not once per job.
_APPLICANT_COUNTRIES = (
    "United States", "Japan", "India", "United Kingdom", "Brazil",
    "Australia", "Indonesia", "Germany", "Netherlands", "Sweden",
    "Canada", "Mexico", "France", "Spain", "South Korea", "Kenya",
    "Nigeria", "South Africa", "Singapore", "United Arab Emirates",
    "Philippines", "Italy", "Switzerland", "Denmark", "Norway",
    "Ireland", "Poland", "Malaysia", "Thailand", "Vietnam",
    "Bangladesh", "Saudi Arabia", "Qatar", "Argentina", "Chile", "Colombia",
)


def process_job(job_record, force_company=None):
    """Build the template context for one job record.

    Args:
        job_record: Mapping parsed from one NDJSON line. The keys read are
            ``job_title``, ``body_parts`` (strings that are concatenated and
            parsed as HTML), ``company_name`` and ``apply_url``. Each may be
            missing, null or empty, in which case a fallback is used.
        force_company: When truthy, used as the company name instead of the
            record's own ``company_name``.

    Returns:
        A dict with the keys ``job_title``, ``file_name``, ``job_url``,
        ``apply_url``, ``job_body``, ``company_name``, ``location``,
        ``date_posted`` and ``schema_json`` (the JobPosting schema serialized
        as JSON that is safe to inline in an HTML ``<script>`` element).
    """
    # `or` (rather than a .get() default) also covers keys that are present
    # but null/empty, which would otherwise crash or yield a ".html" file.
    job_title = job_record.get("job_title") or "Untitled Job"
    slug = slugify(job_title) or "untitled-job"

    # Filename matches slug exactly
    file_name = f"{slug}.html"
    job_full_url = f"{MAIN_DOMAIN}/job/{file_name}"

    raw_body = "".join(job_record.get("body_parts") or [])
    visible_body = clean_visible_body(raw_body)

    company_name = force_company or job_record.get("company_name") or "Wfh"

    # AUTOMATIC DATE LOGIC: uses the current date every time the script runs.
    # Read the clock once so both dates derive from the same day even when
    # the call straddles midnight.
    today = datetime.date.today()
    date_today = today.isoformat()
    expiry_date = (today + datetime.timedelta(days=30)).isoformat()

    job_schema = {
        "@context": "https://schema.org",
        "@type": "JobPosting",
        "title": job_title,
        "datePosted": date_today,
        "validThrough": expiry_date,
        "description": visible_body,
        "url": job_full_url,
        "jobLocationType": "TELECOMMUTE",
        "applicantLocationRequirements": [
            {"@type": "Country", "name": country}
            for country in _APPLICANT_COUNTRIES
        ],
        "employmentType": "FULL_TIME",
        "hiringOrganization": {
            "@type": "Organization",
            "name": company_name,
            "sameAs": MAIN_DOMAIN,
        },
        "jobLocation": {
            "@type": "Place",
            "address": {"@type": "PostalAddress", "addressCountry": "US"},
        },
        "directApply": True,
    }

    # The page template inlines this JSON, unescaped, inside a <script>
    # element. A literal "</script>" or "<!--" in the HTML description would
    # terminate or corrupt that element, so "<", ">" and "&" are emitted as
    # JSON \uXXXX escapes; they only ever occur inside JSON strings, and the
    # parsed value is unchanged.
    schema_json = (
        json.dumps(job_schema, indent=2)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    return {
        "job_title": job_title,
        "file_name": file_name,
        "job_url": job_full_url,
        "apply_url": job_record.get("apply_url") or job_full_url,
        "job_body": visible_body,
        "company_name": company_name,
        "location": "Remote",
        "date_posted": date_today,
        "schema_json": schema_json,
    }

def main(ndjson_file, output_dir, force_company):
    """Render one HTML page per job record of an NDJSON file.

    Args:
        ndjson_file: Path to a UTF-8 text file holding one JSON job record
            per line; blank lines are skipped.
        output_dir: Directory that receives the generated pages. It is
            created when it does not exist yet.
        force_company: Passed through to process_job() for every record.

    A record that fails to parse, render or write is reported on stdout and
    skipped; the remaining records are still processed.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Compile the template once instead of once per record. autoescape=True
    # HTML-escapes the plain placeholders (title, company, apply URL, ...),
    # while values piped through `safe` are still emitted verbatim.
    template = Template(template_html, autoescape=True)
    written = set()

    with open(ndjson_file, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                job_data = process_job(json.loads(line), force_company)
                html_content = template.render(**job_data)

                file_name = job_data['file_name']
                # Two records whose titles slugify identically share one file
                # name; make the overwrite visible instead of silent.
                if file_name in written:
                    print(f"Warning: line {line_no} overwrites {file_name} (duplicate slug)")
                written.add(file_name)

                output_path = os.path.join(output_dir, file_name)
                with open(output_path, "w", encoding="utf-8") as out_f:
                    out_f.write(html_content)
                print(f"Generated: {file_name} with date {job_data['date_posted']}")
            except Exception as e:
                # Per-record boundary: report which input line failed and
                # carry on with the next one.
                print(f"Error (line {line_no}): {e}")

if __name__ == "__main__":
    # Command line: <ndjson_file> <output_dir> [--force-company NAME]
    cli = argparse.ArgumentParser()
    for positional in ("ndjson_file", "output_dir"):
        cli.add_argument(positional)
    cli.add_argument("--force-company", default=None)
    options = cli.parse_args()
    main(options.ndjson_file, options.output_dir, options.force_company)