#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Location-Centric JobPosting Optimizer (v4: SPEC SHEET & SMART TAGS)
===================================================================

This version introduces a professional "Spec Sheet" table layout and
intelligent hashtag generation.

NEW ABILITIES:
1. SPEC SHEET BODY: A clean HTML data table (Role, Pay, Location, Perks).
   - Looks official and high-trust.
   - Zebra-striped for readability.
2. SMART TAGS: Auto-generates SEO hashtags (#CityJobs #Role) at the footer.
3. REMOVED HR: Transitions are handled by spacing only.
4. DEEP SCAN PRESERVED: Still hunts for "Tips", "Bonuses" in the body text.

Usage:
python3 10-16-improver-v4.py -i input.ndjson -o output.ndjson
"""

import argparse
import json
import random
import re
import logging
import locale
import hashlib
from tqdm import tqdm
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone

# ==========================
# Locale initialization
# ==========================
# Select the first locale this platform accepts: the environment default,
# then common UTF-8 locales, and finally the portable "C" locale. Without the
# last candidate, a host lacking all three UTF-8 options would abort the
# script at import time with an uncaught locale.Error.
for _locale_candidate in ("", "en_US.UTF-8", "C.UTF-8", "C"):
    try:
        locale.setlocale(locale.LC_ALL, _locale_candidate)
    except locale.Error:
        continue
    break

# ==========================
# Constants & Defaults
# ==========================
# Base character budget for generated titles.
MAX_TITLE_LEN_BASE = 68 
# Default value of the --logo_cdn command-line option.
DESIRED_DEFAULT_FALLBACK_LOGO_URL = "https://bgcareers.us.com/logo.png"
# Default value of the --currency command-line option.
DEFAULT_CURRENCY = "USD"

# Weekly hours used when converting between hourly and annual pay.
DEFAULT_FULL_TIME_HOURS_PER_WEEK = 40
DEFAULT_PART_TIME_HOURS_PER_WEEK = 20 
WEEKS_PER_YEAR = 52
MONTHS_PER_YEAR = 12

# Multipliers applied to salaries when dynamic adjustment is enabled.
SALARY_ADJUSTMENT_FACTORS = [1.0, 0.95, 1.05]
# 1.0 appears three times out of five here, so a uniform random pick leaves
# the salary unchanged 60% of the time.
WEIGHTED_SALARY_ADJUSTMENT_FACTORS = [1.0, 1.0] + SALARY_ADJUSTMENT_FACTORS

# Upper-cased salary unit -> short suffix shown after the amount (e.g. "/hr").
SALARY_UNIT_MAP = {
    'HOUR': 'hr', 'HOURLY': 'hr', 'DAY': 'day', 'DAILY': 'day',
    'WEEK': 'wk', 'WEEKLY': 'wk', 'MONTH': 'mo', 'MONTHLY': 'mo',
    'YEAR': 'yr', 'ANNUAL': 'yr', 'ANNUALLY': 'yr', 'PROJECT': 'project'
}
# Upper-cased salary unit -> canonical unit. Note that DAY/DAILY collapse to
# "PROJECT", the unit for which format_salary_details computes no conversions.
NORMALIZED_SALARY_UNITS = {
    "HOUR": "HOUR", "HOURLY": "HOUR", "DAY": "PROJECT", "DAILY": "PROJECT",
    "WEEK": "WEEK", "WEEKLY": "WEEK", "MONTH": "MONTH", "MONTHLY": "MONTH",
    "YEAR": "YEAR", "ANNUAL": "YEAR", "ANNUALLY": "YEAR", "PROJECT": "PROJECT"
}

# Experience level -> inclusive (min, max) range of months of experience.
EXPERIENCE_LEVEL_MONTHS = {
    "Entry-Level": (0, 12),
    "Mid-Level": (13, 60),
    "Senior-Level": (61, float('inf'))
}
# Experience level -> substrings looked for in the lower-cased experience
# description; levels are checked in this order and the first hit wins.
EXPERIENCE_LEVEL_KEYWORDS = {
    "Entry-Level": ["entry level", "no experience", "graduate", "junior", "trainee", "intern"],
    "Mid-Level": ["mid level", "intermediate", "associate", "experienced"],
    "Senior-Level": ["senior", "lead", "principal", "expert", "staff level", "manager"]
}

# A posting at most this many whole days old is tagged as new.
RECENT_POST_DAYS = 2
# A posting expiring within this many days (and not yet expired) is tagged as closing.
CLOSING_SOON_DAYS = 7

# ==========================
# INTELLIGENCE MAPS
# ==========================

# Keyword -> presentation context. Keys are substring-matched against the
# lower-cased cleaned role in dict order (first match wins). "synonym"
# replaces the role text and "tag" feeds the hashtag footer.
# NOTE(review): "hook" and "vibe" are not read by any code visible in this
# part of the file - confirm whether they are used elsewhere.
ROLE_CONTEXT_MAP = {
    "warehouse": {"hook": "Immediate Start", "synonym": "Warehouse Associate", "vibe": "active", "tag": "#Warehouse"},
    "driver": {"hook": "Earn Cash Daily", "synonym": "Driver Opportunity", "vibe": "gig", "tag": "#Driver"},
    "delivery": {"hook": "Flexible Routes", "synonym": "Courier", "vibe": "gig", "tag": "#Delivery"},
    "nurse": {"hook": "Flexible Shifts", "synonym": "Nursing Role", "vibe": "care", "tag": "#Healthcare"},
    "retail": {"hook": "Store Discounts", "synonym": "Retail Associate", "vibe": "customer", "tag": "#Retail"},
    "sales": {"hook": "High Commission", "synonym": "Sales Representative", "vibe": "money", "tag": "#Sales"},
    "server": {"hook": "Tips + Hourly", "synonym": "Hospitality Role", "vibe": "customer", "tag": "#Hospitality"},
    "customer service": {"hook": "Remote Options", "synonym": "Support Specialist", "vibe": "office", "tag": "#CustomerService"},
    "security": {"hook": "Weekly Pay", "synonym": "Security Officer", "vibe": "guard", "tag": "#Security"}
}

# Regex pattern -> short perk label. Patterns are searched in the lower-cased
# plain text of the job description, so they are written in lower case.
HIDDEN_PERKS_MAP = {
    r"\bdaily pay\b": "Daily Pay Avail",
    r"\bweekly pay\b": "Weekly Pay",
    r"\bsign[- ]?on bonus\b": "Sign-On Bonus",
    r"\btips\b": "Tips + Base",
    r"\btraining provided\b": "Paid Training",
    r"\bflexible schedule\b": "Flexible Sched",
    r"\bimmediate start\b": "Immediate Start",
    r"\bno experience\b": "No Exp Req"
}

# Phrases picked at random for the "hiring" title part.
HIRING_SYNS = ["Hiring", "Start ASAP", "Now Hiring", "Apply Now", "Urgent", "Open Now"]
# Phrases that mark an experience description as entry level when no other
# level was detected.
ENTRY_SYNS = ["Entry Level", "No Experience", "Junior", "Training", "Student"]
# Title tags picked at random for recently posted / soon-to-expire postings.
URGENCY_TAGS_NEW_TITLE = ["New", "Just Posted", "Fresh"]
URGENCY_TAGS_CLOSING_TITLE = ["Closing", "Urgent", "Last Chance"]

# ==================================================
# TITLE TEMPLATES (Visual Interrupts)
# ==================================================
# Title templates grouped by the employment "template_key" (FULL_TIME,
# PART_TIME, FLEXIBLE). The {placeholders} are filled from the parts dict
# built in generate_location_focused_title; a template is only usable when
# every placeholder it names has a non-empty value.
GEO_FOCUSED_TITLE_TEMPLATES = {
    "FULL_TIME": [
        "{city} Full-Time: {role}",
        "{role} (FT) - {city}",
        "[Full-Time] {role} - {city}, {state}",
        "{city} Career: {role} (Full-Time)",
        "Hiring: {role} (Full-Time) in {city}",
        "{role} - {company} ({city})",
        "{city} Opening: {role} (FT)"
    ],
    "PART_TIME": [
        "[Part-Time] {role} - {city}",
        "({city}) Part-Time: {role}",
        "[{urgency_tag}] {role} - {city} (PT)",
        "Start ASAP: {role} ({city})",
        "Flexible Hours: {role} - {city}",
        "Student Friendly: {role} ({city})",
        "Weekend Shift: {role} - {city}",
        "{salary_compact} {role} - {city} (PT)",
        "Earn Extra Cash: {role} ({city})",
        "{city} Needs: {role} (Part-Time)",
        "Urgent Hire in {city}: {role}",
        "{city} Job: {role} (No Exp Req*)",
        "{role} needed in {city} (Apply Now)",
        "Hiring in {city}: {role} (Part-Time)",
        "{role} (Part-Time) @ {company} - {city}"
    ],
    "FLEXIBLE": [
        "[Flex] {role} - {city}",
        "{role} (Gig) - Earn in {city}",
        "Make Your Schedule: {role} ({city})",
        "{city} Side Hustle: {role}",
        "({city}) {role} - Contract/Gig",
        "Driver/Gig: {role} in {city}",
        "Start Today: {role} ({city})"
    ]
}

# ==========================
# Argparse
# ==========================

def parse_args():
    """Define the command-line interface and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Optimize Local JobPosting NDJSON (v4: Spec Sheet).")

    # Plain options, registered in the order they appear in --help.
    option_specs = [
        (('-i', '--input'), {'default': 'all-schemas.ndjson', 'help': 'Input NDJSON file'}),
        (('-o', '--output'), {'default': 'schema.ndjson', 'help': 'Output NDJSON file'}),
        (('--seed',), {'type': int, 'default': None, 'help': 'Seed for reproducible randomness'}),
        (('--logo_cdn',), {'default': DESIRED_DEFAULT_FALLBACK_LOGO_URL, 'help': 'Default fallback logo URL'}),
        (('--currency',), {'default': DEFAULT_CURRENCY, 'help': 'Default currency'}),
        (('-v', '--verbose'), {'action': 'store_true', 'help': 'Enable debug logging'}),
        (('--full_time_hours',), {'type': int, 'default': DEFAULT_FULL_TIME_HOURS_PER_WEEK}),
        (('--part_time_hours',), {'type': int, 'default': DEFAULT_PART_TIME_HOURS_PER_WEEK}),
        (('--enable_salary_adjustment',), {'action': 'store_true'}),
    ]
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    # The two title modes cannot be combined on one command line.
    title_modes = parser.add_mutually_exclusive_group()
    for mode_flag in ('--no-change-in-title', '--slightly-improve-title'):
        title_modes.add_argument(mode_flag, action='store_true')

    return parser.parse_args()

# ==========================
# Utilities
# ==========================

def title_case(s: str) -> str:
    """Capitalize each whitespace-separated word of *s*.

    Words of two or more characters that are entirely upper case (e.g.
    acronyms such as "IT") are kept untouched; every other word is passed
    through ``str.capitalize``, which also lower-cases the rest of the word.
    Runs of whitespace collapse to single spaces. Falsy input yields "".
    """
    if not s:
        return ""
    pieces = []
    for word in s.split():
        is_acronym = len(word) > 1 and word.isupper()
        pieces.append(word if is_acronym else word.capitalize())
    return " ".join(pieces)

def to_dhaka_offset(dt: str) -> str:
    """Re-express an ISO-8601 timestamp at a fixed UTC+06:00 offset.

    A trailing "Z" is accepted as UTC. Falsy input yields ""; text that
    cannot be parsed is returned unchanged.
    """
    if not dt:
        return ""
    try:
        moment = datetime.fromisoformat(dt.replace('Z', '+00:00'))
        plus_six = timezone(timedelta(hours=6))
        converted = moment.astimezone(plus_six)
        return converted.isoformat()
    except ValueError:
        return dt

def to_midnight(dt_str: str) -> str:
    """Move an ISO-8601 timestamp to the last second of its day (23:59:59).

    The timestamp keeps its own UTC offset; when it carries none, a fixed
    UTC+06:00 offset is used as the target zone. Microseconds are cleared.
    Falsy input yields ""; unparseable text is returned unchanged.
    """
    if not dt_str:
        return ""
    try:
        parsed = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
        zone = parsed.tzinfo if parsed.tzinfo else timezone(timedelta(hours=6))
        end_of_day = parsed.astimezone(zone).replace(
            hour=23, minute=59, second=59, microsecond=0
        )
    except (IndexError, ValueError):
        return dt_str
    else:
        return end_of_day.isoformat()

def normalize_url(url: str) -> str:
    """Normalize *url* to an ``https://`` URL without query or fragment.

    Steps:
      1. Drop everything from the first "?" and from the first "#".
      2. Force the scheme to https. Protocol-relative ("//host") and
         scheme-less inputs are prefixed; "http://" and "https://" are
         recognised case-insensitively, because URL schemes are not case
         sensitive and an input such as "HTTP://host" must not end up as
         "https://HTTP://host".
      3. Append a trailing "/" unless the URL already ends with one or its
         last segment ends in something that looks like a 2-5 character
         extension (".png", ".html", and also a bare host like "x.com").

    Returns "" for falsy input.
    """
    if not url:
        return ""
    url = url.split('?', 1)[0].split('#', 1)[0]

    lowered = url.lower()
    if url.startswith('//'):
        url = 'https://' + url[2:]
    elif lowered.startswith('http://'):
        url = 'https://' + url[len('http://'):]
    elif lowered.startswith('https://'):
        # Rewrites only the scheme's letter case; the rest is untouched.
        url = 'https://' + url[len('https://'):]
    else:
        url = 'https://' + url.lstrip('/')

    last_segment = url.split('/')[-1]
    looks_like_file = re.search(r'\.\w{2,5}(?:/)?$', last_segment)
    if not looks_like_file and not url.endswith('/'):
        url += '/'
    return url

def get_currency_symbol(currency_code: str) -> str:
    """Return the display symbol for a currency code (case-insensitive).

    Codes that are not in the table - including non-string values, which are
    stringified first - fall back to "$".
    """
    code = str(currency_code).upper()
    known_symbols = {
        "USD": "$",
        "EUR": "€",
        "GBP": "£",
        "JPY": "¥",
        "CAD": "CA$",
        "AUD": "A$",
        "INR": "₹",
        "BDT": "৳",
    }
    if code in known_symbols:
        return known_symbols[code]
    return "$"

# ==========================
# Intelligence helpers
# ==========================

def get_primary_skill(skills_value) -> str:
    """Return the first non-empty skill found in *skills_value*.

    *skills_value* may be a string, a list (non-string and blank items are
    ignored) or any other value (stringified). The resulting text is split
    on ",", ";" and "/" and the first non-blank piece is returned, stripped.
    Returns "" when nothing usable is found.
    """
    if not skills_value:
        return ""

    if isinstance(skills_value, str):
        combined = skills_value
    elif isinstance(skills_value, list):
        combined = ", ".join(
            entry.strip()
            for entry in skills_value
            if isinstance(entry, str) and entry.strip()
        )
    else:
        combined = str(skills_value).strip()

    if not combined:
        return ""

    for fragment in re.split(r'[,;/]', combined):
        candidate = fragment.strip()
        if candidate:
            return candidate
    return ""

def get_location_details(rec: dict) -> tuple[str, str, str]:
    """Extract ``(city, state, country)`` from a record's ``jobLocation``.

    ``jobLocation`` may be a single location dict or a list of them (only
    the first entry is used). Values are read from the location's
    ``address`` dict: ``addressLocality``, ``addressRegion`` and
    ``addressCountry``.

    Missing or null fields become "" (country defaults to "US") instead of
    the literal text "None". A country given as an object is reduced to its
    ``name``. When the city is empty but a state is present, the state is
    reused as the city.

    Returns:
        A tuple of three strings; ``("", "", "US")`` when no address is found.
    """
    default_country = "US"

    location = rec.get('jobLocation')
    if isinstance(location, list):
        location = location[0] if location else None
    address = location.get('address') if isinstance(location, dict) else None
    if not isinstance(address, dict):
        return "", "", default_country

    def _text(value) -> str:
        # JSON null must not leak into titles as the string "None".
        return "" if value is None else str(value)

    city = _text(address.get('addressLocality', ''))
    state = _text(address.get('addressRegion', ''))

    raw_country = address.get('addressCountry', default_country)
    if isinstance(raw_country, dict):
        # Country object form, e.g. {"@type": "Country", "name": "US"}.
        raw_country = raw_country.get('name')
    country = default_country if raw_country is None else str(raw_country)

    if not city and state:
        city = state
    return city, state, country

def _normalize_employment_type(raw: str) -> str:
    """Turn a raw employment type ("FULL_TIME", "part time") into "Full-Time" form."""
    text = raw.replace('_', '-').strip().lower().replace(" time", "-time")
    # Capitalize every hyphen-separated segment. A plain str.capitalize() on
    # the whole word yields "Full-time", which never equals the "Full-Time"
    # and "Part-Time" labels the caller tests for.
    return " ".join(
        "-".join(segment.capitalize() for segment in word.split('-'))
        for word in text.split()
    )


def get_employment_types_info(rec: dict) -> dict:
    """Classify a record's ``employmentType`` for titles and templates.

    ``employmentType`` may be a string or a list of strings; blank and
    non-string entries are ignored.

    Classification (first rule that applies):
      * any Part-Time entry                       -> PART_TIME / "Part-Time"
      * Full-Time (without Part-Time)             -> FULL_TIME / "Full-Time"
      * Contract, Temporary, Seasonal or Gig      -> FLEXIBLE  / "Flexible"
      * anything else, or nothing                 -> PART_TIME / "Part-Time"

    Returns:
        dict with keys ``title_display`` and ``chosen_for_description`` (the
        display label), ``schema_list`` (sorted, de-duplicated types in
        UPPER_SNAKE form; ``["PART_TIME"]`` when none were given) and
        ``template_key``.
    """
    raw_value = rec.get('employmentType')
    candidates = raw_value if isinstance(raw_value, list) else [raw_value]
    unique_types = sorted({
        _normalize_employment_type(item)
        for item in candidates
        if isinstance(item, str) and item.strip()
    })

    if "Part-Time" in unique_types:
        template_key, label = "PART_TIME", "Part-Time"
    elif "Full-Time" in unique_types:
        template_key, label = "FULL_TIME", "Full-Time"
    elif any(flex in unique_types for flex in ("Contract", "Temporary", "Seasonal", "Gig")):
        template_key, label = "FLEXIBLE", "Flexible"
    else:
        template_key, label = "PART_TIME", "Part-Time"

    schema_list = [t.upper().replace('-', '_') for t in unique_types] if unique_types else ["PART_TIME"]

    return {
        'title_display': label,
        'chosen_for_description': label,
        'schema_list': schema_list,
        'template_key': template_key
    }

def get_industries_info(rec: dict) -> dict:
    """Collect the record's ``industry`` values for display and schema output.

    ``industry`` may be a string or a list; blank and non-string entries are
    skipped. Each kept entry has "&" replaced by "and", is stripped and
    title-cased; the results are de-duplicated and sorted.

    Returns:
        dict with ``display_list`` and ``schema_list`` (the same sorted
        list) and ``title_display`` (one entry picked at random). All three
        are empty when no industry is available.
    """
    raw = rec.get('industry')
    if isinstance(raw, list):
        entries = raw
    elif isinstance(raw, str):
        entries = [raw]
    else:
        entries = []

    cleaned = {
        title_case(entry.replace('&', 'and').strip())
        for entry in entries
        if isinstance(entry, str) and entry.strip()
    }
    ordered = sorted(cleaned)

    if ordered:
        return {
            'display_list': ordered,
            'title_display': random.choice(ordered),
            'schema_list': ordered
        }
    return {'display_list': [], 'title_display': "", 'schema_list': []}

def get_experience_level_info(rec: dict) -> dict:
    """Derive an experience level from ``experienceRequirements``.

    Only dict-shaped requirements are inspected. When
    ``monthsOfExperience`` converts to an integer, it is matched against the
    inclusive ranges in EXPERIENCE_LEVEL_MONTHS; otherwise the lower-cased
    ``description`` is scanned for EXPERIENCE_LEVEL_KEYWORDS. If neither
    step finds a level, any ENTRY_SYNS phrase in the description marks the
    job as entry level.

    Returns:
        dict with ``title_tag`` (e.g. "Entry") and ``description_tag``
        (e.g. "Entry-Level"); both are "" when no level is detected.
    """
    level_tag = ""
    description_tag = ""

    exp_req = rec.get("experienceRequirements", {})
    if isinstance(exp_req, dict):
        months_raw = exp_req.get("monthsOfExperience")
        # A null or non-string description must not crash .lower().
        desc_str = str(exp_req.get("description") or "").lower()

        months_exp = None
        if months_raw is not None:
            try:
                months_exp = int(months_raw)
            except (TypeError, ValueError, OverflowError):
                # Unusable month count: fall through to keyword detection.
                months_exp = None

        if months_exp is not None:
            for level, (min_m, max_m) in EXPERIENCE_LEVEL_MONTHS.items():
                if min_m <= months_exp <= max_m:
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break
        else:
            for level, keywords in EXPERIENCE_LEVEL_KEYWORDS.items():
                if any(kw in desc_str for kw in keywords):
                    description_tag = level
                    level_tag = level.split('-')[0]
                    break

        if not level_tag and any(syn.lower() in desc_str for syn in ENTRY_SYNS):
            description_tag, level_tag = "Entry-Level", "Entry"

    return {"title_tag": level_tag, "description_tag": description_tag}

def get_job_urgency_tags(date_posted_str: str, valid_through_str: str, rec_id: str) -> dict:
    """Build urgency tags from a posting's publication and expiry dates.

    A posting at most RECENT_POST_DAYS whole days old is tagged "new". A
    posting whose expiry lies between now and CLOSING_SOON_DAYS from now is
    tagged "closing", which overrides "new". Unparseable or empty dates are
    ignored. *rec_id* is accepted but not used.

    Returns:
        dict with ``title_tag`` (a random phrase, or "") and
        ``description_key`` ("new", "closing" or None).
    """
    def _as_utc(text):
        # Parse ISO-8601 text (a trailing "Z" means UTC) into UTC; None when unusable.
        if not text:
            return None
        try:
            return datetime.fromisoformat(text.replace('Z', '+00:00')).astimezone(timezone.utc)
        except ValueError:
            return None

    result = {"title_tag": "", "description_key": None}
    now_utc = datetime.now(timezone.utc)

    posted_at = _as_utc(date_posted_str)
    if posted_at is not None and (now_utc - posted_at).days <= RECENT_POST_DAYS:
        result["title_tag"] = random.choice(URGENCY_TAGS_NEW_TITLE)
        result["description_key"] = "new"

    expires_at = _as_utc(valid_through_str)
    if expires_at is not None:
        remaining = expires_at - now_utc
        if timedelta(days=0) <= remaining <= timedelta(days=CLOSING_SOON_DAYS):
            result["title_tag"] = random.choice(URGENCY_TAGS_CLOSING_TITLE)
            result["description_key"] = "closing"

    return result

def to_k_notation(num_val: float, currency_symbol: str) -> str:
    """Format an amount compactly, e.g. 50000 -> "$50k", 1500 -> "$1.5k".

    Amounts whose magnitude is below 1000 are truncated to an integer
    ("$15" for 15.75). Larger amounts are shown in thousands with one
    decimal, dropping a trailing ".0".
    """
    if abs(num_val) < 1000:
        return f"{currency_symbol}{int(num_val)}"
    in_thousands = num_val / 1000.0
    formatted = f"{currency_symbol}{in_thousands:.1f}k"
    return formatted.replace(".0k", "k")

def format_salary_details(rec: dict, currency_symbol: str = "$", enable_dynamic_adjustment: bool = False,
                          chosen_emp_type: str = "Full-Time", full_time_hours: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                          part_time_hours: int = DEFAULT_PART_TIME_HOURS_PER_WEEK) -> dict:
    """Turn a record's ``baseSalary`` into display strings and conversions.

    Args:
        rec: Job record; ``baseSalary`` may carry ``minValue``, ``maxValue``
            and ``unitText`` directly or nested under ``value``.
        currency_symbol: Symbol prefixed to amounts (also stripped from
            string salary values before parsing).
        enable_dynamic_adjustment: When true, min and max are multiplied by
            one randomly chosen factor from WEIGHTED_SALARY_ADJUSTMENT_FACTORS.
        chosen_emp_type: Label containing "part-time" (any case) selects
            *part_time_hours*; anything else selects *full_time_hours*.
        full_time_hours: Weekly hours for full-time conversions.
        part_time_hours: Weekly hours for part-time conversions.

    Returns:
        * Negotiable salary: ``{"primary_display": "Negotiable",
          "is_negotiable": True, "conversions": {}}``.
        * No usable salary: same shape with ``primary_display`` "" and
          ``is_negotiable`` False.
        * Otherwise a dict with ``primary_display``, ``primary_raw_min``,
          ``primary_raw_max``, ``primary_unit_normalized``,
          ``is_negotiable`` (False) and ``conversions`` (strings keyed by
          "HOUR", "YEAR", "WEEK"; empty for the PROJECT unit).
    """
    # Validate the container types *before* calling .get() on them: a record
    # with "baseSalary": null (or a bare number/string) previously raised
    # AttributeError because the isinstance checks ran too late.
    base = rec.get('baseSalary')
    if not isinstance(base, dict):
        base = {}
    val_obj = base.get('value')
    if not isinstance(val_obj, dict):
        val_obj = {}

    # Nested values win over values placed directly on baseSalary.
    minv_raw = val_obj.get('minValue', base.get('minValue'))
    maxv_raw = val_obj.get('maxValue', base.get('maxValue'))
    unit_raw = str(val_obj.get('unitText', base.get('unitText', ''))).upper()
    primary_unit_normalized = NORMALIZED_SALARY_UNITS.get(unit_raw, "PROJECT")

    def parse_salary_value(s_val):
        # Returns a float, the marker "Negotiable", or None when unusable.
        if s_val is None:
            return None
        if isinstance(s_val, (int, float)):
            return float(s_val)
        if isinstance(s_val, str):
            s_val_cleaned = s_val.replace(currency_symbol, '').replace(',', '').strip()
            if "negotiable" in s_val_cleaned.lower() or not s_val_cleaned:
                return "Negotiable"
            try:
                return float(s_val_cleaned)
            except ValueError:
                return None
        return None

    min_val_num = parse_salary_value(minv_raw)
    max_val_num = parse_salary_value(maxv_raw)

    if min_val_num == "Negotiable" or max_val_num == "Negotiable":
        return {"primary_display": "Negotiable", "is_negotiable": True, "conversions": {}}
    if min_val_num is None and max_val_num is None:
        return {"primary_display": "", "is_negotiable": False, "conversions": {}}

    if enable_dynamic_adjustment:
        adjustment_factor = random.choice(WEIGHTED_SALARY_ADJUSTMENT_FACTORS)
        if min_val_num is not None:
            min_val_num *= adjustment_factor
        if max_val_num is not None:
            max_val_num *= adjustment_factor

    # At least one bound is a number here; pick display text and the single
    # figure the conversions are based on.
    if min_val_num is not None and max_val_num is not None:
        primary_value_for_conversion = (min_val_num + max_val_num) / 2.0
        primary_display_val = f"{to_k_notation(min_val_num, currency_symbol)}-{to_k_notation(max_val_num, currency_symbol)}"
    elif max_val_num is not None:
        primary_value_for_conversion = max_val_num
        primary_display_val = f"Up to {to_k_notation(max_val_num, currency_symbol)}"
    else:
        primary_value_for_conversion = min_val_num
        primary_display_val = to_k_notation(min_val_num, currency_symbol)

    primary_unit_display = SALARY_UNIT_MAP.get(primary_unit_normalized, "")
    # The "project" unit gets no "/unit" suffix.
    unit_suffix = '/' + primary_unit_display if primary_unit_display and primary_unit_display != 'project' else ''
    primary_salary_str = f"{primary_display_val}{unit_suffix}"

    conversions = {}
    hours_per_week = part_time_hours if "part-time" in chosen_emp_type.lower() else full_time_hours

    if primary_unit_normalized != "PROJECT":
        annual_equiv = None
        if primary_unit_normalized == "HOUR":
            annual_equiv = primary_value_for_conversion * hours_per_week * WEEKS_PER_YEAR
        elif primary_unit_normalized == "WEEK":
            annual_equiv = primary_value_for_conversion * WEEKS_PER_YEAR
        elif primary_unit_normalized == "MONTH":
            annual_equiv = primary_value_for_conversion * MONTHS_PER_YEAR
        elif primary_unit_normalized == "YEAR":
            annual_equiv = primary_value_for_conversion

        if annual_equiv is not None:
            if hours_per_week > 0:
                hourly_val = (annual_equiv / WEEKS_PER_YEAR) / hours_per_week
                # Round the hourly figure to the nearest quarter.
                pretty_hourly = round(hourly_val * 4) / 4
                conversions["HOUR"] = f"{currency_symbol}{pretty_hourly:.2f}/hr"
            if primary_unit_normalized != "YEAR":
                conversions["YEAR"] = f"{to_k_notation(annual_equiv, currency_symbol)}/yr"
            conversions["WEEK"] = f"{to_k_notation(annual_equiv / WEEKS_PER_YEAR, currency_symbol)}/wk"

    return {
        "primary_display": primary_salary_str,
        "primary_raw_min": min_val_num,
        "primary_raw_max": max_val_num,
        "primary_unit_normalized": primary_unit_normalized,
        "is_negotiable": False,
        "conversions": conversions
    }

# ==========================
# Content assembly
# ==========================

def clean_role_and_company(original_title: str, org_name_from_ho: str) -> tuple[str, str]:
    """Split a raw job title into a cleaned role and a company name.

    Cleaning steps applied to the title:
      * remove parentheticals containing any of the letters m/f/v/d/i/x
        (the pattern targets markers such as "(m/f/d)");
      * remove " job" / " jobs";
      * remove filler words (part-time, full-time, hiring, needed, urgent,
        immediate);
      * collapse whitespace and strip dangling separators.

    The company is *org_name_from_ho* when given. Otherwise a trailing
    "at/for/with <2-5 words>" in the title is taken as the company. The
    company name is then removed from the role.

    Returns:
        ``(role, company)``; "Team Member" and "Local Employer" are used
        when the respective value ends up empty.
    """
    org_name = str(org_name_from_ho or "").strip()

    def _drop_parenthetical(match):
        # Leave a single space only when removal would otherwise glue two
        # words together ("Sales (m/f/d) Manager" -> "Sales Manager").
        before = match.string[:match.start()]
        after = match.string[match.end():]
        return ' ' if before[-1:].isalnum() and after[:1].isalnum() else ''

    # `or ""` keeps a null title from becoming the literal role "None".
    role = re.sub(r'\s*\(.*?[mfvdix].*?\)\s*', _drop_parenthetical,
                  str(original_title or ""), flags=re.IGNORECASE).strip()
    role = re.sub(r"\s+jobs?\b", "", role, flags=re.IGNORECASE).strip()

    fluff = [r'\bpart[- ]?time\b', r'\bfull[- ]?time\b', r'\bhiring\b', r'\bneeded\b', r'\burgent\b', r'\bimmediate\b']
    for pattern in fluff:
        role = re.sub(pattern, '', role, flags=re.IGNORECASE)
    # Removing filler words leaves double spaces behind; tidy them up.
    role = re.sub(r'\s{2,}', ' ', role).strip()

    company, final_role = org_name, role

    if not company:
        for prep in ("at", "for", "with"):
            match = re.search(rf"^(.*?)\s+{re.escape(prep)}\s+([\w\s.,'&()-]+)$", role, flags=re.IGNORECASE)
            if match and 2 <= len(match.group(2).split()) <= 5:
                company, final_role = match.group(2).strip(), match.group(1).strip()
                break

    if company:
        final_role = re.sub(rf"\s*\b{re.escape(company)}\b", "", final_role, flags=re.IGNORECASE)

    # Strip dangling separators whether or not a company was found, so
    # "Cashier - Part Time" yields "Cashier" rather than "Cashier -".
    final_role = final_role.strip(" -|,")

    if not company:
        company = "Local Employer"
    if not final_role:
        final_role = "Team Member"
    return final_role.strip(), company.strip()

def extract_hidden_perks(text_body: str) -> list:
    """Find perk keywords in an HTML job description.

    The HTML is reduced to lower-cased plain text, every HIDDEN_PERKS_MAP
    pattern is searched in it, and up to three of the matching labels are
    returned in random order. Falsy input yields an empty list.
    """
    if not text_body:
        return []

    plain_text = BeautifulSoup(text_body, 'html.parser').get_text(" ", strip=True).lower()
    matched_labels = [
        label
        for pattern, label in HIDDEN_PERKS_MAP.items()
        if re.search(pattern, plain_text)
    ]
    pick_count = min(len(matched_labels), 3)
    return random.sample(matched_labels, pick_count)

# --- NEW: SMART TAG GENERATOR ---
def generate_smart_tags(role, city, state, job_type, smart_context):
    """Create the space-separated SEO hashtag line for the footer.

    Tags, in order: "#<City>Jobs", the job type without hyphens, the role
    context's "tag" (when *smart_context* provides one) and the first word
    of the title-cased role (unless it is already present). *state* is
    accepted but not used.

    Empty or missing *role* and *job_type* are skipped instead of raising
    (IndexError / AttributeError) or producing a bare "#".
    """
    tags = [f"#{title_case(city).replace(' ', '')}Jobs"]

    job_type_tag = str(job_type or "").replace('-', '')
    if job_type_tag:
        tags.append(f"#{job_type_tag}")

    if smart_context and smart_context.get('tag'):
        tags.append(smart_context['tag'])

    # Add generic role tag
    role_words = title_case(role).split()
    if role_words:
        role_tag = "#" + role_words[0]
        if role_tag not in tags:
            tags.append(role_tag)

    return " ".join(tags)

def create_tabular_summary(rec: dict, primary_skill: str, salary_details: dict, job_urgency: dict, exp_level_info: dict, industries_info: dict, emp_types_info: dict) -> str:
    """Generate the SPEC SHEET body: an HTML table, a short brief and a tag footer.

    Args:
        rec: Job record (title, hiringOrganization, jobLocation, description).
        primary_skill: Accepted for signature parity; not used here.
        salary_details: Result of format_salary_details.
        job_urgency: Dict with a ``title_tag`` entry.
        exp_level_info: Accepted for signature parity; not used here.
        industries_info: Accepted for signature parity; not used here.
        emp_types_info: Dict with ``template_key`` and ``title_display``.

    Returns:
        HTML string: zebra-striped table + brief paragraph + tags paragraph.

    Empty values fall back to "Negotiable" (pay), "Immediate Start"
    (urgency) and "Part-Time" (job type). Text taken from the record is
    HTML-escaped before being embedded.
    """
    # Local import keeps this block self-contained. quote=False: the values
    # are placed in element text only, never inside attributes.
    from html import escape

    def _esc(value) -> str:
        return escape(str(value), quote=False)

    # hiringOrganization may be missing, null or a bare string.
    hiring_org = rec.get('hiringOrganization')
    if isinstance(hiring_org, dict):
        org_name = hiring_org.get('name', '')
    elif isinstance(hiring_org, str):
        org_name = hiring_org
    else:
        org_name = ''

    role_for_summary, company_for_summary = clean_role_and_company(rec.get('title', ''), org_name)
    city, state, _ = get_location_details(rec)

    hidden_perks = extract_hidden_perks(rec.get('description', ''))

    role_lower = role_for_summary.lower()
    smart_context = next((ctx for key, ctx in ROLE_CONTEXT_MAP.items() if key in role_lower), None)
    if smart_context:
        role_for_summary = smart_context['synonym']

    emp_type_key = emp_types_info.get('template_key', 'PART_TIME')
    # `or` rather than a .get() default: upstream helpers always set these
    # keys, using "" for "nothing found", so a .get() default never applies.
    job_type_label = emp_types_info.get('title_display') or "Part-Time"
    urgency_label = job_urgency.get('title_tag') or 'Immediate Start'

    salary_display = salary_details.get("primary_display") or "Negotiable"
    if salary_details.get("conversions") and emp_type_key == "PART_TIME" and "HOUR" in salary_details["conversions"]:
        salary_display = salary_details["conversions"]["HOUR"]

    # Avoid ", " for missing parts and "TX, TX" when the state doubles as the city.
    location_parts = [city] if city else []
    if state and state != city:
        location_parts.append(state)
    location_display = ", ".join(location_parts)

    # --- UNIQUE BODY: SPEC SHEET TABLE ---
    # Clean, alternating colors (Zebra), Bordered
    rows = [
        ("Position", _esc(title_case(role_for_summary))),
        ("Location", _esc(location_display)),
        ("Company", _esc(company_for_summary)),
        ("Pay Rate", f"<strong>{_esc(salary_display)}</strong>"),
        ("Job Type", _esc(job_type_label)),
        ("Urgency", _esc(urgency_label)),
    ]

    if hidden_perks:
        rows.append(("Highlights", ", ".join(hidden_perks)))

    row_html = []
    for index, (label, value) in enumerate(rows):
        bg = "#f9f9f9" if index % 2 == 0 else "#ffffff"
        row_html.append(f'''
        <tr style="background-color:{bg};">
            <td style="padding:10px; border-bottom:1px solid #ddd; color:#666; width:30%;"><strong>{label}</strong></td>
            <td style="padding:10px; border-bottom:1px solid #ddd; color:#333;">{value}</td>
        </tr>
        ''')

    table_html = (
        '<table style="width:100%; border-collapse:collapse; font-family:sans-serif; font-size:14px; border:1px solid #ddd; margin-bottom:15px;">'
        + "".join(row_html)
        + '</table>'
    )

    # Add a "Job Brief" below table
    brief = f"""
    <p style="font-family:sans-serif; font-size:15px; line-height:1.5; color:#444;">
        <strong>{_esc(title_case(city))} Alert:</strong> We are currently seeking a {_esc(role_for_summary)} to join our local team. 
        This is an excellent opportunity for anyone looking for {_esc(job_type_label.lower())} work 
        with {_esc(salary_display)} potential.
    </p>
    """

    # Smart Tags Footer
    tags = generate_smart_tags(role_for_summary, city, state, job_type_label, smart_context)
    footer = f'<p style="font-size:12px; color:#999; font-family:monospace;">Tags: {_esc(tags)}</p>'

    return table_html + brief + footer

# ==========================
# Title building
# ==========================

def enforce_length(title: str, max_len: int) -> str:
    """Fit *title* into *max_len* characters.

    A title that already fits is returned stripped. Otherwise a trailing
    "(...)" group is dropped first; if the result is still too long, the
    original title is cut to ``max_len - 3`` characters and "..." is appended.
    """
    if len(title) > max_len:
        without_suffix = re.sub(r'\s*\([^)]*\)\s*$', '', title).strip()
        if len(without_suffix) > max_len:
            head = title[:max_len-3].strip()
            return head + "..."
        return without_suffix
    return title.strip()

def generate_location_focused_title(rec: dict, primary_skill: str, salary_details: dict, job_urgency:dict,
                                    exp_level_info:dict, emp_types_info:dict, industries_info:dict,
                                    dynamic_max_len:int) -> str:
    """Build a location-centric title from a randomly chosen template.

    The role and company are cleaned from the record, the role is swapped
    for its ROLE_CONTEXT_MAP synonym when a keyword matches, and a template
    from GEO_FOCUSED_TITLE_TEMPLATES (selected by
    ``emp_types_info['template_key']``) is filled in. Only templates whose
    placeholders all have non-empty values are eligible; one of them is
    picked uniformly at random. When none is eligible,
    "{city} Part-Time: {role}" is used.

    Args:
        rec: Job record.
        primary_skill: Exposed to templates as ``{skill1}``.
        salary_details: Result of format_salary_details.
        job_urgency: Dict with a ``title_tag`` entry.
        exp_level_info: Dict with a ``title_tag`` entry.
        emp_types_info: Dict with ``template_key`` and ``title_display``.
        industries_info: Accepted for signature parity; not used here.
        dynamic_max_len: Maximum title length, enforced via enforce_length.

    Returns:
        The formatted title.
    """
    # hiringOrganization may be missing, null or a bare string.
    hiring_org = rec.get('hiringOrganization')
    if isinstance(hiring_org, dict):
        ho_name = hiring_org.get('name', '')
    elif isinstance(hiring_org, str):
        ho_name = hiring_org
    else:
        ho_name = ''

    cleaned_role, company_name = clean_role_and_company(rec.get('title', rec.get('name', '')), ho_name)
    city, state, _ = get_location_details(rec)

    role_lower = cleaned_role.lower()
    for key, ctx in ROLE_CONTEXT_MAP.items():
        if key in role_lower:
            cleaned_role = ctx['synonym']
            break

    # Compact salary: prefer the hourly conversion, else the primary amount
    # without its "/unit" suffix.
    salary_fmt = ""
    if salary_details and salary_details.get("conversions"):
        if "HOUR" in salary_details["conversions"]:
            salary_fmt = salary_details["conversions"]["HOUR"].split('/')[0] + "/hr"
        else:
            salary_fmt = salary_details.get("primary_display", "").split('/')[0]

    parts = {
        "role": title_case(cleaned_role),
        "company": title_case(company_name),
        "city": title_case(city),
        "state": state.upper(),
        "job_type": emp_types_info.get('title_display', "Part-Time"),
        "hiring": random.choice(HIRING_SYNS),
        # `or`, not a .get() default: get_job_urgency_tags always sets the
        # key (to "" when nothing applies), so a default would never be used.
        "urgency_tag": job_urgency.get('title_tag') or 'Now Hiring',
        "experience_tag": exp_level_info.get('title_tag', ''),
        "salary_compact": salary_fmt if "Negotiable" not in salary_fmt else "Competitive Pay",
        "skill1": title_case(primary_skill),
        "local_area": f"{city} Area"
    }

    emp_type_key = emp_types_info.get('template_key', 'PART_TIME')
    template_list = GEO_FOCUSED_TITLE_TEMPLATES.get(emp_type_key, GEO_FOCUSED_TITLE_TEMPLATES['PART_TIME'])

    # Choose among the templates that can actually be filled. This replaces
    # up-to-15 blind retries, which could miss a usable template by chance.
    placeholder_re = re.compile(r'\{([a-zA-Z_]+)\}')
    eligible = [
        candidate for candidate in template_list
        if all(parts.get(name) for name in placeholder_re.findall(candidate))
    ]
    tmpl = random.choice(eligible) if eligible else "{city} Part-Time: {role}"

    title = tmpl.format(**parts)
    title = re.sub(r'\s{2,}', ' ', title).strip()
    # NOTE(review): this removes ALL whitespace around - | ( ) , : and the
    # bullet/dash characters, so "Role - City" becomes "Role-City". Kept
    # as-is; confirm the compaction is intentional.
    title = re.sub(r'\s*([-|(),:•—])\s*', r'\1', title)
    title = title.replace('()', '').replace('[]', '').strip(" -|,: •—")

    return enforce_length(title, dynamic_max_len)

def generate_slight_title_improvement(original_title: str, city: str, state: str, urgency_tag: str, max_len: int) -> str:
    """Lightly decorate an existing title with city and/or urgency text.

    Template buckets are tried in priority order: urgency + city, city
    only, urgency only, then a generic bracket decoration. Within a bucket
    the templates are shuffled; the first one whose placeholders are all
    non-empty and whose result fits *max_len* is returned. City templates
    are skipped when the title already names the city; the urgency-only
    bucket is skipped when the title already contains the tag. When nothing
    fits, the trimmed original title is returned (length-limited).
    """
    base_title = original_title.strip().strip(" -|,: •—")
    lowered_title = base_title.lower()

    city_in_title = city and city.lower() in lowered_title
    urgency_in_title = urgency_tag and urgency_tag.lower() in lowered_title

    buckets = {
        'urgent_geo': [
            "[{urgency_tag}] {title} - {city}",
            "{title} ({city}) - {urgency_tag}",
            "({urgency_tag}) {title} in {city}"
        ],
        'geo': [
            "{title} - {city}",
            "{title} [{city}]",
            "({city}) {title}",
            "{city} Opening: {title}"
        ],
        'urgent': [
            "[{urgency_tag}] {title}",
            "{urgency_tag}: {title}",
            "{title} (Start ASAP)"
        ],
        'bracket_pop': [
            "[Part-Time] {title}",
            "{title} (PT)",
            "{title} [Flexible]"
        ]
    }

    fields = {
        "title": base_title,
        "city": title_case(city),
        "state": state.upper(),
        "urgency_tag": urgency_tag,
    }

    # Decide which buckets apply, most specific first.
    city_missing = city and not city_in_title
    bucket_order = []
    if urgency_tag and city_missing:
        bucket_order.append('urgent_geo')
    if city_missing:
        bucket_order.append('geo')
    if urgency_tag and not urgency_in_title:
        bucket_order.append('urgent')
    bucket_order.append('bracket_pop')

    for bucket_name in bucket_order:
        options = buckets[bucket_name]
        random.shuffle(options)
        for pattern in options:
            wanted = re.findall(r'\{([^{}]+)\}', pattern)
            if any(not fields.get(name) for name in wanted):
                continue

            candidate = pattern.format(**fields)
            if len(candidate) <= max_len:
                return enforce_length(candidate, max_len)

    return enforce_length(base_title, max_len)

# ==========================
# Core pipeline
# ==========================
# Module-level placeholder, initialised to None.
# NOTE(review): presumably assigned the parsed command-line arguments by the
# script's entry point - confirm against the code that sets it.
ARGS = None

def rewrite_geotargeted_job_records(in_path: str, out_path: str, seed: int=None, logo_cdn: str = DESIRED_DEFAULT_FALLBACK_LOGO_URL,
                                    default_currency_arg: str = DEFAULT_CURRENCY, enable_salary_adj_arg: bool = False,
                                    full_time_hours_arg: int = DEFAULT_FULL_TIME_HOURS_PER_WEEK,
                                    part_time_hours_arg: int = DEFAULT_PART_TIME_HOURS_PER_WEEK,
                                    no_change_in_title_arg: bool = False,
                                    slightly_improve_title_arg: bool = False):
    """Rewrite an NDJSON file of job postings into an optimized NDJSON file.

    Each non-blank input line is parsed as JSON. Lines that are not valid
    JSON objects are skipped and counted. Every record is processed in one
    of two modes:

    * ``slightly_improve_title_arg`` set: only ``title`` is rewritten via
      ``generate_slight_title_improvement``. Records without a usable title
      are copied through unchanged.
    * otherwise: a summary table is prepended to ``description``, the title
      is regenerated (unless ``no_change_in_title_arg``), and
      ``employmentType``, ``industry``, ``url``, ``hiringOrganization`` and
      ``baseSalary`` are normalized.

    Args:
        in_path: Path of the input NDJSON file.
        out_path: Path of the output NDJSON file (overwritten).
        seed: Value mixed into the per-record random seed.
        logo_cdn: Logo URL used when an organization has none.
        default_currency_arg: Currency code used when a record has none.
        enable_salary_adj_arg: Forwarded to ``format_salary_details``.
        full_time_hours_arg: Forwarded to ``format_salary_details``.
        part_time_hours_arg: Forwarded to ``format_salary_details``.
        no_change_in_title_arg: Keep the record's title in full mode.
        slightly_improve_title_arg: Select the title-only mode.

    Errors (including a missing input file) are logged, not raised.
    """
    logging.info(f"Optimizing Geo-Targeted Jobs (v4: Spec Sheet): {in_path} -> {out_path}")

    stats = {"total": 0, "title_len_sum": 0, "skipped": 0}

    try:
        # First pass: count non-blank lines for the progress bar and for the
        # adaptive title-length threshold.
        with open(in_path, 'r', encoding='utf-8') as fin_check:
            num_lines = sum(1 for line in fin_check if line.strip())

        with open(in_path, 'r', encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
            for line in tqdm(fin, total=num_lines, desc="Generating Spec Sheets"):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError:
                    stats["skipped"] += 1
                    continue
                if not isinstance(rec, dict):
                    stats["skipped"] += 1
                    continue

                jid = rec.get('@id') or rec.get('url') or hashlib.sha256(line.encode()).hexdigest()
                # Seed with the string itself: hash() of a str is salted per
                # process, which made output differ between runs even with
                # the same --seed.
                random.seed(f"{jid}{seed}")

                # BRANCH 1: Slight Improvement
                if slightly_improve_title_arg:
                    original_title = rec.get('title', rec.get('name', ''))
                    if not original_title or not isinstance(original_title, str):
                        # Nothing usable to improve: pass the line through untouched.
                        fout.write(line)
                        continue

                    city, state, _ = get_location_details(rec)
                    job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)

                    new_title = generate_slight_title_improvement(
                        original_title, city, state,
                        job_urgency.get('title_tag', ''),
                        _dynamic_title_max_len(stats, num_lines)
                    )
                    rec['title'] = new_title
                    stats["total"] += 1
                    # Needed so the running average (and the final summary)
                    # reflect this mode too instead of staying at zero.
                    stats["title_len_sum"] += len(new_title)
                    fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")
                    continue

                # BRANCH 2: Full Optimization
                primary_skill = get_primary_skill(rec.get('skills',''))
                emp_types_info = get_employment_types_info(rec)
                industries_info = get_industries_info(rec)
                exp_level_info = get_experience_level_info(rec)

                if rec.get('datePosted'):
                    rec['datePosted'] = to_dhaka_offset(rec['datePosted'])
                if rec.get('validThrough'):
                    rec['validThrough'] = to_midnight(rec['validThrough'])

                job_urgency = get_job_urgency_tags(rec.get('datePosted'), rec.get('validThrough'), jid)

                # baseSalary may be absent, null or a non-dict in source data;
                # only read the currency from it when it is a dict.
                raw_salary = rec.get('baseSalary')
                record_currency = raw_salary.get('currency') if isinstance(raw_salary, dict) else None
                curr_code = record_currency or default_currency_arg
                curr_symbol = get_currency_symbol(curr_code)
                salary_details = format_salary_details(
                    rec, curr_symbol, enable_salary_adj_arg,
                    emp_types_info['chosen_for_description'],
                    full_time_hours_arg, part_time_hours_arg
                )

                # --- UPGRADE: TABLE BODY (SPEC SHEET) ---
                table_summary = create_tabular_summary(
                    rec, primary_skill, salary_details, job_urgency,
                    exp_level_info, industries_info, emp_types_info
                )

                # NO HR TAGS, smooth double-break
                rec['description'] = table_summary + "<br><br>" + (rec.get('description','') or "")

                if not no_change_in_title_arg:
                    rec['title'] = generate_location_focused_title(
                        rec, primary_skill, salary_details, job_urgency,
                        exp_level_info, emp_types_info, industries_info,
                        _dynamic_title_max_len(stats, num_lines)
                    )

                # A kept title can be missing or null; normalize it once for
                # the slug, company fallback and statistics below.
                title_text = str(rec.get('title') or '')

                rec['employmentType'] = emp_types_info['schema_list'] or None
                rec['industry'] = industries_info['schema_list'] or None

                if rec.get('url'):
                    rec['url'] = normalize_url(rec['url'])
                else:
                    slug = re.sub(r'[^a-z0-9]+', '-', title_text.lower()).strip('-')
                    rec['url'] = normalize_url(f"https://bgcareers.us.com/jobs/{slug}/")

                _normalize_hiring_organization(rec, title_text, logo_cdn)
                _normalize_base_salary(rec, curr_code, salary_details)

                rec.update({'@context':'http://schema.org', '@type':'JobPosting'})

                # Drop top-level keys whose value ended up as None.
                for k in list(rec.keys()):
                    if rec[k] is None:
                        del rec[k]

                stats["total"] += 1
                stats["title_len_sum"] += len(title_text)

                fout.write(json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n")

        if stats["total"]:
            avg_t = stats["title_len_sum"] / stats["total"]
            logging.info(f"Summary: avg title len={avg_t:.1f}, n={stats['total']}")
        if stats["skipped"]:
            logging.warning("Skipped %d line(s) that were not valid JSON objects.", stats["skipped"])

    except FileNotFoundError:
        logging.error(f"Input file '{in_path}' not found.")
    except Exception as e:
        logging.error(f"Error: {e}", exc_info=True)
    logging.info(f"Done. Output: {out_path}")


def _dynamic_title_max_len(stats: dict, num_lines: int) -> int:
    """Return the title length budget, adapted to the running average title length."""
    # Stay on the base budget until enough records have been processed.
    if stats["total"] < max(10, num_lines // 2):
        return MAX_TITLE_LEN_BASE
    avg_title_len = stats["title_len_sum"] / max(stats["total"], 1)
    if avg_title_len < 52:
        return min(84, MAX_TITLE_LEN_BASE + 10)
    if avg_title_len > 72:
        return max(60, MAX_TITLE_LEN_BASE - 5)
    return MAX_TITLE_LEN_BASE


def _normalize_hiring_organization(rec: dict, title_text: str, logo_cdn: str) -> None:
    """Ensure rec['hiringOrganization'] is a dict with a name and a logo."""
    ho = rec.get('hiringOrganization')
    if isinstance(ho, dict):
        if not ho.get('name'):
            ho['name'] = "Local Employer"
        if not ho.get('logo'):
            ho['logo'] = logo_cdn
        rec['hiringOrganization'] = ho
    else:
        _, cname = clean_role_and_company(title_text, None)
        rec['hiringOrganization'] = {"@type":"Organization", "name":cname, "logo": logo_cdn}


def _normalize_base_salary(rec: dict, curr_code: str, salary_details: dict) -> None:
    """Fill rec['baseSalary'] with currency and value fields from salary_details."""
    bs = rec.get('baseSalary', {})
    if not isinstance(bs, dict):
        # Leave a malformed value alone rather than crashing the whole run;
        # a null value is removed by the caller's None cleanup.
        return

    bs.setdefault('@type','MonetaryAmount')
    bs['currency'] = curr_code
    v = bs.get('value', {})
    if not isinstance(v, dict):
        v = {}
    v.setdefault('@type','QuantitativeValue')

    raw_min = salary_details.get("primary_raw_min")
    if salary_details.get("is_negotiable"):
        v.update({'description': "Negotiable", 'minValue': None, 'maxValue': None, 'unitText': None})
    elif raw_min is not None:
        raw_max = salary_details.get("primary_raw_max")
        if raw_max is None:
            # Avoid emitting the literal string "None" as maxValue.
            raw_max = raw_min
        v.update({
            'minValue': str(raw_min),
            'maxValue': str(raw_max),
            'unitText': (salary_details.get("primary_unit_normalized") or "PROJECT").upper(),
        })

    # A missing or null description is dropped instead of being serialized.
    if v.get('description') is None:
        v.pop('description', None)

    bs['value'] = v
    rec['baseSalary'] = bs

def main():
    """Parse the command line, set up logging and run the rewrite pipeline."""
    global ARGS
    ARGS = parse_args()

    # Verbose mode switches the root logger from INFO to DEBUG.
    if ARGS.verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(
        level=chosen_level,
        format="%(asctime)s - %(message)s",
        datefmt="%H:%M:%S",
    )

    # Map each CLI option onto the pipeline parameter it controls.
    pipeline_options = {
        "in_path": ARGS.input,
        "out_path": ARGS.output,
        "seed": ARGS.seed,
        "logo_cdn": ARGS.logo_cdn,
        "default_currency_arg": ARGS.currency,
        "enable_salary_adj_arg": ARGS.enable_salary_adjustment,
        "full_time_hours_arg": ARGS.full_time_hours,
        "part_time_hours_arg": ARGS.part_time_hours,
        "no_change_in_title_arg": ARGS.no_change_in_title,
        "slightly_improve_title_arg": ARGS.slightly_improve_title,
    }
    rewrite_geotargeted_job_records(**pipeline_options)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()