from __future__ import print_function

import argparse
import logging
import math
import os
import sys
import time

import numpy as np
import sklearn.metrics as metrics
import tensorboard_logger as tb_logger
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from tabulate import tabulate
from torchvision import datasets, transforms

from dataset.cxr import ChestXray
from calibrate.losses import (FocalLoss, LabelSmoothingLoss, SupConLoss,
                              WeightedCrossEntropyLoss)
from calibrate.util import (AverageMeter, ExponentialMovingAverage,
                            GaussianBlur, SaveFeaturesInputHook,
                            TwoCropTransform, accuracy, adjust_learning_rate,
                            save_model, set_optimizer, warmup_learning_rate)
from models.module import all_classifiers, get_imagenet_model

try:
    import apex
    from apex import amp, optimizers
except ImportError:
    pass



def parse_option():
    """Parse command-line options and derive run paths and schedule settings.

    Besides the raw argparse values, the returned namespace carries derived
    attributes: ``model_path``, ``tb_path``, ``model_name``, ``tb_folder``,
    ``save_folder``, ``lr_decay_epochs`` (converted from a comma-separated
    string to a list of ints) and, when warm-up is active, ``warmup_from``,
    ``warm_epochs`` and ``warmup_to``.  The tensorboard and checkpoint
    folders are created on disk if they do not exist yet.

    Returns:
        argparse.Namespace: the parsed and augmented options.

    Raises:
        SystemExit: on invalid command-line input, including
            ``--dataset path`` without ``--data_folder``, ``--mean`` and
            ``--std``.
    """
    parser = argparse.ArgumentParser('argument for training')

    parser.add_argument('--print_freq', type=int, default=50,
                        help='print frequency')
    parser.add_argument('--save_freq', type=int, default=2,
                        help='save frequency')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='batch_size')
    parser.add_argument('--num_workers', type=int, default=8,
                        help='num of workers to use')
    parser.add_argument('--epochs', type=int, default=18,
                        help='number of training epochs')

    # optimization
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--lr_decay_epochs', type=str, default='700,800,900',
                        help='where to decay lr, can be a list')
    parser.add_argument('--lr_decay_rate', type=float, default=0.1,
                        help='decay rate for learning rate')
    parser.add_argument('--weight_decay', type=float, default=1e-4,
                        help='weight decay')
    parser.add_argument('--momentum', type=float, default=0.9,
                        help='momentum')

    # model dataset
    parser.add_argument('--model', type=str, default='resnet50')
    parser.add_argument('--loss', type=str, default='wce')
    parser.add_argument('--loss_gamma', type=float, default=0.5)
    parser.add_argument('--dataset', type=str, default='cifar10',
                        choices=['cifar10', 'cifar100', 'path', 'imagenet'], help='dataset')
    parser.add_argument('--mean', type=str,
                        help='mean of dataset in path in form of str tuple')
    parser.add_argument('--std', type=str,
                        help='std of dataset in path in form of str tuple')
    parser.add_argument('--ema_decay', type=float, default=0.99,
                        help='Decay rate for exponential moving average of model weights')
    parser.add_argument('--contrast_weight', type=float, default=0.1,
                        help='Weighting for the supervised contrast')
    parser.add_argument('--data_folder', type=str,
                        default=None, help='path to custom dataset')
    parser.add_argument('--size', type=int, default=32,
                        help='parameter for RandomResizedCrop')

    # method
    parser.add_argument('--method', type=str, default='SupCon',
                        choices=['SupCon', 'SimCLR'], help='choose method')

    parser.add_argument('--eval_only', action='store_true', default=False)
    parser.add_argument('--checkpoint', type=str, default='')

    # temperature
    parser.add_argument('--temp', type=float, default=0.07,
                        help='temperature for loss function')

    # other setting
    parser.add_argument('--cosine', action='store_true',
                        help='using cosine annealing')
    parser.add_argument('--syncBN', action='store_true',
                        help='using synchronized batch normalization')
    parser.add_argument('--warm', action='store_true',
                        help='warm-up for large batch training')
    parser.add_argument('--trial', type=str, default='0',
                        help='id for recording multiple runs')

    opt = parser.parse_args()

    # A 'path' dataset needs its location and normalization statistics.
    # Report this through argparse instead of `assert`, which is stripped
    # when Python runs with -O and gives no hint about what is missing.
    if opt.dataset == 'path':
        missing = [flag for flag, value in (('--data_folder', opt.data_folder),
                                            ('--mean', opt.mean),
                                            ('--std', opt.std))
                   if value is None]
        if missing:
            parser.error('--dataset path requires {}'.format(', '.join(missing)))

    # set the path according to the environment
    if opt.data_folder is None:
        opt.data_folder = './datasets/'
    opt.model_path = './save/SupCon/{}_models'.format(opt.dataset)
    opt.tb_path = './save/SupCon/{}_tensorboard'.format(opt.dataset)

    # '700,800,900' -> [700, 800, 900]
    opt.lr_decay_epochs = [int(it) for it in opt.lr_decay_epochs.split(',')]

    # Set the name of the model and related parameters
    opt.model_name = '{}_{}_{}_lr_{}_decay_{}_bsz_{}_temp_{}_trial_{}_ema{}_epoch{}_cw{}'.\
        format(opt.method, opt.dataset, opt.model, opt.learning_rate,
               opt.weight_decay, opt.batch_size, opt.temp, opt.trial, opt.ema_decay, opt.epochs, opt.contrast_weight)

    if opt.cosine:
        opt.model_name = '{}_cosine'.format(opt.model_name)

    # warm-up is forced on for large-batch training
    if opt.batch_size > 256:
        opt.warm = True
    if opt.warm:
        opt.model_name = '{}_warm'.format(opt.model_name)
        opt.warmup_from = 0.01
        opt.warm_epochs = 10
        if opt.cosine:
            # Warm up to the value of the cosine curve, between learning_rate
            # and eta_min, evaluated at epoch `warm_epochs`.
            eta_min = opt.learning_rate * (opt.lr_decay_rate ** 3)
            opt.warmup_to = eta_min + (opt.learning_rate - eta_min) * (
                1 + math.cos(math.pi * opt.warm_epochs / opt.epochs)) / 2
        else:
            opt.warmup_to = opt.learning_rate

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    opt.tb_folder = os.path.join(opt.tb_path, opt.model_name)
    os.makedirs(opt.tb_folder, exist_ok=True)

    opt.save_folder = os.path.join(opt.model_path, opt.model_name)
    os.makedirs(opt.save_folder, exist_ok=True)

    return opt


def set_loader(opt):
    """Build the training and validation data loaders.

    Both splits are read through ``ChestXray`` from ``opt.data_folder`` and
    share the same preprocessing pipeline (resize, tensor conversion,
    normalization with mean/std 0.5 per channel).

    Args:
        opt: options namespace; reads ``data_folder``, ``batch_size`` and
            ``num_workers``.

    Returns:
        tuple: ``(train_loader, val_loader)``.
    """
    def make_transform():
        # NOTE(review): with a single int, torchvision's Resize scales the
        # shorter image side to 256 and keeps the aspect ratio; a 256x256
        # result presumes square inputs - confirm against the dataset.
        return transforms.Compose([
            transforms.Resize(256),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                 std=[0.5, 0.5, 0.5]),
        ])

    root = opt.data_folder

    train_dataset = ChestXray(
        root=root,
        split_txt='train_val_list.txt',
        transform=make_transform(),
        labeled_file=os.path.join(root, 'train_val_list_labels.txt'),
    )
    val_dataset = ChestXray(
        root=root,
        split_txt='test_list.txt',
        transform=make_transform(),
        labeled_file=os.path.join(root, 'test_list_labels.txt'),
    )

    # No custom sampler is used, so the training data is simply shuffled.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        sampler=None,
    )

    # Validation uses fixed loader settings, independent of the CLI options.
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
    )

    return train_loader, val_loader


def set_model(opt):
    """Build the classification model and its training criterion.

    The loss is selected from ``opt.loss`` first so that an unsupported
    value fails before the backbone is constructed.  The backbone comes from
    ``get_imagenet_model`` and its final linear layer is replaced by a
    14-output layer.  When CUDA is available the model (wrapped in
    ``DataParallel`` on multi-GPU hosts) and the criterion are moved to the
    GPU.  If ``opt.checkpoint`` points to an existing file, its ``'model'``
    entry is loaded into the model.

    Args:
        opt: options namespace; reads ``model``, ``loss``, ``loss_gamma``,
            ``syncBN`` and ``checkpoint``; ``opt.classifier`` is set to
            ``opt.model`` as a side effect.

    Returns:
        tuple: ``(model, criterion)``.

    Raises:
        NotImplementedError: if ``opt.loss`` is not one of
            ``'wce'``, ``'ce'`` or ``'focal'``.
    """
    num_outputs = 14

    if opt.loss == 'wce':
        criterion = WeightedCrossEntropyLoss(opt.loss_gamma)
    elif opt.loss == 'ce':
        criterion = torch.nn.BCEWithLogitsLoss()
    elif opt.loss == 'focal':
        criterion = FocalLoss()
    else:
        # The original built the exception without raising it, leaving
        # `criterion` unbound and failing later with an unrelated error.
        raise NotImplementedError(
            "unsupported loss '{}'; expected 'wce', 'ce' or 'focal'".format(opt.loss))

    opt.classifier = opt.model
    model = get_imagenet_model(opt, pretrained=True)
    if 'densenet' in opt.model:
        # densenet variants expose their head as `classifier`
        in_features = model.classifier.in_features
        model.classifier = torch.nn.Linear(in_features, num_outputs)
    else:
        in_features = model.fc.in_features
        model.fc = torch.nn.Linear(in_features, num_outputs)

    # enable synchronized Batch Normalization (requires the optional apex import)
    if opt.syncBN:
        model = apex.parallel.convert_syncbn_model(model)

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model)
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    if os.path.exists(opt.checkpoint):
        logging.info("Load Trained Weights")
        # Load onto the CPU first so a GPU-saved checkpoint also works on a
        # CPU-only host; load_state_dict copies into the model's own device.
        # NOTE(review): the model may already be wrapped in DataParallel
        # here, so checkpoint keys must match that wrapping - confirm.
        checkpoint = torch.load(opt.checkpoint, map_location='cpu')
        model.load_state_dict(checkpoint['model'])

    return model, criterion


def train(train_loader, model, criterion, optimizer, epoch, opt, ema):
    """Train the model for one epoch.

    For every batch: applies learning-rate warm-up, runs the forward pass,
    computes ``criterion(output, labels)``, takes one optimizer step and
    then updates the EMA shadow weights.

    Args:
        train_loader: iterable yielding ``(images, labels)`` batches.
        model: network being trained.
        criterion: loss callable taking ``(output, labels)``.
        optimizer: optimizer stepping the model parameters.
        epoch: current epoch number (used for warm-up and logging).
        opt: options namespace; reads ``print_freq`` and is passed to
            ``warmup_learning_rate``.
        ema: object whose ``update()`` is called after each optimizer step.

    Returns:
        tuple: ``(losses.avg, top1.avg)``.  ``top1`` is never updated in
        this function, so the second value is whatever ``AverageMeter``
        reports with no samples.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # NOTE(review): no accuracy is computed below; this meter only feeds the
    # log line and the return value.
    top1 = AverageMeter()

    use_cuda = torch.cuda.is_available()
    num_batches = len(train_loader)

    end = time.time()

    for idx, (images, labels) in enumerate(train_loader):
        # Time spent waiting on the data loader
        data_time.update(time.time() - end)

        if use_cuda:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
        # Cast targets to float on every device. Previously the cast lived
        # inside the CUDA branch, so CPU runs kept the loader's label dtype
        # while validate() always converts to float.
        labels = labels.float()
        bsz = labels.shape[0]

        # Warm-up the learning rate
        warmup_learning_rate(opt, epoch, idx, num_batches, optimizer)

        # Forward pass and classification loss
        output = model(images)
        loss = criterion(output, labels)

        losses.update(loss.item(), bsz)

        # Zero out the gradients and take one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update the EMA shadow variables
        ema.update()

        # Total time for this batch, including data loading
        batch_time.update(time.time() - end)
        end = time.time()

        # Print performance information every opt.print_freq batches
        if (idx + 1) % opt.print_freq == 0:
            logging.info('Train: [{0}][{1}/{2}]\t'
                  'Learning Rate: {lr:.3f}\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, idx + 1, num_batches, batch_time=batch_time,
                      lr=optimizer.param_groups[0]['lr'],
                      data_time=data_time, loss=losses, top1=top1))
            sys.stdout.flush()

    return losses.avg, top1.avg


def set_ema(model, decay):
    """Create an ``ExponentialMovingAverage`` for ``model`` and register it.

    Args:
        model: model handed to ``ExponentialMovingAverage``.
        decay: decay value handed to ``ExponentialMovingAverage``.

    Returns:
        The tracker, after its ``register()`` method has been called.
    """
    tracker = ExponentialMovingAverage(model, decay)
    tracker.register()
    return tracker


def validate(val_loader, model, classifier, opt):
    """Evaluate the model on the validation set.

    Runs the model without gradients, accumulates a binary cross-entropy
    (with logits) loss, and logs a classification report, micro/macro F1 at
    a rounded-probability threshold, and a per-class ROC AUC table.

    Args:
        val_loader: iterable yielding ``(images, labels)`` batches; its
            ``dataset.used_labels`` provides the class names for the table.
        model: network to evaluate.
        classifier: module that is only switched to eval mode here (the
            callers in this file pass the training criterion).
        opt: options namespace; reads ``print_freq``.

    Returns:
        tuple: ``(average loss, macro F1, mean AUC)``.  The mean AUC is NaN
        when no class has both positive and negative samples.
    """
    model.eval()
    classifier.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()

    y_true = []  # True labels
    y_pred = []  # Predicted probabilities

    # Only move data to the GPU when one exists, mirroring train() and
    # set_model(); an unconditional .cuda() crashes on CPU-only hosts.
    use_cuda = torch.cuda.is_available()

    with torch.no_grad():
        end = time.time()
        for idx, (images, labels) in enumerate(val_loader):
            images = images.float()
            labels = labels.float()
            if use_cuda:
                images = images.cuda()
                labels = labels.cuda()
            bsz = labels.shape[0]

            # Forward pass
            output = model(images)
            loss = F.binary_cross_entropy_with_logits(output, labels)

            # update metric
            losses.update(loss.item(), bsz)

            # Store true labels and predicted probabilities (one row per sample)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(torch.sigmoid(output).cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % opt.print_freq == 0:
                logging.info('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                          idx, len(val_loader), batch_time=batch_time,
                          loss=losses))

    # Binarize the probabilities for the threshold-based metrics
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_pred_binary = np.round(y_pred)

    logging.info(metrics.classification_report(y_true, y_pred_binary))

    f1_score_micro = metrics.f1_score(y_true, y_pred_binary, average='micro')
    f1_score_macro = metrics.f1_score(y_true, y_pred_binary, average='macro')
    logging.info('F1 Score (micro): {:.3f}'.format(f1_score_micro))
    logging.info('F1 Score (macro): {:.3f}'.format(f1_score_macro))

    # ROC AUC per label; classes with a single observed value are skipped
    # because the score is undefined for them.
    n_classes = y_true.shape[1]
    auc_scores = []
    for i in range(n_classes):
        unique_classes = np.unique(y_true[:, i])
        if len(unique_classes) > 1:
            roc_auc = metrics.roc_auc_score(y_true[:, i], y_pred[:, i])
            auc_scores.append([val_loader.dataset.used_labels[i], roc_auc])

    # Mean AUC over the scored classes; state the empty case explicitly
    # instead of relying on np.mean([]) (NaN plus a RuntimeWarning).
    if auc_scores:
        mean_auc_score = np.mean([score for _, score in auc_scores])
    else:
        logging.warning('No class has both positive and negative samples; '
                        'mean AUC is undefined')
        mean_auc_score = float('nan')

    # Prepare the table data
    table_data = [["Class", "AUC Score"]] + auc_scores + [["Mean", mean_auc_score]]

    # Print AUC scores as a table
    logging.info("AUC Score per Class:")
    logging.info(tabulate(table_data, headers="firstrow"))
    return losses.avg, f1_score_macro, mean_auc_score


def main():
    """Entry point: parse options, then either evaluate once or train.

    In ``--eval_only`` mode a single validation pass is run.  Otherwise the
    model is trained for ``opt.epochs`` epochs with Adam; after each epoch
    it is validated with the EMA shadow weights applied, metrics are written
    to tensorboard, and checkpoints are saved (best-by-AUC, periodic, and a
    final ``last.pth``).
    """
    opt = parse_option()

    # Log both to a file in the save folder and to the console.
    log_handlers = [
        logging.FileHandler(os.path.join(opt.save_folder, 'logfile.txt')),
        logging.StreamHandler(),
    ]
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=log_handlers,
    )

    train_loader, val_loader = set_loader(opt)
    model, criterion = set_model(opt)

    if opt.eval_only:
        validate(val_loader, model, criterion, opt)
        return

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=opt.learning_rate,
        weight_decay=opt.weight_decay,
    )
    ema = set_ema(model, opt.ema_decay)
    logger = tb_logger.Logger(logdir=opt.tb_folder, flush_secs=2)

    # Despite the name, this tracks the best validation mean AUC.
    best_val_acc = 0.0

    for epoch in range(1, opt.epochs + 1):
        adjust_learning_rate(opt, optimizer, epoch)

        # One training epoch, timed.
        started = time.time()
        train_loss, train_acc = train(train_loader, model,
                                      criterion, optimizer, epoch, opt, ema)
        elapsed = time.time() - started
        logging.info('epoch {}, total time {:.2f}'.format(epoch, elapsed))

        logger.log_value('train_loss', train_loss, epoch)
        logger.log_value('train_acc', train_acc, epoch)
        logger.log_value(
            'learning_rate', optimizer.param_groups[0]['lr'], epoch)

        # Validate with the EMA weights, then switch back to the raw ones.
        ema.apply_shadow_weights()
        val_loss, f1_score, auc_score = validate(val_loader, model, criterion, opt)
        ema.restore_original_weights()

        logger.log_value('val_loss', val_loss, epoch)
        logger.log_value('val_f1_score', f1_score, epoch)
        logger.log_value('val_auc_score', auc_score, epoch)

        # NOTE(review): checkpoints are written after the original weights
        # are restored, so best.pth presumably holds the raw weights rather
        # than the EMA weights that produced auc_score - confirm intent.
        if auc_score > best_val_acc:
            best_val_acc = auc_score
            best_path = os.path.join(opt.save_folder, 'best.pth')
            save_model(model, optimizer, opt, epoch, best_path)

        if epoch % opt.save_freq == 0:
            ckpt_name = 'ckpt_epoch_{epoch}.pth'.format(epoch=epoch)
            save_model(model, optimizer, opt, epoch,
                       os.path.join(opt.save_folder, ckpt_name))

    # Always keep the weights from the final epoch.
    last_path = os.path.join(opt.save_folder, 'last.pth')
    save_model(model, optimizer, opt, opt.epochs, last_path)


if __name__ == '__main__':
    main()
