#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Size in bytes of the emulated RAM array (Memory.data). read_memory() returns
// 0 and write_memory() does nothing for addresses at or above this value.
#define RAM_SIZE 4096 // 4 KiB of memory

// 6502 CPU Flags: bit masks for CPU.status, used through the SET_FLAG /
// CLEAR_FLAG / CHECK_FLAG macros.
#define FLAG_CARRY     0x01
#define FLAG_ZERO      0x02
#define FLAG_INTERRUPT 0x04
#define FLAG_DECIMAL   0x08
#define FLAG_BREAK     0x10
// FLAG_CONSTANT is the only bit reset_cpu() leaves set in CPU.status.
#define FLAG_CONSTANT  0x20
#define FLAG_OVERFLOW  0x40
#define FLAG_SIGN      0x80

// Base address added to the 8-bit stack pointer by push8()/pull8() to form
// the stack slot address.
#define BASE_STACK 0x100

// Function declarations for external functions.
// No definition or call of these appears in the part of the file reviewed
// here, so they must be supplied by another translation unit at link time if
// they are ever used.
// NOTE(review): the names suggest hex/serial output and keyboard polling
// helpers - confirm against the platform code that provides them.
extern void printhex(uint16_t val);
extern void serout(uint8_t value);
extern uint8_t getkey();
extern void clearkey();

// Forward declarations so the function-pointer types below can name the CPU
// and Memory structs before their full definitions appear.
typedef struct CPU CPU;
typedef struct Memory Memory;
// Function pointer types for instructions and addressing modes. Both kinds of
// handler share one signature: they receive the CPU state and the memory
// image and return nothing, communicating through fields of CPU.
typedef void (*InstructionFunc)(CPU* cpu, Memory* mem);
typedef void (*AddressingModeFunc)(CPU* cpu, Memory* mem);

// Structure to hold CPU state: the architectural registers plus scratch
// fields that the addressing-mode and instruction handlers use to pass data
// to each other.
typedef struct CPU {
    // Program counter; advanced by fetch_opcode() and by the addressing modes
    // as they consume operand bytes.
    uint16_t pc;
    // sp: stack pointer, used as an offset from BASE_STACK.
    // a, x, y: accumulator and index registers.
    // status: flag bits (FLAG_* masks).
    uint8_t sp, a, x, y, status;
    uint16_t ea;      // Effective address computed by the addressing-mode routine
    uint16_t reladdr; // Branch offset written by rel(), sign-extended to 16 bits
    // Scratch: operand most recently fetched by an instruction handler.
    uint16_t value;
    // Scratch: instruction result; wider than 8 bits so CALC_CARRY can see a
    // carry out of bit 7.
    uint16_t result;
    // Opcode byte most recently fetched by execute_cpu().
    uint8_t opcode;
    // Non-zero when the operand is the accumulator rather than memory; set by
    // acc() and cleared by execute_cpu() before each instruction.
    uint8_t use_accumulator;
    // Number of instructions completed by execute_cpu() since reset_cpu().
    uint32_t instructions_executed;
} CPU;

// Structure to hold Memory state: a flat RAM_SIZE-byte array indexed directly
// by address. read_memory()/write_memory() bounds-check against RAM_SIZE;
// direct indexing of `data` does not.
typedef struct Memory {
    uint8_t data[RAM_SIZE];
} Memory;

// Structure for opcode entry: one slot of the 256-entry dispatch table.
// execute_cpu() calls addressing_mode first (when non-NULL) and then
// instruction; a NULL instruction marks the opcode as illegal.
typedef struct OpcodeEntry {
    InstructionFunc instruction;
    AddressingModeFunc addressing_mode;
    // Short name of the instruction (e.g. "ADC"); not read by the code in the
    // part of the file reviewed here.
    const char* mnemonic;
} OpcodeEntry;

// Flag manipulation macros. `cpu` is a pointer to a CPU and `flag` is one or
// more FLAG_* masks. CHECK_FLAG yields the masked bits (non-zero when any of
// them is set), not a normalized 0/1.
#define SET_FLAG(cpu, flag)     ((cpu)->status |= (flag))
#define CLEAR_FLAG(cpu, flag)   ((cpu)->status &= ~(flag))
#define CHECK_FLAG(cpu, flag)   ((cpu)->status & (flag))

// Helper macros for flag calculations. Each sets or clears exactly one flag:
//   CALC_ZERO     - zero flag set when val == 0; the visible callers pass a
//                   value already masked to 8 bits.
//   CALC_SIGN     - sign flag set when bit 7 of val is set.
//   CALC_CARRY    - carry flag set when val exceeds 0xFF, so val must be the
//                   unmasked, wider-than-8-bit result.
//   CALC_OVERFLOW - overflow flag set when a and b agree in bit 7 but result
//                   differs from them there (signed overflow of an addition).
// CALC_OVERFLOW expands `a` twice, so its arguments should have no side
// effects.
#define CALC_ZERO(cpu, val)     ((val) == 0 ? SET_FLAG(cpu, FLAG_ZERO) : CLEAR_FLAG(cpu, FLAG_ZERO))
#define CALC_SIGN(cpu, val)     ((val) & 0x80 ? SET_FLAG(cpu, FLAG_SIGN) : CLEAR_FLAG(cpu, FLAG_SIGN))
#define CALC_CARRY(cpu, val)    ((val) > 0xFF ? SET_FLAG(cpu, FLAG_CARRY) : CLEAR_FLAG(cpu, FLAG_CARRY))
#define CALC_OVERFLOW(cpu, result, a, b) \
    (((~((a) ^ (b)) & ((a) ^ (result))) & 0x80) ? SET_FLAG(cpu, FLAG_OVERFLOW) : CLEAR_FLAG(cpu, FLAG_OVERFLOW))

// Function prototypes for addressing modes. Every routine matches
// AddressingModeFunc so it can be stored in an OpcodeEntry. Each one consumes
// any operand bytes at cpu->pc and leaves its outcome in the CPU struct:
// cpu->ea for memory operands, cpu->reladdr for rel(), cpu->use_accumulator
// for acc(); imp() does nothing.
void imp(CPU* cpu, Memory* mem);
void acc(CPU* cpu, Memory* mem);
void imm(CPU* cpu, Memory* mem);
void zp(CPU* cpu, Memory* mem);
void zpx(CPU* cpu, Memory* mem);
void zpy(CPU* cpu, Memory* mem);
void rel(CPU* cpu, Memory* mem);
void abso(CPU* cpu, Memory* mem);
void absx(CPU* cpu, Memory* mem);
void absy(CPU* cpu, Memory* mem);
void ind(CPU* cpu, Memory* mem);
void indx(CPU* cpu, Memory* mem);
void indy(CPU* cpu, Memory* mem);

// Function prototypes for instructions. Every handler matches InstructionFunc
// so it can be stored in an OpcodeEntry.
// In the part of the file reviewed here only adc, op_and, asl, bcc, bcs, beq,
// nop, lda, ldx and ldy have definitions; the other prototypes are not backed
// by an implementation in this view, so referencing them (for example from
// the opcode table) would fail at link time unless they are defined elsewhere.
void adc(CPU* cpu, Memory* mem);
void op_and(CPU* cpu, Memory* mem);
void asl(CPU* cpu, Memory* mem);
void bcc(CPU* cpu, Memory* mem);
void bcs(CPU* cpu, Memory* mem);
void beq(CPU* cpu, Memory* mem);
void op_bit(CPU* cpu, Memory* mem);
void bmi(CPU* cpu, Memory* mem);
void bne(CPU* cpu, Memory* mem);
void bpl(CPU* cpu, Memory* mem);
void brk(CPU* cpu, Memory* mem);
void bvc(CPU* cpu, Memory* mem);
void bvs(CPU* cpu, Memory* mem);
void clc(CPU* cpu, Memory* mem);
void cld(CPU* cpu, Memory* mem);
void cli(CPU* cpu, Memory* mem);
void clv(CPU* cpu, Memory* mem);
void cmp(CPU* cpu, Memory* mem);
void cpx(CPU* cpu, Memory* mem);
void cpy(CPU* cpu, Memory* mem);
void dec(CPU* cpu, Memory* mem);
void dex(CPU* cpu, Memory* mem);
void dey(CPU* cpu, Memory* mem);
void eor(CPU* cpu, Memory* mem);
void inc(CPU* cpu, Memory* mem);
void inx(CPU* cpu, Memory* mem);
void iny(CPU* cpu, Memory* mem);
void jmp(CPU* cpu, Memory* mem);
void jsr(CPU* cpu, Memory* mem);
void lda(CPU* cpu, Memory* mem);
void ldx(CPU* cpu, Memory* mem);
void ldy(CPU* cpu, Memory* mem);
void lsr(CPU* cpu, Memory* mem);
void nop(CPU* cpu, Memory* mem);
void ora(CPU* cpu, Memory* mem);
void pha(CPU* cpu, Memory* mem);
void php(CPU* cpu, Memory* mem);
void pla(CPU* cpu, Memory* mem);
void plp(CPU* cpu, Memory* mem);
void rol(CPU* cpu, Memory* mem);
void ror(CPU* cpu, Memory* mem);
void rti(CPU* cpu, Memory* mem);
void rts(CPU* cpu, Memory* mem);
void sbc(CPU* cpu, Memory* mem);
void sec(CPU* cpu, Memory* mem);
void sed(CPU* cpu, Memory* mem);
void sei(CPU* cpu, Memory* mem);
void sta(CPU* cpu, Memory* mem);
void stx(CPU* cpu, Memory* mem);
void sty(CPU* cpu, Memory* mem);
void tax(CPU* cpu, Memory* mem);
void tay(CPU* cpu, Memory* mem);
void tsx(CPU* cpu, Memory* mem);
void txa(CPU* cpu, Memory* mem);
void txs(CPU* cpu, Memory* mem);
void tya(CPU* cpu, Memory* mem);

// Read one byte of emulated memory.
//   cpu     - not used by this function.
//   mem     - memory image to read from.
//   address - 16-bit address to read.
// Returns the stored byte for addresses below RAM_SIZE and 0 for every other
// address; no memory-mapped I/O or ROM is modelled here.
uint8_t read_memory(CPU* cpu, Memory* mem, uint16_t address) {
    (void)cpu;
    return (address >= RAM_SIZE) ? 0 : mem->data[address];
}

// Write one byte of emulated memory.
//   cpu     - not used by this function.
//   mem     - memory image to modify.
//   address - 16-bit destination address.
//   value   - byte to store.
// Writes to addresses at or above RAM_SIZE are silently dropped; no
// memory-mapped I/O or ROM is modelled here.
void write_memory(CPU* cpu, Memory* mem, uint16_t address, uint8_t value) {
    (void)cpu;
    if (address >= RAM_SIZE) {
        return;
    }
    mem->data[address] = value;
}

// Push one byte onto the stack: store it at BASE_STACK + sp, then decrement
// sp (the 8-bit stack pointer wraps from 0x00 to 0xFF).
void push8(CPU* cpu, Memory* mem, uint8_t value) {
    uint16_t slot = (uint16_t)(BASE_STACK + cpu->sp);
    cpu->sp--;
    write_memory(cpu, mem, slot, value);
}

// Push a 16-bit word onto the stack, high byte first, so that the low byte
// ends up at the lower address and pull16() can read it back low-then-high.
void push16(CPU* cpu, Memory* mem, uint16_t value) {
    uint8_t high_byte = (uint8_t)(value >> 8);
    uint8_t low_byte = (uint8_t)(value & 0xFF);

    push8(cpu, mem, high_byte);
    push8(cpu, mem, low_byte);
}

// Pull one byte from the stack: increment sp first (8-bit wrap from 0xFF to
// 0x00), then read the byte at BASE_STACK + sp.
uint8_t pull8(CPU* cpu, Memory* mem) {
    cpu->sp++;
    uint16_t slot = (uint16_t)(BASE_STACK + cpu->sp);
    return read_memory(cpu, mem, slot);
}

// Pull a 16-bit word from the stack. The low byte is pulled first and the
// high byte second, mirroring the order used by push16().
uint16_t pull16(CPU* cpu, Memory* mem) {
    uint16_t word = pull8(cpu, mem);
    word |= (uint16_t)(pull8(cpu, mem) << 8);
    return word;
}

// Reset the CPU to its power-on state.
// Loads pc from the little-endian reset vector at 0xFFFC/0xFFFD (read through
// read_memory(), so each byte is 0 when that address lies outside RAM), sets
// sp to 0xFD, zeroes A/X/Y, leaves only FLAG_CONSTANT set in status and
// clears the executed-instruction counter. The scratch fields (ea, reladdr,
// value, result, opcode, use_accumulator) are not touched.
void reset_cpu(CPU* cpu, Memory* mem) {
    uint8_t vector_low = read_memory(cpu, mem, 0xFFFC);
    uint8_t vector_high = read_memory(cpu, mem, 0xFFFD);

    cpu->instructions_executed = 0;
    cpu->status = FLAG_CONSTANT;
    cpu->a = 0;
    cpu->x = 0;
    cpu->y = 0;
    cpu->sp = 0xFD;
    cpu->pc = (uint16_t)((vector_high << 8) | vector_low);
}

// Fetch the byte at the current program counter and advance pc by one
// (16-bit wrap-around). Returns the fetched byte.
uint8_t fetch_opcode(CPU* cpu, Memory* mem) {
    uint16_t fetch_addr = cpu->pc;
    cpu->pc = (uint16_t)(fetch_addr + 1);
    return read_memory(cpu, mem, fetch_addr);
}

// Return the operand selected by the preceding addressing-mode routine:
// the accumulator when use_accumulator is non-zero, otherwise the byte at
// the effective address cpu->ea.
uint16_t get_operand(CPU* cpu, Memory* mem) {
    return cpu->use_accumulator ? cpu->a : read_memory(cpu, mem, cpu->ea);
}

// Store the low 8 bits of `value` to wherever the operand came from: the
// accumulator when use_accumulator is non-zero, otherwise memory at cpu->ea.
void store_result(CPU* cpu, Memory* mem, uint16_t value) {
    uint8_t low_byte = (uint8_t)(value & 0xFF);

    if (!cpu->use_accumulator) {
        write_memory(cpu, mem, cpu->ea, low_byte);
        return;
    }
    cpu->a = low_byte;
}

// Addressing mode implementations
// Implied addressing: the instruction has no operand, so there is nothing to
// decode and no CPU state changes.
void imp(CPU* cpu, Memory* mem) {
    (void)cpu;
    (void)mem;
}

// Accumulator addressing: flag that the operand is register A so that
// get_operand()/store_result() bypass memory. No operand bytes are consumed.
void acc(CPU* cpu, Memory* mem) {
    (void)mem;
    cpu->use_accumulator = 1;
}

// Immediate addressing: the operand is the byte following the opcode, so the
// effective address is the current pc, which is then advanced past it.
void imm(CPU* cpu, Memory* mem) {
    (void)mem;
    cpu->ea = cpu->pc;
    cpu->pc++;
}

// Zero-page addressing: the single operand byte is the effective address
// (always in 0x0000-0x00FF). Advances pc by one.
void zp(CPU* cpu, Memory* mem) {
    uint8_t zp_addr = read_memory(cpu, mem, cpu->pc);
    cpu->pc++;
    cpu->ea = zp_addr;
}

// Zero-page,X addressing: operand byte plus X, wrapped to 8 bits so the
// effective address stays inside the zero page. Advances pc by one.
void zpx(CPU* cpu, Memory* mem) {
    uint8_t base = read_memory(cpu, mem, cpu->pc);
    cpu->pc++;
    cpu->ea = (uint8_t)(base + cpu->x);
}

// Zero-page,Y addressing: operand byte plus Y, wrapped to 8 bits so the
// effective address stays inside the zero page. Advances pc by one.
void zpy(CPU* cpu, Memory* mem) {
    uint8_t base = read_memory(cpu, mem, cpu->pc);
    cpu->pc++;
    cpu->ea = (uint8_t)(base + cpu->y);
}

// Relative addressing (used by branches): read the signed 8-bit offset that
// follows the opcode and store it in cpu->reladdr sign-extended to 16 bits
// (0xFF00 is OR-ed in when bit 7 is set). Advances pc by one.
void rel(CPU* cpu, Memory* mem) {
    uint8_t offset = read_memory(cpu, mem, cpu->pc);
    cpu->pc++;

    cpu->reladdr = (offset & 0x80) ? (uint16_t)(0xFF00 | offset) : offset;
}

// Absolute addressing: the two operand bytes form a little-endian 16-bit
// effective address. Advances pc by two.
void abso(CPU* cpu, Memory* mem) {
    uint16_t target = read_memory(cpu, mem, cpu->pc++);
    target |= (uint16_t)(read_memory(cpu, mem, cpu->pc++) << 8);
    cpu->ea = target;
}

// Absolute,X addressing: decode the 16-bit base address exactly as abso()
// does, then add X with 16-bit wrap-around. Advances pc by two.
void absx(CPU* cpu, Memory* mem) {
    abso(cpu, mem);
    cpu->ea = (uint16_t)(cpu->ea + cpu->x);
}

// Absolute,Y addressing: decode the 16-bit base address exactly as abso()
// does, then add Y with 16-bit wrap-around. Advances pc by two.
void absy(CPU* cpu, Memory* mem) {
    abso(cpu, mem);
    cpu->ea = (uint16_t)(cpu->ea + cpu->y);
}

// Indirect addressing: the two operand bytes form a pointer, and the
// effective address is the little-endian word stored at that pointer.
// The high byte is fetched from (pointer + 1) with the increment confined to
// the low 8 bits, so a pointer ending in 0xFF wraps to the start of the same
// page instead of crossing into the next one (the page-boundary behaviour
// the original code deliberately reproduces). Advances pc by two.
void ind(CPU* cpu, Memory* mem) {
    uint16_t pointer = read_memory(cpu, mem, cpu->pc++);
    pointer |= (uint16_t)(read_memory(cpu, mem, cpu->pc++) << 8);

    // Keep the page (high byte) and wrap only the offset within the page.
    uint16_t high_byte_addr = (uint16_t)((pointer & 0xFF00) | ((pointer + 1) & 0x00FF));

    uint16_t target = read_memory(cpu, mem, pointer);
    target |= (uint16_t)(read_memory(cpu, mem, high_byte_addr) << 8);
    cpu->ea = target;
}

// Indexed Indirect (Indirect,X) addressing: add X to the operand byte (8-bit
// wrap) to get a zero-page pointer, then read the little-endian effective
// address from that pointer; the second byte's address also wraps within the
// zero page. Advances pc by one.
void indx(CPU* cpu, Memory* mem) {
    uint8_t zp_pointer = (uint8_t)(read_memory(cpu, mem, cpu->pc++) + cpu->x);
    uint8_t zp_next = (uint8_t)(zp_pointer + 1);

    uint16_t low = read_memory(cpu, mem, zp_pointer);
    uint16_t high = read_memory(cpu, mem, zp_next);
    cpu->ea = (uint16_t)((high << 8) | low);
}

// Indirect Indexed (Indirect),Y addressing: the operand byte is a zero-page
// pointer to a little-endian base address (second byte wraps within the zero
// page); Y is added to that base with 16-bit wrap-around. Advances pc by one.
void indy(CPU* cpu, Memory* mem) {
    uint8_t zp_pointer = read_memory(cpu, mem, cpu->pc++);
    uint8_t zp_next = (uint8_t)(zp_pointer + 1);

    uint16_t low = read_memory(cpu, mem, zp_pointer);
    uint16_t high = read_memory(cpu, mem, zp_next);
    uint16_t base = (uint16_t)((high << 8) | low);

    cpu->ea = (uint16_t)(base + cpu->y);
}

// Instruction implementations (only a subset for brevity)

// ADC - Add with Carry.
// Computes A + operand + carry-in as a 16-bit sum, updates the carry, zero,
// overflow and sign flags from it, then stores the low 8 bits in A. The
// flags are derived before A is overwritten because the overflow test needs
// the original accumulator value. FLAG_DECIMAL is not consulted, so the
// addition is always binary.
void adc(CPU* cpu, Memory* mem) {
    uint16_t carry_in = CHECK_FLAG(cpu, FLAG_CARRY) ? 1 : 0;

    cpu->value = get_operand(cpu, mem);
    cpu->result = cpu->a + cpu->value + carry_in;

    uint8_t sum = (uint8_t)(cpu->result & 0xFF);

    CALC_CARRY(cpu, cpu->result);
    CALC_ZERO(cpu, sum);
    CALC_OVERFLOW(cpu, cpu->result, cpu->a, cpu->value);
    CALC_SIGN(cpu, sum);

    cpu->a = sum;
}

// AND - Logical AND.
// A = A & operand; the zero and sign flags follow the new accumulator value.
void op_and(CPU* cpu, Memory* mem) {
    cpu->value = get_operand(cpu, mem);

    uint8_t masked = (uint8_t)(cpu->a & cpu->value);
    cpu->a = masked;

    CALC_ZERO(cpu, masked);
    CALC_SIGN(cpu, masked);
}

// ASL - Arithmetic Shift Left.
// Shifts the operand (accumulator or memory, as chosen by the addressing
// mode) left by one bit. The bit shifted out of bit 7 becomes the carry
// flag; zero and sign reflect the 8-bit result, which is written back to the
// same place the operand came from.
void asl(CPU* cpu, Memory* mem) {
    uint16_t operand = get_operand(cpu, mem);
    uint16_t shifted = (uint16_t)(operand << 1);
    uint8_t low_byte = (uint8_t)(shifted & 0xFF);

    cpu->value = operand;

    // Carry must be taken from the unmasked value so bit 8 is still visible.
    CALC_CARRY(cpu, shifted);
    CALC_ZERO(cpu, low_byte);
    CALC_SIGN(cpu, low_byte);

    cpu->result = low_byte;
    store_result(cpu, mem, low_byte);
}

// BCC - Branch if Carry Clear.
// When the carry flag is clear, adds the signed 8-bit offset held in the low
// byte of cpu->reladdr to pc (16-bit wrap-around); otherwise pc is unchanged.
void bcc(CPU* cpu, Memory* mem) {
    (void)mem;

    if (CHECK_FLAG(cpu, FLAG_CARRY)) {
        return;
    }

    int8_t offset = (int8_t)cpu->reladdr;
    cpu->pc = (uint16_t)(cpu->pc + offset);
}

// Other instructions would be implemented similarly...

// Opcode dispatch table, indexed by opcode byte. Each populated entry pairs
// an instruction handler with the addressing-mode routine that must run
// before it, plus a mnemonic. Entries left out are zero-initialized, and
// execute_cpu() treats a NULL instruction pointer as an illegal opcode.
// Only instructions that have an implementation in this file are registered.
OpcodeEntry opcode_table[256] = {
    // ADC
    [0x69] = {adc, imm, "ADC"}, [0x65] = {adc, zp, "ADC"},   [0x75] = {adc, zpx, "ADC"},
    [0x6D] = {adc, abso, "ADC"}, [0x7D] = {adc, absx, "ADC"}, [0x79] = {adc, absy, "ADC"},
    [0x61] = {adc, indx, "ADC"}, [0x71] = {adc, indy, "ADC"},
    // AND
    [0x29] = {op_and, imm, "AND"}, [0x25] = {op_and, zp, "AND"}, [0x35] = {op_and, zpx, "AND"},
    [0x2D] = {op_and, abso, "AND"}, [0x3D] = {op_and, absx, "AND"}, [0x39] = {op_and, absy, "AND"},
    [0x21] = {op_and, indx, "AND"}, [0x31] = {op_and, indy, "AND"},
    // ASL
    [0x0A] = {asl, acc, "ASL"},     [0x06] = {asl, zp, "ASL"},     [0x16] = {asl, zpx, "ASL"},
    [0x0E] = {asl, abso, "ASL"},    [0x1E] = {asl, absx, "ASL"},
    // Branches on carry / zero (relative addressing)
    [0x90] = {bcc, rel, "BCC"},     [0xB0] = {bcs, rel, "BCS"},    [0xF0] = {beq, rel, "BEQ"},
    // LDA
    [0xA9] = {lda, imm, "LDA"}, [0xA5] = {lda, zp, "LDA"},   [0xB5] = {lda, zpx, "LDA"},
    [0xAD] = {lda, abso, "LDA"}, [0xBD] = {lda, absx, "LDA"}, [0xB9] = {lda, absy, "LDA"},
    [0xA1] = {lda, indx, "LDA"}, [0xB1] = {lda, indy, "LDA"},
    // LDX (indexed forms use Y)
    [0xA2] = {ldx, imm, "LDX"}, [0xA6] = {ldx, zp, "LDX"},   [0xB6] = {ldx, zpy, "LDX"},
    [0xAE] = {ldx, abso, "LDX"}, [0xBE] = {ldx, absy, "LDX"},
    // LDY (indexed forms use X)
    [0xA0] = {ldy, imm, "LDY"}, [0xA4] = {ldy, zp, "LDY"},   [0xB4] = {ldy, zpx, "LDY"},
    [0xAC] = {ldy, abso, "LDY"}, [0xBC] = {ldy, absx, "LDY"},
    // NOP
    [0xEA] = {nop, imp, "NOP"},
    // ... More opcodes
};

// Run the fetch/decode/execute loop.
//   cpu    - CPU state to advance.
//   mem    - memory image the program runs against.
//   cycles - budget for this call. It is decremented once per executed
//            instruction (per-instruction cycle costs are not modelled), so
//            it currently acts as an instruction count.
// For each instruction the opcode is fetched, the table entry's addressing
// mode (if any) is run to set up the operand, and then the instruction
// handler is called. Execution stops early, after printing a diagnostic,
// when the fetched opcode has no instruction handler in opcode_table.
void execute_cpu(CPU* cpu, Memory* mem, int32_t cycles) {
    while (cycles > 0) {
        // Remember where the opcode came from: pc moves on during fetch (and
        // may wrap), so it cannot be reconstructed reliably afterwards.
        uint16_t opcode_addr = cpu->pc;

        cpu->opcode = fetch_opcode(cpu, mem);
        const OpcodeEntry* entry = &opcode_table[cpu->opcode];

        // Only acc() sets this flag, so it has to be cleared per instruction.
        cpu->use_accumulator = 0;

        if (entry->addressing_mode) {
            entry->addressing_mode(cpu, mem);
        }
        if (!entry->instruction) {
            // Handle illegal opcode. %X expects unsigned int, hence the casts.
            printf("Illegal opcode 0x%02X at address 0x%04X\n",
                   (unsigned)cpu->opcode, (unsigned)opcode_addr);
            break;
        }
        entry->instruction(cpu, mem);

        cpu->instructions_executed++;
        // Adjust cycles based on the instruction (not implemented here)
        cycles--;
    }
}

// Implementations for other instructions and addressing modes
// Here you would fill in the rest of the instruction implementations
// For example:

// BCS - Branch if Carry Set.
// When the carry flag is set, adds the signed 8-bit offset held in the low
// byte of cpu->reladdr to pc (16-bit wrap-around); otherwise pc is unchanged.
void bcs(CPU* cpu, Memory* mem) {
    (void)mem;

    if (!CHECK_FLAG(cpu, FLAG_CARRY)) {
        return;
    }

    int8_t offset = (int8_t)cpu->reladdr;
    cpu->pc = (uint16_t)(cpu->pc + offset);
}

// BEQ - Branch if Equal.
// When the zero flag is set, adds the signed 8-bit offset held in the low
// byte of cpu->reladdr to pc (16-bit wrap-around); otherwise pc is unchanged.
void beq(CPU* cpu, Memory* mem) {
    (void)mem;

    if (!CHECK_FLAG(cpu, FLAG_ZERO)) {
        return;
    }

    int8_t offset = (int8_t)cpu->reladdr;
    cpu->pc = (uint16_t)(cpu->pc + offset);
}

// NOP - No Operation. Leaves the CPU state and memory untouched.
void nop(CPU* cpu, Memory* mem) {
    (void)cpu;
    (void)mem;
}

// LDA - Load Accumulator.
// Copies the operand selected by the addressing mode into A and updates the
// zero and sign flags from the loaded byte.
void lda(CPU* cpu, Memory* mem) {
    uint8_t loaded = (uint8_t)get_operand(cpu, mem);

    cpu->a = loaded;
    CALC_ZERO(cpu, loaded);
    CALC_SIGN(cpu, loaded);
}

// LDX - Load X Register.
// Copies the operand selected by the addressing mode into X and updates the
// zero and sign flags from the loaded byte.
void ldx(CPU* cpu, Memory* mem) {
    uint8_t loaded = (uint8_t)get_operand(cpu, mem);

    cpu->x = loaded;
    CALC_ZERO(cpu, loaded);
    CALC_SIGN(cpu, loaded);
}

// LDY - Load Y Register.
// Copies the operand selected by the addressing mode into Y and updates the
// zero and sign flags from the loaded byte.
void ldy(CPU* cpu, Memory* mem) {
    uint8_t loaded = (uint8_t)get_operand(cpu, mem);

    cpu->y = loaded;
    CALC_ZERO(cpu, loaded);
    CALC_SIGN(cpu, loaded);
}

// And so on for the rest of the instructions...

// Example of main function to demonstrate usage
int main() {
    CPU cpu;
    Memory mem;

    // Initialize memory and CPU
    memset(&mem, 0, sizeof(Memory));
    reset_cpu(&cpu, &mem);

    // Load a program into memory (for demonstration purposes)
    // For example, an infinite loop: JMP $
    mem.data[0x8000] = 0x4C; // JMP
    mem.data[0x8001] = 0x00;
    mem.data[0x8002] = 0x80;

    // Set reset vector to 0x8000
    mem.data[0xFFFC] = 0x00;
    mem.data[0xFFFD] = 0x80;

    // Reset CPU to start execution from reset vector
    reset_cpu(&cpu, &mem);

    // Execute CPU cycles (infinite loop in this case)
    execute_cpu(&cpu, &mem, 100); // Execute 100 cycles for demonstration

    return 0;
}