import zipfile
import xml.etree.ElementTree as ET
import os
import sys

def get_shared_strings(z):
    """Return the workbook's shared-string table as a list of strings.

    Cells of type ``s`` store an index into this table, so the returned list
    must contain exactly one entry per ``<si>`` (string item) element, in
    document order.

    Args:
        z: An open ``zipfile.ZipFile`` for the .xlsx package.

    Returns:
        A list of strings; empty if the package has no
        ``xl/sharedStrings.xml`` member.
    """
    if 'xl/sharedStrings.xml' not in z.namelist():
        return []

    with z.open('xl/sharedStrings.xml') as f:
        root = ET.parse(f).getroot()

    def local_name(elem):
        # Strip the "{namespace}" prefix so matching is namespace-agnostic.
        return elem.tag.rsplit('}', 1)[-1]

    strings = []
    for si in root.iter():
        if local_name(si) != 'si':
            continue
        parts = []
        for child in si:
            kind = local_name(child)
            if kind == 't':
                # Plain string: text sits directly under <si>.
                parts.append(child.text or "")
            elif kind == 'r':
                # Rich text: one <t> per formatted run; concatenate them so
                # the item still occupies a single slot in the table.
                parts.extend(
                    run_t.text or "" for run_t in child
                    if local_name(run_t) == 't'
                )
            # <rPh> (phonetic run) also holds <t>, but that text is a reading
            # hint and not part of the cell value, so it is skipped.
        strings.append("".join(parts))
    return strings

def get_sheet_map(z):
    """Map each sheet name to the ZIP member path of its worksheet XML.

    For example ``"B2B Access Token" -> "xl/worksheets/sheet3.xml"``.

    The workbook is assumed to live at the standard ``xl/workbook.xml``
    location; its ``<sheet>`` elements give (name, relationship id) pairs and
    ``xl/_rels/workbook.xml.rels`` resolves each relationship id to a target.

    Args:
        z: An open ``zipfile.ZipFile`` for the .xlsx package.

    Returns:
        A dict of sheet name -> normalised member path, in the order the
        relationships appear in the rels file.

    Raises:
        KeyError: If ``xl/workbook.xml`` or its rels part is missing from
            the archive (raised by ``ZipFile.open``).
    """
    # Local import: ZIP member names always use forward slashes, so the
    # POSIX flavour of path handling is the right one on every platform.
    import posixpath

    rid_attr = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id'
    main_ns = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
    rel_ns = {'rel': 'http://schemas.openxmlformats.org/package/2006/relationships'}

    # 1. Parse the workbook to collect relationship id -> sheet name.
    with z.open('xl/workbook.xml') as f:
        workbook_root = ET.parse(f).getroot()
    sheets = {}
    for sheet in workbook_root.findall('.//main:sheet', main_ns):
        sheets[sheet.attrib.get(rid_attr)] = sheet.attrib.get('name')

    # 2. Parse the rels part to resolve each relationship id to a path.
    with z.open('xl/_rels/workbook.xml.rels') as f:
        rels_root = ET.parse(f).getroot()

    sheet_map = {}
    for rel in rels_root.findall('.//rel:Relationship', rel_ns):
        rid = rel.attrib.get('Id')
        if rid not in sheets:
            continue
        target = rel.attrib.get('Target')
        # A missing or external target cannot name a member of this archive.
        if not target or rel.attrib.get('TargetMode') == 'External':
            continue
        if target.startswith('/'):
            # Absolute targets are relative to the package root.
            path = target[1:]
        else:
            # Relative targets are resolved against the workbook's folder.
            path = posixpath.join('xl', target)
        # Collapse "./" and "../" segments so the result matches the actual
        # member name stored in the ZIP.
        sheet_map[sheets[rid]] = posixpath.normpath(path)

    return sheet_map

def parse_sheet(z, path, shared_strings):
    """Render one worksheet as a list of " | "-joined row strings.

    Args:
        z: An open ``zipfile.ZipFile`` for the .xlsx package.
        path: ZIP member path of the worksheet XML.
        shared_strings: List indexed by shared-string id, used to resolve
            cells whose type attribute is ``s``.

    Returns:
        One string per row that has at least one non-empty cell; the
        non-empty cell values of the row joined with " | ". Empty cells and
        empty rows are omitted.
    """
    ns = {'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
    with z.open(path) as f:
        root = ET.parse(f).getroot()

    rows = []
    for row in root.findall('.//main:row', ns):
        row_data = []
        for c in row.findall('.//main:c', ns):
            cell_type = c.attrib.get('t')
            v = c.find('main:v', ns)
            val = v.text if v is not None else ""

            if cell_type == 's':
                # Shared string: <v> holds an index into the shared table.
                try:
                    index = int(val)
                    if index < 0:
                        # A negative index would silently wrap around to the
                        # end of the list; treat it as not found instead.
                        raise IndexError(index)
                    val = shared_strings[index]
                except (ValueError, TypeError, IndexError):
                    val = f"ERROR: String {val} not found"
            elif cell_type == 'inlineStr':
                # Inline string: text lives under <is>, either directly in
                # <t> or split across rich-text runs <r><t>.
                # NOTE: compare against None explicitly. An Element with no
                # child elements is falsy, and <t> never has children, so a
                # plain truthiness test would discard every inline string.
                is_node = c.find('main:is', ns)
                if is_node is not None:
                    t_nodes = (is_node.findall('main:t', ns)
                               + is_node.findall('main:r/main:t', ns))
                    if t_nodes:
                        val = "".join(t.text or "" for t in t_nodes)

            if val:
                row_data.append(val)
        if row_data:
            rows.append(" | ".join(row_data))
    return rows

# Workbook to inspect. The first command-line argument, when given, overrides
# the built-in default path.
filename = sys.argv[1] if len(sys.argv) > 1 else "/Users/andrimustaqim/Downloads/Sekolah/OCBC_NISP_Virtual_Account_Specification_ SNAP (Version 2.12.0).xlsx"

if not os.path.exists(filename):
    print("File not found")
    sys.exit(1)

# A sheet is dumped when its name contains any of these (case-insensitive).
targets = ["B2B Access Token", "Sample Request", "Sample"]

try:
    with zipfile.ZipFile(filename, 'r') as z:
        strings = get_shared_strings(z)
        sheets = get_sheet_map(z)

        print("Sheets found:")
        for s in sheets:
            print(f"- {s}")

        for s_name, s_path in sheets.items():
            lowered_name = s_name.lower()
            if not any(t.lower() in lowered_name for t in targets):
                continue

            print(f"\n\n=== CONTENTS OF SHEET: {s_name} ===")
            # Every non-empty row of a matching sheet is printed; no keyword
            # filtering is applied.
            for line in parse_sheet(z, s_path, strings):
                print(line)

except Exception as e:
    # Top-level boundary: report the failure and signal it through the exit
    # status so callers and shell pipelines can detect it.
    print(f"Error: {e}")
    sys.exit(1)
