import sys
import os
import hashlib
import base64
import requests
from lxml import etree
from urllib.parse import urlparse
import mimetypes
import re

class SVGImageProcessor:
    """Inline images referenced by an SVG as base64 ``data:`` URIs.

    ``<image>`` elements whose ``href`` / ``xlink:href`` contains a
    ``wp-content/uploads`` path are mapped onto files below ``upload_path``
    and their reference is replaced by the embedded file content. Image
    bytes are only ever read from the local file system.
    """

    # Clark-notation name of the namespaced ``xlink:href`` attribute.
    _XLINK_HREF = '{http://www.w3.org/1999/xlink}href'
    # Path fragment that marks the uploads root inside a URL.
    _UPLOADS_MARKER = 'wp-content/uploads'
    # Files larger than this (50 MB) are not embedded.
    MAX_IMAGE_BYTES = 50 * 1024 * 1024

    def __init__(self, upload_path):
        """Remember the uploads directory and the XML namespaces used in XPath.

        Args:
            upload_path: Directory that ``wp-content/uploads`` URLs map onto.
        """
        self.upload_path = upload_path
        self.namespaces = {
            'svg': 'http://www.w3.org/2000/svg',
            'xlink': 'http://www.w3.org/1999/xlink'
        }

    def process_svg(self, file_path):
        """Process SVG file by converting external image references to embedded data URIs.

        Args:
            file_path: Path of the SVG file to read.

        Returns:
            The serialized SVG as a string, prefixed with an XML declaration
            and the SVG 1.1 DOCTYPE, or ``None`` when the file is missing or
            any error occurs while processing it.
        """
        try:
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                return None

            # NOTE(review): recover=True / huge_tree=True relax libxml2's
            # strictness and size limits, and entity handling is left at the
            # library default. If the SVGs are untrusted uploads, confirm
            # these parser settings are acceptable.
            parser = etree.XMLParser(strip_cdata=False, recover=True, huge_tree=True)
            tree = etree.parse(file_path, parser)
            root = tree.getroot()

            print(f"Successfully parsed SVG: {file_path}")
            print(f"Root tag: {root.tag}")

            # Only <image> elements in the SVG namespace that carry a
            # reference attribute are candidates for embedding.
            images = root.xpath('//svg:image[@xlink:href or @href]', namespaces=self.namespaces)
            print(f"Found {len(images)} image elements")

            processed_count = sum(1 for image in images if self._embed_image(image))
            print(f"Successfully processed {processed_count} images")

            result = etree.tostring(root, encoding='unicode', method='xml')

            # Only the root element is serialized, so the prolog is re-added
            # here with a fixed declaration and DOCTYPE.
            xml_header = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
            doctype = '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
            return xml_header + doctype + result

        except etree.XMLSyntaxError as e:
            print(f"XML parsing error: {e}")
            return None
        except Exception as e:
            # Best-effort boundary: any failure is reported and mapped to None.
            print(f"Error processing SVG: {e}")
            return None

    def _embed_image(self, image):
        """Replace one ``<image>`` element's reference with a data URI.

        Returns:
            ``True`` if the element was rewritten, ``False`` if it was
            skipped or its file could not be resolved or read.
        """
        xlink_value = image.get(self._XLINK_HREF)
        href = xlink_value or image.get('href')

        if href and href.startswith('data:'):
            print("Skipping already embedded data URI")
            return False

        print(f"Processing image href: {href}")

        local_file_path = self.url_to_local_path(href)
        if not local_file_path:
            print(f"Could not resolve local path for: {href}")
            return False

        image_data = self.get_local_image_data(local_file_path)
        if not image_data:
            print(f"Failed to fetch image data for: {href}")
            return False

        print(f"Successfully fetched image data: {image_data['type']}")
        print(f"Data size: {len(image_data['data'])} bytes")

        encoded = base64.b64encode(image_data['data']).decode()
        data_uri = f"data:{image_data['type']};base64,{encoded}"

        # Write back to whichever attribute carried the reference.
        if xlink_value:
            image.set(self._XLINK_HREF, data_uri)
        else:
            image.set('href', data_uri)

        print(f"Converted to data URI (length: {len(data_uri)})")
        return True

    def url_to_local_path(self, url):
        """Convert URL to local file path within upload directory.

        Args:
            url: An ``http(s)`` URL or a plain path taken from an href.

        Returns:
            ``upload_path`` joined with everything that follows the first
            ``wp-content/uploads`` in the URL path, or ``None`` when the URL
            is empty, is a data URI, lacks that marker, or resolves to a
            location outside the upload directory.
        """
        try:
            print(f"Converting URL to local path: {url}")

            if not url:
                print("Empty URL, nothing to convert")
                return None

            if url.startswith('data:'):
                print("Skipping data URI")
                return None

            if url.startswith('http'):
                # Drop scheme, host, query and fragment; keep only the path.
                url_path = urlparse(url).path
                print(f"Extracted path from URL: {url_path}")
            else:
                url_path = url
                print(f"Using path as-is: {url_path}")

            # partition() keeps everything after the FIRST marker, so a path
            # that happens to contain the marker again is not truncated.
            _, marker, tail = url_path.partition(self._UPLOADS_MARKER)
            if not marker:
                print("Path does not contain wp-content/uploads")
                return None

            relative_path = tail.lstrip('/')
            local_path = os.path.join(self.upload_path, relative_path)
            print(f"Constructed local path: {local_path}")

            # Security check: compare whole path components after resolving
            # symlinks and "..". A plain string-prefix test would wrongly
            # accept sibling directories such as "<uploads>-evil".
            real_upload_path = os.path.normcase(os.path.realpath(self.upload_path))
            real_local_path = os.path.normcase(os.path.realpath(local_path))
            try:
                common = os.path.commonpath([real_upload_path, real_local_path])
            except ValueError:
                # Raised e.g. for paths on different drives: not contained.
                common = None

            if common == real_upload_path:
                print("Path security check passed")
                return local_path

            print("WARNING: Path security check failed - potential directory traversal attack")
            return None

        except Exception as e:
            # Best-effort boundary: report and treat as unresolvable.
            print(f"Error converting URL to local path: {e}")
            return None

    @staticmethod
    def _sniff_content_type(first_bytes):
        """Guess a MIME type from the leading bytes of a file."""
        if b'<svg' in first_bytes or b'<?xml' in first_bytes:
            return 'image/svg+xml'
        if first_bytes.startswith(b'\x89PNG'):
            return 'image/png'
        if first_bytes.startswith(b'\xff\xd8\xff'):
            return 'image/jpeg'
        if first_bytes.startswith(b'GIF8'):
            return 'image/gif'
        return 'application/octet-stream'

    def get_local_image_data(self, file_path):
        """Fetch image data from local file system only.

        Args:
            file_path: Path of the file to read.

        Returns:
            A dict with ``'type'`` (MIME type guessed from the file name,
            falling back to sniffing the first 100 bytes) and ``'data'``
            (the raw bytes), or ``None`` if the path is missing, is not a
            regular file, exceeds ``MAX_IMAGE_BYTES`` or cannot be read.
        """
        try:
            print(f"Reading local file: {file_path}")

            if not file_path or not os.path.exists(file_path):
                print(f"Local file not found: {file_path}")
                return None

            if not os.path.isfile(file_path):
                print(f"Path is not a file: {file_path}")
                return None

            file_size = os.path.getsize(file_path)
            print(f"File size: {file_size} bytes")

            if file_size > self.MAX_IMAGE_BYTES:
                print(f"File too large: {file_size} bytes")
                return None

            # Read once; the same buffer serves sniffing and the result.
            with open(file_path, 'rb') as f:
                data = f.read()

            content_type, _ = mimetypes.guess_type(file_path)
            if not content_type:
                content_type = self._sniff_content_type(data[:100])

            print(f"Detected content type: {content_type}")

            return {
                'type': content_type,
                'data': data
            }

        except Exception as e:
            # Best-effort boundary: report and signal failure with None.
            print(f"Error reading local file {file_path}: {e}")
            return None

def main():
    """Command-line entry point: embed images of an SVG and save the result.

    Reads ``<file_path> <upload_path> [output_path]`` from ``sys.argv``.
    When ``output_path`` is omitted the input file is overwritten in place.
    Exits with status 1 on bad usage, on processing failure, or when the
    result cannot be written.
    """
    print('Started SVG processor - Converting to data URIs')
    if len(sys.argv) < 3:
        print("Usage: python svg_processor.py <file_path> <upload_path> [output_path]", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]
    upload_path = sys.argv[2]
    output_path = sys.argv[3] if len(sys.argv) > 3 else file_path

    processor = SVGImageProcessor(upload_path)
    result = processor.process_svg(file_path)

    if not result:
        print("Processing failed!")
        sys.exit(1)

    print("Processing successful!")

    try:
        # Measure the input BEFORE writing: with the default output_path the
        # input file itself is overwritten, which would otherwise make the
        # reported size change always zero.
        original_size = os.path.getsize(file_path)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(result)
        print(f"Saved result to: {output_path}")

        # Log file sizes for comparison
        new_size = os.path.getsize(output_path)
        print(f"Original size: {original_size} bytes")
        print(f"New size: {new_size} bytes")
        print(f"Size change: {new_size - original_size:+d} bytes")

    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"Error saving file: {e}")
        sys.exit(1)

    print("Processing complete!")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

