Skip to main content

DICOM to JPG Converter

This page contains the complete, ready-to-use Python script for converting DICOM (Digital Imaging and Communications in Medicine) files to JPG images.
The script also extracts comprehensive medical imaging metadata into a CSV file that links each converted image to its metadata. It handles both single-frame and multiframe DICOM files and supports various medical imaging modalities.

Back to DICOM Converter Documentation

Return to the main DICOM converter guide for setup instructions, usage examples, and the complete workflow.

Installation Requirements

Before using this script, install the required Python packages:
pip install pandas pydicom dicom2jpg opencv-python numpy

Usage

Save this script as dicom_converter.py and run:
# Basic usage
python dicom_converter.py /path/to/dicom/files

# With custom output directory
python dicom_converter.py /path/to/dicom/files --output /custom/output/path

# With verbose logging
python dicom_converter.py /path/to/dicom/files --verbose

Complete Script Code

#!/usr/bin/env python3
"""
DICOM to JPG Converter with Metadata Extraction

This script converts DICOM files to JPG images and extracts metadata,
creating a CSV file that links each converted image to its metadata.
"""

import argparse
import logging
import os
import shutil
import sys
import tempfile
from datetime import datetime
from pathlib import Path

import cv2
import dicom2jpg
import numpy as np
import pandas as pd
import pydicom

# Logging will be configured by setup_logging() function

# Sentinel distinguishing "tag absent" from a tag whose value is empty/None.
_TAG_MISSING = object()

# How each DICOM attribute is copied into the metadata dictionary.
_RAW = "raw"            # stored as returned by pydicom, '' when absent
_AS_STR = "str"         # stored as str(value), '' when absent
_OPTIONAL = "optional"  # stored as returned by pydicom, key omitted when absent

# (metadata key, DICOM keyword, copy mode). The order of this table is the
# insertion order of the resulting dictionary and therefore the column order
# of the CSV built from it.
_METADATA_FIELDS = (
    # Patient information. These values are copied verbatim from the file:
    # nothing here anonymizes them.
    ('patient_id', 'PatientID', _RAW),
    ('patient_name', 'PatientName', _AS_STR),
    ('patient_birth_date', 'PatientBirthDate', _RAW),
    ('patient_sex', 'PatientSex', _RAW),
    ('patient_age', 'PatientAge', _RAW),
    # Study information
    ('study_instance_uid', 'StudyInstanceUID', _RAW),
    ('study_date', 'StudyDate', _RAW),
    ('study_time', 'StudyTime', _RAW),
    ('study_description', 'StudyDescription', _RAW),
    ('accession_number', 'AccessionNumber', _RAW),
    # Series information
    ('series_instance_uid', 'SeriesInstanceUID', _RAW),
    ('series_number', 'SeriesNumber', _RAW),
    ('series_date', 'SeriesDate', _RAW),
    ('series_time', 'SeriesTime', _RAW),
    ('series_description', 'SeriesDescription', _RAW),
    # Instance information
    ('sop_instance_uid', 'SOPInstanceUID', _RAW),
    ('instance_number', 'InstanceNumber', _RAW),
    ('instance_creation_date', 'InstanceCreationDate', _RAW),
    ('instance_creation_time', 'InstanceCreationTime', _RAW),
    # Equipment information
    ('manufacturer', 'Manufacturer', _RAW),
    ('manufacturer_model_name', 'ManufacturerModelName', _RAW),
    ('device_serial_number', 'DeviceSerialNumber', _RAW),
    ('software_versions', 'SoftwareVersions', _RAW),
    ('modality', 'Modality', _RAW),
    # Image acquisition parameters
    ('image_type', 'ImageType', _AS_STR),
    ('acquisition_date', 'AcquisitionDate', _RAW),
    ('acquisition_time', 'AcquisitionTime', _RAW),
    ('slice_thickness', 'SliceThickness', _RAW),
    ('slice_location', 'SliceLocation', _RAW),
    ('image_position_patient', 'ImagePositionPatient', _AS_STR),
    ('image_orientation_patient', 'ImageOrientationPatient', _AS_STR),
    # CT-specific parameters: only recorded when the tag is present
    ('kvp', 'KVP', _OPTIONAL),
    ('tube_current', 'XRayTubeCurrent', _OPTIONAL),
    ('exposure_time', 'ExposureTime', _OPTIONAL),
    ('convolution_kernel', 'ConvolutionKernel', _OPTIONAL),
    # Image display parameters
    ('window_center', 'WindowCenter', _RAW),
    ('window_width', 'WindowWidth', _RAW),
    ('rescale_intercept', 'RescaleIntercept', _RAW),
    ('rescale_slope', 'RescaleSlope', _RAW),
    # Image pixel data information
    ('rows', 'Rows', _RAW),
    ('columns', 'Columns', _RAW),
    ('pixel_spacing', 'PixelSpacing', _AS_STR),
    ('bits_allocated', 'BitsAllocated', _RAW),
    ('bits_stored', 'BitsStored', _RAW),
    ('high_bit', 'HighBit', _RAW),
    ('pixel_representation', 'PixelRepresentation', _RAW),
    ('photometric_interpretation', 'PhotometricInterpretation', _RAW),
    # Multiframe-specific information
    ('number_of_frames', 'NumberOfFrames', _RAW),
    ('frame_increment_pointer', 'FrameIncrementPointer', _AS_STR),
    ('frame_time', 'FrameTime', _RAW),
    ('frame_time_vector', 'FrameTimeVector', _AS_STR),
    # Enhanced timing information
    ('repetition_time', 'RepetitionTime', _RAW),
    ('echo_time', 'EchoTime', _RAW),
)


def extract_dicom_metadata(dicom_path):
    """Extract comprehensive metadata from a DICOM file.

    Only the header is read (``stop_before_pixels=True``): none of the
    recorded tags depend on the pixel data, so decoding it would only cost
    time and memory.

    NOTE: patient identifiers (ID, name, birth date, ...) are copied as
    stored in the file. No anonymization is performed here.

    Args:
        dicom_path: Path of the DICOM file to read.

    Returns:
        A dictionary mapping metadata keys to values. Absent tags map to
        ``''``, except the CT-specific keys (``kvp``, ``tube_current``,
        ``exposure_time``, ``convolution_kernel``), which are omitted when
        the tag is absent. An empty dictionary is returned if the file
        cannot be read; the error is logged rather than raised.
    """
    try:
        ds = pydicom.dcmread(dicom_path, stop_before_pixels=True)

        metadata = {}
        for key, keyword, mode in _METADATA_FIELDS:
            if mode == _OPTIONAL:
                value = getattr(ds, keyword, _TAG_MISSING)
                if value is not _TAG_MISSING:
                    metadata[key] = value
                continue

            value = getattr(ds, keyword, '')
            metadata[key] = str(value) if mode == _AS_STR else value

        return metadata

    except Exception as e:
        logging.error(f"Error extracting metadata from {dicom_path}: {str(e)}")
        return {}

def is_multiframe_dicom(dicom_path):
    """Report whether a DICOM file should be handled as a multiframe image.

    The decision is based on the shape of the decoded pixel array: a
    3-dimensional array whose LAST axis is not of length 3 is treated as a
    stack of grayscale frames ([frames, height, width]); a last axis of
    length 3 is treated as a single RGB image ([height, width, 3]).
    Arrays of any other rank yield False.

    Args:
        dicom_path: Path of the DICOM file to inspect.

    Returns:
        True when the file looks like a grayscale multiframe image, False
        otherwise. Any error while reading or decoding is logged and
        reported as False.
    """
    try:
        dataset = pydicom.dcmread(dicom_path, stop_before_pixels=False)

        # Without pixel data there is nothing to split into frames.
        if hasattr(dataset, 'pixel_array'):
            shape = dataset.pixel_array.shape
            return len(shape) == 3 and shape[2] != 3

        return False

    except Exception as e:
        logging.error(f"Error checking multiframe status for {dicom_path}: {str(e)}")
        return False

def convert_multiframe_dicom(dicom_file, series_dir, images_dir):
    """Convert a multiframe DICOM file to one JPG per frame.

    Each frame is passed through ``apply_dicom_processing`` and written to
    ``images_dir`` as ``<series>_<stem>_frame-NNN.jpg``.

    Args:
        dicom_file: ``Path`` of the multiframe DICOM file.
        series_dir: ``Path`` whose ``name`` labels the series. For files
            that do not live in a series folder the caller passes a pseudo
            path such as ``Path("direct")``.
        images_dir: ``Path`` of the directory receiving the JPG files.

    Returns:
        A tuple ``(converted_files, frame_metadata_list)``: the names of the
        frames written and one metadata dictionary per written frame.
        ``([], [])`` is returned if the file cannot be processed; the error
        is logged rather than raised.
    """
    try:
        # Read DICOM file
        ds = pydicom.dcmread(dicom_file)
        pixel_array = ds.pixel_array.astype(float)

        # Frames are stacked along the first axis.
        num_frames = pixel_array.shape[0]
        logging.info(f"Processing {num_frames} frames from multiframe DICOM: {dicom_file.name}")

        # Record the source path relative to the input directory. A file
        # inside a real series folder sits one level below the input
        # directory; a file passed with a pseudo series (flat layout) sits
        # directly in it, which matches how single-frame files are recorded.
        if dicom_file.parent.name == series_dir.name:
            base_dir = dicom_file.parents[1]
        else:
            base_dir = dicom_file.parent
        original_dicom_path = str(dicom_file.relative_to(base_dir))

        # The file-level metadata is identical for every frame, so it is
        # read once (on the first successful frame) and copied afterwards
        # instead of re-parsing the file for each frame.
        shared_metadata = None

        converted_files = []
        frame_metadata_list = []

        for frame_number, frame_data in enumerate(pixel_array, start=1):
            # Apply DICOM processing (windowing, rescaling)
            processed_frame = apply_dicom_processing(ds, frame_data)

            frame_filename = f"{series_dir.name}_{dicom_file.stem}_frame-{frame_number:03d}.jpg"
            output_path = images_dir / frame_filename

            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(str(output_path), processed_frame):
                logging.error(f"Failed to save frame {frame_number} from {dicom_file.name}")
                continue

            logging.info(f"Converted frame {frame_number}/{num_frames} -> {frame_filename}")
            converted_files.append(frame_filename)

            if shared_metadata is None:
                shared_metadata = extract_dicom_metadata(dicom_file)

            frame_metadata = dict(shared_metadata)
            frame_metadata['image_filename'] = frame_filename
            frame_metadata['is_multiframe'] = True
            frame_metadata['frame_number'] = frame_number
            frame_metadata['total_frames'] = num_frames
            frame_metadata['original_dicom_path'] = original_dicom_path
            frame_metadata['series_folder'] = series_dir.name
            frame_metadata['conversion_timestamp'] = datetime.now().isoformat()

            frame_metadata_list.append(frame_metadata)

        return converted_files, frame_metadata_list

    except Exception as e:
        logging.error(f"Error processing multiframe DICOM {dicom_file}: {str(e)}")
        return [], []

def _first_float(value):
    """Return ``value`` as a float, using the first entry of a multi-valued field."""
    if hasattr(value, '__iter__') and not isinstance(value, str):
        return float(value[0])
    return float(value)


def _stretch_to_display_range(values, lower=None, upper=None):
    """Linearly map ``[lower, upper]`` onto ``[0, 255]`` (float result).

    When no bounds are given the minimum and maximum of ``values`` are used.
    A degenerate range (constant image) maps to all zeros instead of
    dividing by zero.
    """
    if lower is None or upper is None:
        lower, upper = values.min(), values.max()
    span = upper - lower
    if not span > 0:
        return np.zeros_like(values, dtype=float)
    return ((values - lower) / span) * 255.0


def apply_dicom_processing(ds, pixel_data):
    """Apply DICOM rescaling and windowing and return an 8-bit image.

    Steps, each applied only when the corresponding attributes exist on
    ``ds``:

    1. ``RescaleSlope`` / ``RescaleIntercept``: linear rescale.
    2. ``WindowCenter`` / ``WindowWidth`` (first value of multi-valued
       fields): values are clipped to the window and the window itself is
       mapped to 0..255, so the same stored value gets the same brightness
       in every frame. A non-positive width is ignored.
    3. Without a usable window, the data's own min..max is mapped to 0..255.
    4. ``PhotometricInterpretation == "MONOCHROME1"``: the result is inverted.

    Args:
        ds: Object exposing the DICOM attributes above (e.g. a pydicom
            dataset).
        pixel_data: Array-like of pixel values for one frame.

    Returns:
        A ``uint8`` numpy array with the same shape as ``pixel_data``.
        Empty input yields an empty ``uint8`` array and a constant image
        yields zeros (255 after MONOCHROME1 inversion). If the DICOM
        attributes cannot be interpreted, the error is logged and a plain
        min-max normalization of the input is returned.
    """
    raw = np.asarray(pixel_data, dtype=float)
    if raw.size == 0:
        # Nothing to normalize; min()/max() would raise on an empty array.
        return raw.astype('uint8')

    try:
        values = raw

        # Apply rescale slope and intercept if available
        if hasattr(ds, 'RescaleSlope') and hasattr(ds, 'RescaleIntercept'):
            values = values * float(ds.RescaleSlope) + float(ds.RescaleIntercept)

        # Apply windowing if available
        lower = upper = None
        if hasattr(ds, 'WindowCenter') and hasattr(ds, 'WindowWidth'):
            window_center = _first_float(ds.WindowCenter)
            window_width = _first_float(ds.WindowWidth)

            if window_width > 0:
                # True division: the half-width of an odd window is not an
                # integer and must not be floored.
                lower = window_center - window_width / 2
                upper = window_center + window_width / 2
                values = np.clip(values, lower, upper)

        display = _stretch_to_display_range(values, lower, upper)

        # MONOCHROME1 stores the minimum value as white, so invert.
        if getattr(ds, 'PhotometricInterpretation', None) == "MONOCHROME1":
            display = 255 - display

        return display.astype('uint8')

    except Exception as e:
        logging.error(f"Error in DICOM processing: {str(e)}")
        # Fallback: simple normalization of the unprocessed input
        return _stretch_to_display_range(raw).astype('uint8')

def validate_input_directory(input_dir):
    """Check that ``input_dir`` is a directory holding something to convert.

    The directory qualifies when it contains at least one ``.dcm`` file at
    any depth, or at least one immediate sub-directory whose name starts
    with ``series-``.

    Args:
        input_dir: Path of the directory to check.

    Returns:
        True when the directory is usable, False otherwise. The reason for
        a rejection is logged as an error.
    """
    root = Path(input_dir)

    # Path-level problems first: report the first one that applies.
    failure = None
    if not root.exists():
        failure = f"Input directory does not exist: {input_dir}"
    elif not root.is_dir():
        failure = f"Input path is not a directory: {input_dir}"

    if failure is not None:
        logging.error(failure)
        return False

    # Count candidate inputs: .dcm files anywhere below the root, and
    # series folders directly inside it.
    dicom_count = len(list(root.glob("**/*.dcm")))
    series_count = 0
    for entry in root.iterdir():
        if entry.is_dir() and entry.name.startswith('series-'):
            series_count += 1

    if dicom_count == 0 and series_count == 0:
        logging.error(f"No DICOM files or series directories found in: {input_dir}")
        return False

    logging.info(f"Found {dicom_count} DICOM files and {series_count} series directories")
    return True

def _convert_single_frame(dicom_file, image_filename, images_dir):
    """Convert one single-frame DICOM file to ``images_dir / image_filename``.

    dicom2jpg creates its own directory structure below the target folder,
    so the file is converted into a scratch directory and then moved to its
    final name. The scratch directory is created fresh for every file and
    removed afterwards, so a JPG left behind by an earlier failure can never
    be mistaken for this file's output.

    Raises:
        Exception: If dicom2jpg produced no JPG file.
    """
    logging.info(f"Converting {dicom_file.name} -> {image_filename}")

    # The scratch directory lives inside images_dir so the final move stays
    # on one filesystem.
    with tempfile.TemporaryDirectory(dir=images_dir, prefix="temp_") as scratch:
        dicom2jpg.dicom2jpg(str(dicom_file), str(scratch))

        generated_files = sorted(Path(scratch).rglob("*.jpg"))
        if not generated_files:
            raise Exception(f"No JPG file generated for {dicom_file.name}")

        # replace() overwrites an existing target on every platform, so a
        # re-run into the same output directory also works on Windows.
        generated_files[0].replace(images_dir / image_filename)

    logging.info(f"Moved generated file to {image_filename}")


def _process_dicom_file(dicom_file, series_dir, input_root, images_dir):
    """Convert one DICOM file and return its metadata records (one per image)."""
    if is_multiframe_dicom(dicom_file):
        logging.info(f"Detected multiframe DICOM: {dicom_file.name}")

        converted_files, frame_metadata_list = convert_multiframe_dicom(dicom_file, series_dir, images_dir)
        if not converted_files:
            logging.error(f"Failed to convert multiframe DICOM: {dicom_file.name}")
            return []

        logging.info(f"Successfully converted multiframe DICOM to {len(converted_files)} frames")
        return frame_metadata_list

    # Single-frame DICOM: converted with dicom2jpg
    image_filename = f"{series_dir.name}_{dicom_file.stem}.jpg"
    _convert_single_frame(dicom_file, image_filename, images_dir)

    metadata = extract_dicom_metadata(dicom_file)
    metadata['image_filename'] = image_filename
    metadata['original_dicom_path'] = str(dicom_file.relative_to(input_root))
    metadata['series_folder'] = series_dir.name
    metadata['conversion_timestamp'] = datetime.now().isoformat()
    metadata['is_multiframe'] = False
    metadata['frame_number'] = 1
    metadata['total_frames'] = 1
    return [metadata]


def _process_dicom_files(dicom_files, series_dir, input_root, images_dir):
    """Convert several DICOM files; a failing file is logged and skipped."""
    records = []
    for dicom_file in dicom_files:
        try:
            records.extend(_process_dicom_file(dicom_file, series_dir, input_root, images_dir))
        except Exception as e:
            logging.error(f"Error processing {dicom_file}: {str(e)}")
    return records


def convert_dicom_to_jpg(input_dir, output_dir):
    """Convert DICOM files to JPG and extract metadata.

    Two input layouts are processed, in this order:

    * sub-directories of ``input_dir`` named ``series-*``, each holding
      ``.dcm`` files;
    * ``.dcm`` files located directly in ``input_dir`` (recorded under the
      series label ``direct``).

    Images are written to ``<output_dir>/converted_images`` and the metadata
    of all converted images to ``<output_dir>/metadata/dicom_metadata.csv``.

    Args:
        input_dir: Directory containing the DICOM input.
        output_dir: Directory receiving images and metadata; created if
            missing.

    Returns:
        A pandas DataFrame with one row per converted image
        (``image_filename`` is the first column), or None when nothing was
        converted.
    """
    input_root = Path(input_dir)

    # Create output directories
    images_dir = Path(output_dir) / "converted_images"
    metadata_dir = Path(output_dir) / "metadata"
    images_dir.mkdir(parents=True, exist_ok=True)
    metadata_dir.mkdir(parents=True, exist_ok=True)

    series_dirs = sorted(
        d for d in input_root.iterdir()
        if d.is_dir() and d.name.startswith('series-')
    )
    direct_dicom_files = sorted(input_root.glob("*.dcm"))

    logging.info(f"Found {len(series_dirs)} series directories and {len(direct_dicom_files)} direct DICOM files")

    all_metadata = []

    # Process series directories first
    for series_dir in series_dirs:
        logging.info(f"Processing {series_dir.name}...")

        dicom_files = sorted(series_dir.glob("*.dcm"))
        if not dicom_files:
            logging.warning(f"No DICOM files found in {series_dir.name}")
            continue

        logging.info(f"Found {len(dicom_files)} DICOM files in {series_dir.name}")
        all_metadata.extend(_process_dicom_files(dicom_files, series_dir, input_root, images_dir))

    # Process direct DICOM files (flat folder structure)
    if direct_dicom_files:
        logging.info(f"Processing {len(direct_dicom_files)} direct DICOM files...")
        # Pseudo series path: only its name ("direct") is used, as the
        # filename prefix and as the series label in the metadata.
        all_metadata.extend(_process_dicom_files(direct_dicom_files, Path("direct"), input_root, images_dir))

    if not all_metadata:
        logging.error("No files were successfully processed")
        return None

    # Save metadata to CSV
    df = pd.DataFrame(all_metadata)

    # Reorder columns to put image_filename first
    if 'image_filename' in df.columns:
        cols = ['image_filename'] + [col for col in df.columns if col != 'image_filename']
        df = df[cols]

    csv_path = metadata_dir / "dicom_metadata.csv"
    df.to_csv(csv_path, index=False)
    logging.info(f"Metadata saved to {csv_path}")

    logging.info(f"Conversion complete! Processed {len(all_metadata)} files")
    logging.info(f"Images saved to: {images_dir}")
    logging.info(f"Metadata saved to: {csv_path}")

    return df

def parse_arguments():
    """Parse command line arguments.

    Reads ``sys.argv`` and returns the parsed namespace with the attributes
    ``input_dir``, ``output``, ``format``, ``verbose`` and ``series``.

    ``--format`` and ``--series`` are accepted so existing command lines
    keep working, but nothing in this script acts on them: the output is
    always JPG and every series is processed. The help text says so instead
    of advertising behavior that does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Convert DICOM files to JPG images with metadata extraction',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s /path/to/dicom/folder
  %(prog)s /path/to/dicom/folder --output /custom/output/path
  %(prog)s /path/to/dicom/folder --verbose
        """)

    parser.add_argument('input_dir',
                        help='Directory containing DICOM files or series subdirectories')

    # When omitted, the script creates a timestamped folder inside input_dir.
    parser.add_argument('-o', '--output',
                        help='Output directory (default: <input_dir>/output_<timestamp>)')

    parser.add_argument('-f', '--format',
                        choices=['jpg', 'png', 'bmp', 'tiff'],
                        default='jpg',
                        help='Requested output image format (default: jpg). '
                             'Only JPG output is currently implemented.')

    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        help='Enable verbose logging')

    parser.add_argument('--series',
                        help='Series to process (e.g., series-00001). '
                             'Series filtering is not currently implemented; '
                             'all series are processed.')

    return parser.parse_args()

def setup_logging(verbose=False):
    """Configure the root logger for console output.

    Any handlers already attached to the root logger are detached first, so
    calling this repeatedly leaves exactly one console handler.

    Args:
        verbose: When true, log at DEBUG level; otherwise at INFO level.
    """
    root_logger = logging.root

    # Detach existing handlers so basicConfig() below takes effect.
    while root_logger.handlers:
        root_logger.removeHandler(root_logger.handlers[0])

    # Console only: no log file is written.
    console_handler = logging.StreamHandler()

    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[console_handler],
    )

if __name__ == "__main__":
    # Script entry point: parse options, validate the input, run the
    # conversion and print a summary. Exits with status 1 on any failure.
    args = parse_arguments()

    setup_logging(args.verbose)

    if not validate_input_directory(args.input_dir):
        sys.exit(1)

    # Work with absolute paths from here on.
    input_directory = Path(args.input_dir).resolve()
    if args.output:
        output_directory = Path(args.output).resolve()
    else:
        # Default: a timestamped folder inside the input directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_directory = input_directory / f"output_{timestamp}"

    # Log configuration
    logging.info("DICOM to JPG Converter Starting...")
    logging.info(f"Input directory: {input_directory}")
    logging.info(f"Output directory: {output_directory}")

    # --format and --series are parsed but the conversion below does not
    # act on them. Say so explicitly rather than logging them as if they
    # were in effect.
    if args.format != 'jpg':
        logging.warning(
            f"Output format '{args.format}' was requested, but only JPG output "
            "is implemented; images will be written as JPG"
        )
    if args.series:
        logging.warning(
            f"Series '{args.series}' was requested, but series filtering is not "
            "implemented; all series will be processed"
        )

    # Run conversion
    try:
        result_df = convert_dicom_to_jpg(str(input_directory), str(output_directory))

        if result_df is not None:
            print("\n✅ Conversion Complete!")
            print("📊 Summary:")
            print(f"   • Total files converted: {len(result_df)}")
            print(f"   • Unique series processed: {result_df['series_folder'].nunique()}")
            print(f"   • Output directory: {output_directory}")
            print(f"   • Images directory: {output_directory}/converted_images")
            print(f"   • Metadata file: {output_directory}/metadata/dicom_metadata.csv")
            print("\n📋 Sample metadata:")
            # Columns such as 'modality' are absent when metadata extraction
            # failed for every file; only show the ones that exist so a
            # successful conversion is not reported as an unexpected error.
            sample_columns = [
                column
                for column in ('image_filename', 'series_folder', 'modality', 'instance_number')
                if column in result_df.columns
            ]
            print(result_df[sample_columns].head())
        else:
            # Logging goes to the console only; there is no log file.
            print("❌ Conversion failed. Check the log output above for details.")
            sys.exit(1)

    except KeyboardInterrupt:
        print("\n⚠️  Conversion interrupted by user")
        sys.exit(1)
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")
        print(f"❌ Unexpected error: {str(e)}")
        sys.exit(1)
I