DICOM to JPG Converter
This page contains the complete, ready-to-use Python script for converting DICOM files to JPG images while extracting comprehensive medical imaging metadata.
Back to DICOM Converter Documentation
Return to the main DICOM converter guide for setup instructions, usage examples, and the complete workflow.
Installation Requirements
Before using this script, install the required Python packages:
Ask AI
pip install pandas pydicom dicom2jpg opencv-python numpy
Usage
Save this script as dicom_converter.py and run:
Copy
Ask AI
# Basic usage
python dicom_converter.py /path/to/dicom/files
# With custom output directory
python dicom_converter.py /path/to/dicom/files --output /custom/output/path
# With verbose logging
python dicom_converter.py /path/to/dicom/files --verbose
Complete Script Code
Copy
Ask AI
#!/usr/bin/env python3
"""
DICOM to JPG Converter with Metadata Extraction
This script converts DICOM files to JPG images and extracts metadata,
creating a CSV file that links each converted image to its metadata.
"""
import os
import pandas as pd
import pydicom
import dicom2jpg
from pathlib import Path
import logging
import shutil
import argparse
import sys
from datetime import datetime
import numpy as np
import cv2
# Logging will be configured by setup_logging() function
def extract_dicom_metadata(dicom_path):
    """Extract a flat dictionary of header metadata from a DICOM file.

    Only header elements are read, so the file is opened with
    ``stop_before_pixels=True`` to avoid loading the pixel data.

    Patient fields are copied verbatim from the file; no anonymization is
    performed here.

    Args:
        dicom_path: Path of the DICOM file to read.

    Returns:
        dict mapping snake_case column names to header values. Elements
        missing from the file are stored as ``''``, except the CT-specific
        keys (``kvp``, ``tube_current``, ``exposure_time``,
        ``convolution_kernel``), which are only present when the file
        carries them. An empty dict is returned (and the error logged) if
        the file cannot be read.
    """
    # (output key, DICOM keyword, mode)
    #   "raw"      -> store the value as-is, '' when missing
    #   "str"      -> store str(value), '' when missing
    #   "optional" -> store the value only when the element exists
    # The order here is the column order of the resulting CSV.
    fields = (
        # Patient information (copied as-is, not anonymized)
        ('patient_id', 'PatientID', 'raw'),
        ('patient_name', 'PatientName', 'str'),
        ('patient_birth_date', 'PatientBirthDate', 'raw'),
        ('patient_sex', 'PatientSex', 'raw'),
        ('patient_age', 'PatientAge', 'raw'),
        # Study information
        ('study_instance_uid', 'StudyInstanceUID', 'raw'),
        ('study_date', 'StudyDate', 'raw'),
        ('study_time', 'StudyTime', 'raw'),
        ('study_description', 'StudyDescription', 'raw'),
        ('accession_number', 'AccessionNumber', 'raw'),
        # Series information
        ('series_instance_uid', 'SeriesInstanceUID', 'raw'),
        ('series_number', 'SeriesNumber', 'raw'),
        ('series_date', 'SeriesDate', 'raw'),
        ('series_time', 'SeriesTime', 'raw'),
        ('series_description', 'SeriesDescription', 'raw'),
        # Instance information
        ('sop_instance_uid', 'SOPInstanceUID', 'raw'),
        ('instance_number', 'InstanceNumber', 'raw'),
        ('instance_creation_date', 'InstanceCreationDate', 'raw'),
        ('instance_creation_time', 'InstanceCreationTime', 'raw'),
        # Equipment information
        ('manufacturer', 'Manufacturer', 'raw'),
        ('manufacturer_model_name', 'ManufacturerModelName', 'raw'),
        ('device_serial_number', 'DeviceSerialNumber', 'raw'),
        ('software_versions', 'SoftwareVersions', 'raw'),
        ('modality', 'Modality', 'raw'),
        # Image acquisition parameters
        ('image_type', 'ImageType', 'str'),
        ('acquisition_date', 'AcquisitionDate', 'raw'),
        ('acquisition_time', 'AcquisitionTime', 'raw'),
        ('slice_thickness', 'SliceThickness', 'raw'),
        ('slice_location', 'SliceLocation', 'raw'),
        ('image_position_patient', 'ImagePositionPatient', 'str'),
        ('image_orientation_patient', 'ImageOrientationPatient', 'str'),
        # CT-specific parameters (only recorded when present)
        ('kvp', 'KVP', 'optional'),
        ('tube_current', 'XRayTubeCurrent', 'optional'),
        ('exposure_time', 'ExposureTime', 'optional'),
        ('convolution_kernel', 'ConvolutionKernel', 'optional'),
        # Image display parameters
        ('window_center', 'WindowCenter', 'raw'),
        ('window_width', 'WindowWidth', 'raw'),
        ('rescale_intercept', 'RescaleIntercept', 'raw'),
        ('rescale_slope', 'RescaleSlope', 'raw'),
        # Image pixel data information
        ('rows', 'Rows', 'raw'),
        ('columns', 'Columns', 'raw'),
        ('pixel_spacing', 'PixelSpacing', 'str'),
        ('bits_allocated', 'BitsAllocated', 'raw'),
        ('bits_stored', 'BitsStored', 'raw'),
        ('high_bit', 'HighBit', 'raw'),
        ('pixel_representation', 'PixelRepresentation', 'raw'),
        ('photometric_interpretation', 'PhotometricInterpretation', 'raw'),
        # Multiframe-specific information
        ('number_of_frames', 'NumberOfFrames', 'raw'),
        ('frame_increment_pointer', 'FrameIncrementPointer', 'str'),
        ('frame_time', 'FrameTime', 'raw'),
        ('frame_time_vector', 'FrameTimeVector', 'str'),
        # Enhanced timing information
        ('repetition_time', 'RepetitionTime', 'raw'),
        ('echo_time', 'EchoTime', 'raw'),
    )

    try:
        # Header only: none of the fields above needs the pixel data.
        ds = pydicom.dcmread(dicom_path, stop_before_pixels=True)

        metadata = {}
        for column, keyword, mode in fields:
            if mode == 'optional':
                if hasattr(ds, keyword):
                    metadata[column] = getattr(ds, keyword)
                continue
            value = getattr(ds, keyword, '')
            metadata[column] = str(value) if mode == 'str' else value
        return metadata
    except Exception as e:
        logging.error(f"Error extracting metadata from {dicom_path}: {str(e)}")
        return {}
def is_multiframe_dicom(dicom_path):
    """Report whether a DICOM file's pixel array looks like a frame stack.

    The file is read together with its pixel data. A 3-D pixel array whose
    last axis is not of length 3 is treated as multiframe; a last axis of
    length 3 is taken to be the colour channels of a single frame.

    Args:
        dicom_path: Path of the DICOM file to inspect.

    Returns:
        True when the pixel array matches the multiframe shape rule above,
        False otherwise, including when there is no pixel data or the file
        cannot be read (the error is logged).
    """
    try:
        dataset = pydicom.dcmread(dicom_path, stop_before_pixels=False)

        # Only datasets that expose pixel data can be multiframe.
        if hasattr(dataset, 'pixel_array'):
            shape = dataset.pixel_array.shape
            return len(shape) == 3 and shape[2] != 3
        return False
    except Exception as e:
        logging.error(f"Error checking multiframe status for {dicom_path}: {str(e)}")
        return False
def convert_multiframe_dicom(dicom_file, series_dir, images_dir):
    """Write every frame of a multiframe DICOM as its own JPG.

    Each frame is passed through ``apply_dicom_processing`` and saved with
    OpenCV as ``<series>_<stem>_frame-NNN.jpg`` inside ``images_dir``.

    Args:
        dicom_file: ``pathlib.Path`` of the multiframe DICOM file.
        series_dir: ``pathlib.Path`` whose ``name`` labels the series.
        images_dir: ``pathlib.Path`` of the directory receiving the JPGs.

    Returns:
        Tuple ``(converted_files, frame_metadata_list)``: the JPG file names
        written and one metadata dict per written frame. ``([], [])`` is
        returned (and the error logged) if anything raises.
    """
    try:
        ds = pydicom.dcmread(dicom_file)
        pixel_array = ds.pixel_array.astype(float)

        # The first axis indexes the frames.
        num_frames = pixel_array.shape[0]
        logging.info(f"Processing {num_frames} frames from multiframe DICOM: {dicom_file.name}")

        # Header metadata and the source path are the same for every frame,
        # so compute them once instead of re-reading the file per frame.
        base_metadata = extract_dicom_metadata(dicom_file)
        # Path relative to the file's grandparent directory.
        original_dicom_path = str(dicom_file.relative_to(dicom_file.parents[1]))

        converted_files = []
        frame_metadata_list = []
        for frame_idx in range(num_frames):
            frame_number = frame_idx + 1

            # Rescale, window and normalize this frame to 8-bit.
            processed_frame = apply_dicom_processing(ds, pixel_array[frame_idx])

            frame_filename = f"{series_dir.name}_{dicom_file.stem}_frame-{frame_number:03d}.jpg"
            output_path = images_dir / frame_filename

            if not cv2.imwrite(str(output_path), processed_frame):
                logging.error(f"Failed to save frame {frame_number} from {dicom_file.name}")
                continue

            logging.info(f"Converted frame {frame_number}/{num_frames} -> {frame_filename}")
            converted_files.append(frame_filename)

            # Independent copy per frame so rows do not share one dict.
            frame_metadata = dict(base_metadata)
            frame_metadata['image_filename'] = frame_filename
            frame_metadata['is_multiframe'] = True
            frame_metadata['frame_number'] = frame_number
            frame_metadata['total_frames'] = num_frames
            frame_metadata['original_dicom_path'] = original_dicom_path
            frame_metadata['series_folder'] = series_dir.name
            frame_metadata['conversion_timestamp'] = datetime.now().isoformat()
            frame_metadata_list.append(frame_metadata)

        return converted_files, frame_metadata_list
    except Exception as e:
        logging.error(f"Error processing multiframe DICOM {dicom_file}: {str(e)}")
        return [], []
def _first_as_float(value):
    """Return value as a float, using the first entry of a multi-valued element."""
    if hasattr(value, '__iter__') and not isinstance(value, str):
        return float(value[0])
    return float(value)


def _normalize_to_8bit_range(pixel_data):
    """Stretch pixel_data linearly onto 0-255; flat or empty input yields zeros."""
    pixel_data = np.asarray(pixel_data)
    if pixel_data.size == 0:
        return pixel_data
    lowest = pixel_data.min()
    value_range = pixel_data.max() - lowest
    # A flat image (or a NaN range) has nothing to stretch; dividing would
    # give NaN, which has no defined uint8 value.
    if not value_range > 0:
        return np.zeros_like(pixel_data)
    return ((pixel_data - lowest) / value_range) * 255.0


def apply_dicom_processing(ds, pixel_data):
    """Apply rescaling, windowing and 8-bit normalization to pixel data.

    Steps, each applied only when the dataset carries the needed elements:
    rescale by ``RescaleSlope``/``RescaleIntercept``, clip to the window
    given by ``WindowCenter``/``WindowWidth`` (first value when
    multi-valued), stretch the result to 0-255, and invert it for
    ``MONOCHROME1``.

    Args:
        ds: Dataset-like object whose attributes hold the DICOM elements.
        pixel_data: NumPy array with the pixel values of one image.

    Returns:
        ``uint8`` NumPy array of the same shape. If processing raises, the
        error is logged and a plain min-max normalization of the pixel data
        is returned instead.
    """
    try:
        # Map stored values to output units.
        if hasattr(ds, 'RescaleSlope') and hasattr(ds, 'RescaleIntercept'):
            rescale_slope = float(ds.RescaleSlope)
            rescale_intercept = float(ds.RescaleIntercept)
            pixel_data = pixel_data * rescale_slope + rescale_intercept

        # Clip to the display window.
        if hasattr(ds, 'WindowCenter') and hasattr(ds, 'WindowWidth'):
            window_center = _first_as_float(ds.WindowCenter)
            window_width = _first_as_float(ds.WindowWidth)

            # True division: floor division would shrink odd or fractional
            # window widths.
            half_width = window_width / 2
            pixel_data = np.clip(pixel_data, window_center - half_width, window_center + half_width)

        # NOTE: the stretch uses the data's own min/max after clipping, not
        # the window bounds, so an image that does not span the whole window
        # is still stretched to the full 0-255 range.
        pixel_data = _normalize_to_8bit_range(pixel_data)

        # MONOCHROME1 means low values are bright, so invert for display.
        if hasattr(ds, 'PhotometricInterpretation') and ds.PhotometricInterpretation == "MONOCHROME1":
            pixel_data = 255 - pixel_data

        return pixel_data.astype('uint8')
    except Exception as e:
        logging.error(f"Error in DICOM processing: {str(e)}")
        # Fallback: simple normalization of whatever we have so far
        return _normalize_to_8bit_range(pixel_data).astype('uint8')
def validate_input_directory(input_dir):
    """Check that ``input_dir`` is a directory holding something to convert.

    The directory passes when it contains at least one ``.dcm`` file at any
    depth, or at least one immediate subdirectory whose name starts with
    ``series-``.

    Args:
        input_dir: Path of the directory to check.

    Returns:
        True when the directory is usable, False otherwise (the reason is
        logged as an error).
    """
    root = Path(input_dir)

    # Path-level problems first: missing path, or a path that is a file.
    if not root.exists():
        problem = "Input directory does not exist"
    elif not root.is_dir():
        problem = "Input path is not a directory"
    else:
        problem = None
    if problem is not None:
        logging.error(f"{problem}: {input_dir}")
        return False

    # Content check: .dcm files anywhere below, or series-* folders at top level.
    dcm_paths = list(root.rglob("*.dcm"))
    series_folders = []
    for entry in root.iterdir():
        if entry.is_dir() and entry.name.startswith('series-'):
            series_folders.append(entry)

    if not (dcm_paths or series_folders):
        logging.error(f"No DICOM files or series directories found in: {input_dir}")
        return False

    logging.info(f"Found {len(dcm_paths)} DICOM files and {len(series_folders)} series directories")
    return True
def _convert_single_frame_dicom(dicom_file, series_name, images_dir, input_dir):
    """Convert one single-frame DICOM with dicom2jpg and return its metadata row."""
    image_filename = f"{series_name}_{dicom_file.stem}.jpg"
    output_image_path = images_dir / image_filename

    # dicom2jpg creates its own directory structure, so convert into a
    # scratch directory and move the result. Start from an empty scratch
    # directory so a stale JPG from an earlier failure cannot be picked up.
    temp_dir = images_dir / "temp"
    shutil.rmtree(temp_dir, ignore_errors=True)
    temp_dir.mkdir(parents=True, exist_ok=True)
    try:
        logging.info(f"Converting {dicom_file.name} -> {image_filename}")
        dicom2jpg.dicom2jpg(str(dicom_file), str(temp_dir))

        generated_files = sorted(temp_dir.rglob("*.jpg"))
        if not generated_files:
            raise Exception(f"No JPG file generated for {dicom_file.name}")

        # replace() overwrites an existing target on every platform, so
        # re-running into the same output directory works.
        generated_files[0].replace(output_image_path)
        logging.info(f"Moved generated file to {image_filename}")
    finally:
        # Remove the scratch directory whether or not the conversion worked.
        shutil.rmtree(temp_dir, ignore_errors=True)

    metadata = extract_dicom_metadata(dicom_file)
    metadata['image_filename'] = image_filename
    metadata['original_dicom_path'] = str(dicom_file.relative_to(input_dir))
    metadata['series_folder'] = series_name
    metadata['conversion_timestamp'] = datetime.now().isoformat()
    metadata['is_multiframe'] = False
    metadata['frame_number'] = 1
    metadata['total_frames'] = 1
    return metadata


def _convert_dicom_files(dicom_files, series_dir, images_dir, input_dir):
    """Convert a group of DICOM files and return their metadata rows; failures are logged and skipped."""
    rows = []
    for dicom_file in sorted(dicom_files):
        try:
            if is_multiframe_dicom(dicom_file):
                logging.info(f"Detected multiframe DICOM: {dicom_file.name}")
                converted_files, frame_metadata_list = convert_multiframe_dicom(dicom_file, series_dir, images_dir)
                if converted_files:
                    logging.info(f"Successfully converted multiframe DICOM to {len(converted_files)} frames")
                    rows.extend(frame_metadata_list)
                else:
                    logging.error(f"Failed to convert multiframe DICOM: {dicom_file.name}")
            else:
                rows.append(_convert_single_frame_dicom(dicom_file, series_dir.name, images_dir, input_dir))
        except Exception as e:
            # One bad file must not abort the whole batch.
            logging.error(f"Error processing {dicom_file}: {str(e)}")
    return rows


def convert_dicom_to_jpg(input_dir, output_dir):
    """Convert DICOM files to JPG and write their metadata to a CSV.

    Two layouts are processed: ``*.dcm`` files inside immediate
    subdirectories named ``series-*``, and ``*.dcm`` files directly inside
    ``input_dir`` (recorded under the series name ``direct``). Images go to
    ``<output_dir>/converted_images`` and the metadata table to
    ``<output_dir>/metadata/dicom_metadata.csv``.

    Args:
        input_dir: Directory containing the DICOM files.
        output_dir: Directory receiving images and metadata; created if
            missing.

    Returns:
        ``pandas.DataFrame`` with one row per written image
        (``image_filename`` first), or None when nothing was converted.
    """
    input_path = Path(input_dir)
    images_dir = Path(output_dir) / "converted_images"
    metadata_dir = Path(output_dir) / "metadata"
    images_dir.mkdir(parents=True, exist_ok=True)
    metadata_dir.mkdir(parents=True, exist_ok=True)

    all_metadata = []

    series_dirs = [d for d in input_path.iterdir() if d.is_dir() and d.name.startswith('series-')]
    direct_dicom_files = list(input_path.glob("*.dcm"))
    logging.info(f"Found {len(series_dirs)} series directories and {len(direct_dicom_files)} direct DICOM files")

    # Organized layout: one folder per series.
    for series_dir in sorted(series_dirs):
        logging.info(f"Processing {series_dir.name}...")
        dicom_files = list(series_dir.glob("*.dcm"))
        if not dicom_files:
            logging.warning(f"No DICOM files found in {series_dir.name}")
            continue
        logging.info(f"Found {len(dicom_files)} DICOM files in {series_dir.name}")
        all_metadata.extend(_convert_dicom_files(dicom_files, series_dir, images_dir, input_dir))

    # Flat layout: files directly in the input directory share a pseudo series.
    if direct_dicom_files:
        logging.info(f"Processing {len(direct_dicom_files)} direct DICOM files...")
        all_metadata.extend(_convert_dicom_files(direct_dicom_files, Path("direct"), images_dir, input_dir))

    if not all_metadata:
        logging.error("No files were successfully processed")
        return None

    df = pd.DataFrame(all_metadata)
    # Put image_filename first so each row starts with the image it describes.
    if 'image_filename' in df.columns:
        cols = ['image_filename'] + [col for col in df.columns if col != 'image_filename']
        df = df[cols]

    csv_path = metadata_dir / "dicom_metadata.csv"
    df.to_csv(csv_path, index=False)
    logging.info(f"Metadata saved to {csv_path}")
    logging.info(f"Conversion complete! Processed {len(all_metadata)} files")
    logging.info(f"Images saved to: {images_dir}")
    logging.info(f"Metadata saved to: {csv_path}")
    return df
def parse_arguments(argv=None):
    """Parse the converter's command line.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]``.

    Returns:
        ``argparse.Namespace`` with ``input_dir``, ``output``, ``format``,
        ``verbose`` and ``series``.
    """
    parser = argparse.ArgumentParser(
        description='Convert DICOM files to JPG images with metadata extraction',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s /path/to/dicom/folder
%(prog)s /path/to/dicom/folder --output /custom/output/path
%(prog)s /path/to/dicom/folder --format png --verbose
""")
    parser.add_argument('input_dir',
                        help='Directory containing DICOM files or series subdirectories')
    # The script's entry point defaults to a timestamped folder inside
    # input_dir, so the help text says so.
    parser.add_argument('-o', '--output',
                        help='Output directory (default: input_dir/output_<timestamp>)')
    parser.add_argument('-f', '--format',
                        choices=['jpg', 'png', 'bmp', 'tiff'],
                        default='jpg',
                        help='Output image format (default: jpg)')
    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        help='Enable verbose logging')
    parser.add_argument('--series',
                        help='Process only specific series (e.g., series-00001)')
    return parser.parse_args(argv)
def setup_logging(verbose=False):
    """Configure the root logger to write to the console only.

    Existing root handlers are removed first so that repeated calls do not
    stack handlers.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    root_logger = logging.root

    # Drop whatever handlers were installed before.
    while root_logger.handlers:
        root_logger.removeHandler(root_logger.handlers[0])

    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO

    # Console output only; no file handler is attached.
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[console_handler],
    )
def main():
    """Run the converter from the command line.

    Parses arguments, configures logging, validates the input directory,
    runs the conversion and prints a summary.

    Returns:
        Process exit code: 0 on success, 1 on validation failure, failed
        conversion, user interruption or unexpected error.
    """
    args = parse_arguments()
    setup_logging(args.verbose)

    if not validate_input_directory(args.input_dir):
        return 1

    # Default output location: a timestamped folder inside the input directory.
    if args.output:
        output_directory = args.output
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_directory = Path(args.input_dir) / f"output_{timestamp}"

    input_directory = Path(args.input_dir).resolve()
    output_directory = Path(output_directory).resolve()

    logging.info("DICOM to JPG Converter Starting...")
    logging.info(f"Input directory: {input_directory}")
    logging.info(f"Output directory: {output_directory}")
    logging.info(f"Output format: {args.format}")

    # --format and --series are parsed but convert_dicom_to_jpg() takes only
    # the input and output directories, so say so instead of implying that
    # they are applied.
    if args.format != 'jpg':
        logging.warning(f"Output format '{args.format}' was requested, but images are currently always written as JPG")
    if args.series:
        logging.warning(f"--series {args.series} was requested, but series filtering is not applied; all series will be processed")

    try:
        result_df = convert_dicom_to_jpg(str(input_directory), str(output_directory))
        if result_df is None:
            # Logging goes to the console only; there is no log file.
            print("❌ Conversion failed. Check the log output above for details.")
            return 1

        print("\n✅ Conversion Complete!")
        print("📊 Summary:")
        print(f" • Total files converted: {len(result_df)}")
        print(f" • Unique series processed: {result_df['series_folder'].nunique()}")
        print(f" • Output directory: {output_directory}")
        print(f" • Images directory: {output_directory}/converted_images")
        print(f" • Metadata file: {output_directory}/metadata/dicom_metadata.csv")
        print("\n📋 Sample metadata:")
        print(result_df[['image_filename', 'series_folder', 'modality', 'instance_number']].head())
        return 0
    except KeyboardInterrupt:
        print("\n⚠️ Conversion interrupted by user")
        return 1
    except Exception as e:
        logging.error(f"Unexpected error: {str(e)}")
        print(f"❌ Unexpected error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())