DICOM Object Metadata Extraction System

The [dicom_object_meta.rs] module provides comprehensive extraction capabilities for DICOM medical imaging metadata, processing hundreds of standardized DICOM tags to create structured data representations.

Metadata Categories

Patient Information

Extracts critical patient identifiers and demographics:

  • Patient name, ID, and demographics
  • Birth date and sex
  • Medical record numbers
  • Accession numbers

Study Metadata

Processes study-level information:

  • Study instance UID and description
  • Study date and time
  • Referring physician information
  • Study ID and accession numbers

Series Information

Handles series-specific data:

  • Series instance UID and number
  • Series description and modality
  • Body part examined
  • Protocol information

Image Characteristics

Extracts detailed image metadata:

  • Image dimensions and pixel characteristics
  • Spatial information (orientation, position, thickness)
  • Acquisition parameters
  • Windowing settings

Technical Implementation

Data Validation

Implements strict validation for all extracted fields with proper error handling for missing or malformed data.

Type Safety

Uses bounded string types to ensure data integrity and prevent buffer overflows.

Performance Optimization

Leverages parallel processing for bulk metadata extraction operations.

Data Processing Pipeline

  1. DICOM File Parsing: Reads DICOM objects using standardized libraries
  2. Tag Extraction: Processes relevant DICOM tags based on medical imaging standards
  3. Data Transformation: Converts DICOM data formats to application-friendly structures
  4. Validation: Ensures data integrity and completeness
  5. Storage Preparation: Formats data for database storage or caching

Healthcare Compliance

  • HIPAA Compliant: Handles patient data according to privacy regulations
  • DICOM Standard: Follows DICOM standards for metadata processing
  • Audit Trail: Maintains data processing logs for compliance purposes

database_provider_base.rs


use std::fmt;

/// Reasons a DICOM object can be rejected because one of its identifiers is unusable.
///
/// Every identifier has two variants: `Missing*` for an identifier that is absent and
/// `Empty*` for one that is present but empty.
#[derive(Debug)]
pub enum ExtractionError {
    /// The patient ID is missing.
    MissingPatientId,
    /// The patient ID is empty.
    EmptyPatientId,
    /// The study UID is missing.
    MissingStudyUid,
    /// The study UID is empty.
    EmptyStudyUid,
    /// The series UID is missing.
    MissingSeriesUid,
    /// The series UID is empty.
    EmptySeriesUid,
    /// The SOP UID is missing.
    MissingSopUid,
    /// The SOP UID is empty.
    EmptySopUid,
}

impl fmt::Display for ExtractionError {
    /// Writes the fixed, human-readable message associated with the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the message first, then emit it once; the texts are static.
        let message = match self {
            Self::MissingPatientId => "Missing patient ID in DICOM object",
            Self::EmptyPatientId => "Patient ID is empty in DICOM object",
            Self::MissingStudyUid => "Missing study UID in DICOM object",
            Self::EmptyStudyUid => "Study UID is empty in DICOM object",
            Self::MissingSeriesUid => "Missing series UID in DICOM object",
            Self::EmptySeriesUid => "Series UID is empty in DICOM object",
            Self::MissingSopUid => "Missing SOP UID in DICOM object",
            Self::EmptySopUid => "SOP UID is empty in DICOM object",
        };
        f.write_str(message)
    }
}

impl std::error::Error for ExtractionError {}
 

database_factory.rs

use crate::server_config;
use crate::server_config::DatabaseConfig;
use database::dicom_dbprovider::DbProvider;
use database::dicom_mysql::MySqlDbProvider;
use database::dicom_pg::PgDbProvider;
use std::sync::Arc;

 
/// Failures that can occur while selecting or configuring a database provider.
///
/// Each variant carries a free-form detail message that is appended to a fixed label
/// when the error is displayed.
#[derive(Debug)]
pub enum DatabaseError {
    /// The database configuration could not be turned into a usable connection setup.
    ConfigError(String),
    /// A connection-related failure.
    ConnectionError(String),
    /// The configured database type is not one this factory can build.
    UnsupportedDatabase(String),
}

impl std::fmt::Display for DatabaseError {
    /// Renders the error as `"<label>: <detail>"`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, detail) = match self {
            Self::ConfigError(detail) => ("Config error", detail),
            Self::ConnectionError(detail) => ("Connection error", detail),
            Self::UnsupportedDatabase(detail) => ("Unsupported database", detail),
        };
        write!(f, "{}: {}", label, detail)
    }
}

impl std::error::Error for DatabaseError {}

/// Builds the database provider selected by `dbconfig.dbtype`.
///
/// The type name is compared case-insensitively:
///
/// * `postgresql` yields a [`PgDbProvider`] built from
///   `server_config::generate_pg_database_connection`.
/// * `mysql` and `doris` both yield a [`MySqlDbProvider`] built from
///   `server_config::generate_database_connection`.
///
/// The function returns a `Result` instead of terminating the process, so the caller
/// decides how to react to a bad configuration.
///
/// # Errors
///
/// * [`DatabaseError::UnsupportedDatabase`] when the type is none of the above.
/// * [`DatabaseError::ConfigError`] when the connection string cannot be generated.
pub async fn create_db_instance(
    dbconfig: &DatabaseConfig,
) -> Result<Arc<dyn DbProvider>, DatabaseError> {
    let db_type = dbconfig.dbtype.to_lowercase();

    // A single `match` is the only place that decides what is supported, so the accepted
    // names and the error text below cannot drift apart.
    match db_type.as_str() {
        "postgresql" => {
            let conn_url =
                server_config::generate_pg_database_connection(dbconfig).map_err(|_| {
                    DatabaseError::ConfigError(
                        "database connection string is not right".to_string(),
                    )
                })?;

            let pg_provider = PgDbProvider::new(conn_url);
            Ok(Arc::new(pg_provider))
        }
        // `mysql` is advertised as supported by the error message below but used to be
        // rejected; it shares the MySQL provider with `doris`.
        "mysql" | "doris" => {
            let conn_url =
                server_config::generate_database_connection(dbconfig).map_err(|_| {
                    DatabaseError::ConfigError(
                        "database connection string is not right".to_string(),
                    )
                })?;

            let db_provider = MySqlDbProvider::new(conn_url);
            Ok(Arc::new(db_provider))
        }
        _ => Err(DatabaseError::UnsupportedDatabase(
            "only mysql, doris or postgresql is supported".to_string(),
        )),
    }
}

dicom_object_meta.rs: Extracting DICOM Metadata

use crate::dicom_utils;

use crate::storage_config::hash_uid;
use chrono::{NaiveDate, NaiveTime};
use database::dicom_dbtype::{BoundedString, DicomDateString};
use database::dicom_meta::{DicomImageMeta, DicomStateMeta};
use dicom_dictionary_std::tags;
use dicom_object::InMemDicomObject;
use serde::{Deserialize, Serialize};

/// Errors reported while turning a DICOM object into the metadata records of this module.
///
/// Every variant carries a human-readable message. The enum is `#[non_exhaustive]`, so
/// code matching on it from another crate needs a wildcard arm.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub enum DicomParseError {
    /// A required value was absent or rejected by the presence/length check; the payload
    /// names the field (for example `"PATIENT_ID"`).
    MissingRequiredField(String),
    /// A time value could not be parsed.
    InvalidTimeFormat(String),
    /// A date value could not be parsed or converted.
    InvalidDateFormat(String),
    /// A value had an unexpected format. NOTE(review): not constructed in the visible
    /// part of this file — confirm where it is used.
    InvalidFormat(String),

    /// Transfer syntax UID problem. NOTE(review): not constructed in the visible part of
    /// this file — confirm where it is used.
    TransferSyntaxUidIsEmpty(String),
    /// The SOP Class UID could not be stored in its bounded string type.
    SopClassUidIsEmpty(String),
    /// A value could not be converted into its bounded target type.
    ConversionError(String),
    
}

/// Identifiers and study date that both the image-level and the state-level metadata need.
///
/// Instances are produced by `DicomCommonMeta::extract_from_dicom`, which guarantees that
/// the four identifiers are non-empty and at most 64 bytes long.
struct DicomCommonMeta {
    /// Text of the `PATIENT_ID` tag.
    patient_id: String,
    /// Text of the `STUDY_INSTANCE_UID` tag.
    study_uid: String,
    /// Text of the `SERIES_INSTANCE_UID` tag.
    series_uid: String,
    /// Text of the `SOP_INSTANCE_UID` tag.
    sop_uid: String,
    /// `STUDY_DATE` parsed with the `%Y%m%d` format.
    study_date: NaiveDate,
    /// `STUDY_DATE` exactly as it was read from the object, before parsing.
    study_date_str: String,
}

impl DicomCommonMeta {
    /// Reads the patient ID, the study/series/SOP instance UIDs and the study date.
    ///
    /// # Errors
    ///
    /// * [`DicomParseError::MissingRequiredField`] when an identifier is absent, empty or
    ///   longer than 64 bytes, or when `STUDY_DATE` is absent. The payload is the tag name.
    /// * [`DicomParseError::InvalidDateFormat`] when `STUDY_DATE` does not parse as
    ///   `%Y%m%d`.
    ///
    /// Fields are checked in the order patient ID, study UID, series UID, SOP UID, study
    /// date; the first failure is returned.
    fn extract_from_dicom(dicom_obj: &InMemDicomObject) -> Result<Self, DicomParseError> {
        // Shared acceptance rule for the four identifiers: present, non-empty, <= 64 bytes.
        // Anything else is reported as a missing required field named `field`.
        let require_id = |value: Option<String>, field: &str| -> Result<String, DicomParseError> {
            match value {
                Some(text) if !text.is_empty() && text.len() <= 64 => Ok(text),
                _ => Err(DicomParseError::MissingRequiredField(field.to_string())),
            }
        };

        let patient_id = require_id(
            dicom_utils::get_text_value(dicom_obj, tags::PATIENT_ID),
            "PATIENT_ID",
        )?;
        let study_uid = require_id(
            dicom_utils::get_text_value(dicom_obj, tags::STUDY_INSTANCE_UID),
            "STUDY_INSTANCE_UID",
        )?;
        let series_uid = require_id(
            dicom_utils::get_text_value(dicom_obj, tags::SERIES_INSTANCE_UID),
            "SERIES_INSTANCE_UID",
        )?;
        let sop_uid = require_id(
            dicom_utils::get_text_value(dicom_obj, tags::SOP_INSTANCE_UID),
            "SOP_INSTANCE_UID",
        )?;

        // The study date only has to be present here; its content is validated by parsing.
        let study_date_str = match dicom_utils::get_text_value(dicom_obj, tags::STUDY_DATE) {
            Some(text) => text,
            None => {
                return Err(DicomParseError::MissingRequiredField(
                    "STUDY_DATE".to_string(),
                ));
            }
        };
        let study_date = NaiveDate::parse_from_str(&study_date_str, "%Y%m%d").map_err(|_| {
            DicomParseError::InvalidDateFormat(format!(
                "Study Date must be in YYYYMMDD format, got: {}",
                study_date_str
            ))
        })?;

        Ok(Self {
            patient_id,
            study_uid,
            series_uid,
            sop_uid,
            study_date,
            study_date_str,
        })
    }
}

 

pub fn make_image_info(
    tenant_id: &str,
    dicom_obj: &InMemDicomObject,
    fsize: Option<u32>,
) -> Result<DicomImageMeta, DicomParseError> {
    
    let common_meta = DicomCommonMeta::extract_from_dicom(dicom_obj)?;
  
    let instance_number = dicom_utils::get_int_value(dicom_obj, tags::INSTANCE_NUMBER);

    let content_date = dicom_utils::get_date_value_dicom(dicom_obj, tags::CONTENT_DATE)
        .map(|date| {
            let date_str = date.format("%Y%m%d").to_string();
            DicomDateString::try_from(date_str)
        })
        .transpose()
        .map_err(|_| {
            DicomParseError::InvalidDateFormat("Failed to convert content date".to_string())
        })?;

    let content_time = dicom_utils::get_text_value(dicom_obj, tags::CONTENT_TIME)
        .filter(|v| !v.is_empty())
        .map(|v| parse_dicom_time(v.as_str()))
        .transpose()
        .map_err(|_| {
            DicomParseError::InvalidTimeFormat("Failed to convert content_time".to_string())
        })?;

    let image_type = dicom_utils::get_text_value(dicom_obj, tags::IMAGE_TYPE)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<128>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert image type".to_string())
        })?;

    let image_orientation_patient =
        dicom_utils::get_text_value(dicom_obj, tags::IMAGE_ORIENTATION_PATIENT)
            .filter(|v| !v.is_empty())
            .map(|v| BoundedString::<128>::try_from(v))
            .transpose()
            .map_err(|_| {
                DicomParseError::ConversionError(
                    "Failed to convert image orientation patient".to_string(),
                )
            })?;
    let image_position_patient =
        dicom_utils::get_text_value(dicom_obj, tags::IMAGE_POSITION_PATIENT)
            .filter(|v| !v.is_empty())
            .map(|v| BoundedString::<64>::try_from(v))
            .transpose()
            .map_err(|_| {
                DicomParseError::ConversionError(
                    "Failed to convert image position patient".to_string(),
                )
            })?;

    let slice_thickness = dicom_utils::get_decimal_value(dicom_obj, tags::SLICE_THICKNESS);
    let spacing_between_slices =
        dicom_utils::get_decimal_value(dicom_obj, tags::SPACING_BETWEEN_SLICES);
    let slice_location = dicom_utils::get_decimal_value(dicom_obj, tags::SLICE_LOCATION);

    let samples_per_pixel = dicom_utils::get_int_value(dicom_obj, tags::SAMPLES_PER_PIXEL);
    let photometric_interpretation =
        dicom_utils::get_text_value(dicom_obj, tags::PHOTOMETRIC_INTERPRETATION)
            .filter(|v| !v.is_empty())
            .map(|v| BoundedString::<32>::try_from(v))
            .transpose()
            .map_err(|_| {
                DicomParseError::ConversionError(
                    "Failed to convert photometric interpretation".to_string(),
                )
            })?;

    let width = dicom_utils::get_int_value(dicom_obj, tags::ROWS);
    let columns = dicom_utils::get_int_value(dicom_obj, tags::COLUMNS);
    let bits_allocated = dicom_utils::get_int_value(dicom_obj, tags::BITS_ALLOCATED);
    let bits_stored = dicom_utils::get_int_value(dicom_obj, tags::BITS_STORED);
    let high_bit = dicom_utils::get_int_value(dicom_obj, tags::HIGH_BIT);
    let pixel_representation = dicom_utils::get_int_value(dicom_obj, tags::PIXEL_REPRESENTATION);

    let rescale_intercept = dicom_utils::get_decimal_value(dicom_obj, tags::RESCALE_INTERCEPT);
    let rescale_slope = dicom_utils::get_decimal_value(dicom_obj, tags::RESCALE_SLOPE);
    let rescale_type = dicom_utils::get_text_value(dicom_obj, tags::RESCALE_TYPE)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert rescale type".to_string())
        })?;

    let window_center = dicom_utils::get_text_value(dicom_obj, tags::WINDOW_CENTER)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert window center".to_string())
        })?;
    let window_width = dicom_utils::get_text_value(dicom_obj, tags::WINDOW_WIDTH)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert window width".to_string())
        })?;
    let transfer_syntax_uid = dicom_utils::get_text_value(dicom_obj, tags::TRANSFER_SYNTAX_UID)
        .filter(|v| !v.is_empty())
        .unwrap_or_else(|| "1.2.840.10008.1.2".to_string());

    let sop_class_uid = dicom_utils::get_text_value(dicom_obj, tags::SOP_CLASS_UID)
        .filter(|v| !v.is_empty())
        .ok_or_else(|| DicomParseError::MissingRequiredField("SOP Class UID".to_string()))?;

    let image_status = Some(
        BoundedString::<32>::try_from("ACTIVE".to_string()).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert image status".to_string())
        })?,
    );

   
    let study_uid_hash = hash_uid(&common_meta.study_uid).into();
    let series_uid_hash = hash_uid(&common_meta.series_uid).into();

   
    let now = chrono::Local::now().naive_local();

    Ok(DicomImageMeta {
        tenant_id: BoundedString::<64>::try_from(tenant_id.to_string()).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert tenant ID".to_string())
        })?,
        patient_id: BoundedString::<64>::try_from(common_meta.patient_id).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert patient ID".to_string())
        })?,
        study_uid: BoundedString::<64>::from_str(&common_meta.study_uid).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert study UID".to_string())
        })?,
        series_uid: BoundedString::<64>::from_str(&common_meta.series_uid).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert series UID".to_string())
        })?,
        sop_uid: BoundedString::<64>::from_str(&common_meta.sop_uid).map_err(|_| {
            DicomParseError::ConversionError("Failed to convert SOP UID".to_string())
        })?,
        study_uid_hash,
        series_uid_hash,

        instance_number,

        content_date,
        content_time,

        image_type,
        image_orientation_patient,
        image_position_patient,
        slice_thickness,
        spacing_between_slices,
        slice_location,
        samples_per_pixel,
        photometric_interpretation,
        width,
        columns,
        bits_allocated,
        bits_stored,
        high_bit,
        pixel_representation,
        rescale_intercept,
        rescale_slope,
        rescale_type,
        window_center,
        window_width,

        transfer_syntax_uid: BoundedString::<64>::try_from(transfer_syntax_uid).unwrap(),
        pixel_data_location: None,
        thumbnail_location: None,
        sop_class_uid: BoundedString::<64>::try_from(sop_class_uid).map_err(|_| {
            DicomParseError::SopClassUidIsEmpty("SOP Class UID is empty".to_string())
        })?,
        image_status,
        space_size: fsize,
        created_time: Some(now),
        updated_time: Some(now),
    })
}

/// Computes a CRC-32 over `tenant_id` immediately followed by `study_uid` (when given),
/// laid out in a zero-filled 128-byte buffer.
///
/// The whole 128-byte buffer is hashed, padding included, so results are identical to the
/// previous implementation for every input that fits. Input beyond 128 bytes in total is
/// ignored rather than causing an out-of-bounds panic.
fn make_crc32(tenant_id: &str, study_uid: Option<&str>) -> u32 {
    const BUF_LEN: usize = 128;

    let mut data = [0u8; BUF_LEN];
    let mut written = 0;

    // Append each part, clamping the copy to the space that is left in the buffer.
    for part in std::iter::once(tenant_id).chain(study_uid) {
        let bytes = part.as_bytes();
        let take = bytes.len().min(BUF_LEN - written);
        data[written..written + take].copy_from_slice(&bytes[..take]);
        written += take;
    }

    const_crc32::crc32(&data[..])
}
 
/// Parses a DICOM time (TM) value into a [`NaiveTime`].
///
/// Accepted forms, after trimming surrounding whitespace:
///
/// * `HHMMSS` and `HHMMSS.F…` — the fraction is read as a decimal fraction of a second,
///   so `120000.5` is half a second past noon.
/// * `HHMMSS.` — a trailing dot without fraction digits.
/// * `HHMM` — seconds default to zero.
///
/// # Errors
///
/// Returns the [`chrono::ParseError`] of the last attempted format when none matches.
fn parse_dicom_time(time_str: &str) -> Result<NaiveTime, chrono::ParseError> {
    // DICOM pads values to an even length with spaces; chrono rejects trailing input.
    let time_str = time_str.trim();

    // `%.f` consumes an optional ".digits" as a decimal fraction. The plain `%f` used
    // before reads the digits as an integer count of nanoseconds, which is wrong here.
    NaiveTime::parse_from_str(time_str, "%H%M%S%.f")
        // `%.f` needs at least one digit after the dot, so handle a bare trailing dot.
        .or_else(|_| NaiveTime::parse_from_str(time_str, "%H%M%S."))
        // The seconds component is optional in a TM value.
        .or_else(|_| NaiveTime::parse_from_str(time_str, "%H%M"))
}
/// Builds the study/series-level state record for one DICOM object.
///
/// # Parameters
///
/// * `tenant_id` - tenant the object belongs to; must fit a 64-character bounded string.
/// * `dicom_obj` - the parsed DICOM object to read tags from.
/// * `msg_study_uid` - optional study UID supplied by the caller; it is only used to
///   derive a synthetic accession number when the object has no usable one.
///
/// # Errors
///
/// * Any error from `DicomCommonMeta::extract_from_dicom` (missing identifiers, bad
///   study date).
/// * [`DicomParseError::InvalidTimeFormat`] when a birth, study or series time cannot be
///   parsed.
/// * [`DicomParseError::InvalidDateFormat`] when the raw study date does not fit the
///   bounded date type.
/// * [`DicomParseError::ConversionError`] when the tenant ID, an identifier, the
///   accession number or any optional text value does not fit its bounded string type.
///
/// Optional tags that are absent or empty are stored as `None`. The record's created and
/// updated times are both set to the current local time.
pub fn make_state_info(
    tenant_id: &str,

    dicom_obj: &InMemDicomObject,
    msg_study_uid: Option<&str>,
) -> Result<DicomStateMeta, DicomParseError> {
    let common_meta = DicomCommonMeta::extract_from_dicom(dicom_obj)?;

    // Validate the tenant before it is used for anything else, and report a bad value as
    // an error instead of panicking.
    let bounded_tenant_id = BoundedString::<64>::try_from(tenant_id.to_string()).map_err(|_| {
        DicomParseError::ConversionError("Failed to convert tenant ID".to_string())
    })?;

    // When the accession number is absent, empty or longer than 16 bytes, substitute
    // "X32CRC" followed by a CRC-32 of the tenant ID and `msg_study_uid`.
    // NOTE(review): with `msg_study_uid == None` the CRC depends on the tenant only, so
    // every such study of a tenant gets the same synthetic accession number — confirm
    // this is intended.
    let acc_num = dicom_utils::get_text_value(dicom_obj, tags::ACCESSION_NUMBER)
        .filter(|v| !v.is_empty() && v.len() <= 16)
        .unwrap_or_else(|| format!("X32CRC{}", make_crc32(tenant_id, msg_study_uid)));
    let modality = dicom_utils::get_text_value(dicom_obj, tags::MODALITY)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<16>::try_from(v))
        .transpose()
        .map_err(|_| DicomParseError::ConversionError("Failed to convert modality".to_string()))?;

    let patient_name = dicom_utils::get_text_value(dicom_obj, tags::PATIENT_NAME)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert patient name".to_string())
        })?;

    let patient_sex = dicom_utils::get_text_value(dicom_obj, tags::PATIENT_SEX)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<1>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert patient sex".to_string())
        })?;

    let patient_birth_date = dicom_utils::get_date_value_dicom(dicom_obj, tags::PATIENT_BIRTH_DATE);

    let patient_birth_time = dicom_utils::get_text_value(dicom_obj, tags::PATIENT_BIRTH_TIME)
        .filter(|v| !v.is_empty())
        .map(|v| parse_dicom_time(v.as_str()))
        .transpose()
        .map_err(|_| {
            DicomParseError::InvalidTimeFormat("Failed to convert patient birth time".to_string())
        })?;

    let patient_age = dicom_utils::get_text_value(dicom_obj, tags::PATIENT_AGE)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<16>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert patient age".to_string())
        })?;

    let patient_size = dicom_utils::get_decimal_value(dicom_obj, tags::PATIENT_SIZE);
    let patient_weight = dicom_utils::get_decimal_value(dicom_obj, tags::PATIENT_WEIGHT);

    let study_date = common_meta.study_date;

    let study_time = dicom_utils::get_text_value(dicom_obj, tags::STUDY_TIME)
        .filter(|v| !v.is_empty())
        .map(|v| parse_dicom_time(v.as_str()))
        .transpose()
        .map_err(|_| {
            DicomParseError::InvalidTimeFormat("Failed to convert study time".to_string())
        })?;

    let study_id = dicom_utils::get_text_value(dicom_obj, tags::STUDY_ID)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<16>::try_from(v))
        .transpose()
        .map_err(|_| DicomParseError::ConversionError("Failed to convert study ID".to_string()))?;

    let study_description = dicom_utils::get_text_value(dicom_obj, tags::STUDY_DESCRIPTION)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert study description".to_string())
        })?;

    let series_number = dicom_utils::get_int_value(dicom_obj, tags::SERIES_NUMBER);
    let series_date = dicom_utils::get_date_value_dicom(dicom_obj, tags::SERIES_DATE);

    let series_time = dicom_utils::get_text_value(dicom_obj, tags::SERIES_TIME)
        .filter(|v| !v.is_empty())
        .map(|v| parse_dicom_time(v.as_str()))
        .transpose()
        .map_err(|_| {
            DicomParseError::InvalidTimeFormat("Failed to convert series time".to_string())
        })?;

    let series_description = dicom_utils::get_text_value(dicom_obj, tags::SERIES_DESCRIPTION)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<256>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert series description".to_string())
        })?;

    let body_part_examined = dicom_utils::get_text_value(dicom_obj, tags::BODY_PART_EXAMINED)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert body part examined".to_string())
        })?;

    let protocol_name = dicom_utils::get_text_value(dicom_obj, tags::PROTOCOL_NAME)
        .filter(|v| !v.is_empty())
        .map(|v| BoundedString::<64>::try_from(v))
        .transpose()
        .map_err(|_| {
            DicomParseError::ConversionError("Failed to convert protocol name".to_string())
        })?;

    let series_related_instances =
        dicom_utils::get_int_value(dicom_obj, tags::NUMBER_OF_SERIES_RELATED_INSTANCES);

    let study_uid_hash = hash_uid(&common_meta.study_uid).into();
    let series_uid_hash = hash_uid(&common_meta.series_uid).into();

    // One timestamp for both fields so a fresh record has created == updated.
    let now = chrono::Local::now().naive_local();

    // The remaining conversions operate on data read from the file; surface failures as
    // errors, matching `make_image_info`, rather than unwrapping.
    let study_date_origin =
        DicomDateString::try_from(&common_meta.study_date_str).map_err(|_| {
            DicomParseError::InvalidDateFormat(format!(
                "Study Date must be in YYYYMMDD format, got: {}",
                common_meta.study_date_str
            ))
        })?;

    let patient_id = BoundedString::<64>::from_str(&common_meta.patient_id).map_err(|_| {
        DicomParseError::ConversionError("Failed to convert patient ID".to_string())
    })?;
    let study_uid = BoundedString::<64>::from_str(&common_meta.study_uid).map_err(|_| {
        DicomParseError::ConversionError("Failed to convert study UID".to_string())
    })?;
    let series_uid = BoundedString::<64>::from_str(&common_meta.series_uid).map_err(|_| {
        DicomParseError::ConversionError("Failed to convert series UID".to_string())
    })?;
    let accession_number = BoundedString::<16>::from_str(acc_num.as_str()).map_err(|_| {
        DicomParseError::ConversionError("Failed to convert accession number".to_string())
    })?;

    Ok(DicomStateMeta {
        tenant_id: bounded_tenant_id,
        patient_id,
        study_uid,
        series_uid,
        study_uid_hash,
        series_uid_hash,
        study_date_origin,
        // Patient information
        patient_name,
        patient_sex,
        patient_birth_date,
        patient_birth_time,
        patient_age,
        patient_size,
        patient_weight,

        // Study information
        study_date,
        study_time,
        accession_number,
        study_id,
        study_description,

        // Series information
        modality,
        series_number,
        series_date,
        series_time,
        series_description,
        body_part_examined,
        protocol_name,

        series_related_instances,
        // Timestamps
        created_time: now,
        updated_time: now,
    })
}

Go to Summary: how-to-build-cloud-dicom