Files in this release (2014-11-24):
- pdi_data_vault_framework_code_2_3.7z (3.5 MB)
- upgrade_to_version_2_3_readme.txt (5.2 kB)
- shared.xml (11.4 kB)

Version 2.3 is a minor update.
It adds optional e-mail notification after an error or success of the main jobs, and a new job that collects database table sizes.
That job could be scheduled once a week, for example.
Here are the 5 steps to upgrade from version 2.2 to version 2.3:


1:	Add the following lines to the properties file:

# Properties for the mail messages (if you want to use the mailing, fill in the right user/password/server etc. here)
mail_destination_address      = 
mail_sender_name              = etl_admin
mail_sender_address           = etl_admin@pdidatavaultfw.com
mail_smtp_server              = smtp.gmail.com
mail_smtp_port                = 465
mail_authentication_user      = 
mail_authentication_password  = 
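
For example, filled in for a Gmail account it could look like this (the destination address, user and password below are placeholders, not part of the upgrade; port 465 is the usual SSL/SMTPS port):

mail_destination_address      = dwh_team@example.com
mail_sender_name              = etl_admin
mail_sender_address           = etl_admin@pdidatavaultfw.com
mail_smtp_server              = smtp.gmail.com
mail_smtp_port                = 465
mail_authentication_user      = etl_admin@gmail.com
mail_authentication_password  = change_me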

2:	Replace the code in the file-based PDI repository with the code in pdi_data_vault_framework_code_2_3.7z
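
A minimal command-line sketch of this step, assuming p7zip is installed and /path/to/pdi_file_repository is a placeholder for the root of your file-based repository (back up the repository first; the -y switch overwrites existing files without prompting):

cd /path/to/pdi_file_repository
7z x /path/to/pdi_data_vault_framework_code_2_3.7z -y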

3:	Change the script that runs the full batch (/media/percona/disk2/pdi_dv_framework/run_job_complete_batch_data_warehouse.sh) to:

#!/bin/bash

# Errorcodes from kitchen/pan
#    * 0 : The job ran without a problem.
#    * 1 : Errors occurred during processing
#    * 2 : An unexpected error occurred during loading / running of the job
#    * 7 : The job couldn't be loaded from XML or the Repository
#    * 8 : Error loading steps or plugins (error in loading one of the plugins mostly)
#    * 9 : Command line usage printing

export JAVA_HOME=/media/percona/disk2/pdi_dv_framework/kff/software/jdk1.7.0_71
LOG_FILE_NAME=/media/percona/disk2/pdi_dv_framework/logs/job_complete_batch_staging_`date +%Y%m%d_%H:%M:%S`.log

cd /media/percona/disk2/pdi_dv_framework/kff/software/pdi/5.2
. kitchen.sh /rep:"pdi_file_repository_dv_demo_kff" -job=job_complete_batch_staging -dir=/staging_generic -param:par_id_rtyp=4 -param:par_subruntype=0 -param:par_send_error_mail_ind=0 -param:par_send_success_mail_ind=0 -level=Basic >> $LOG_FILE_NAME

LOG_FILE2=/media/percona/disk2/pdi_dv_framework/logs/job_data_vault_all_incl_md_`date +%Y%m%d_%H:%M:%S`.log

. kitchen.sh /rep:"pdi_file_repository_dv_demo_kff" -job=job_data_vault_all_incl_md -dir=/data_vault -param:id_data_vault=1 -param:ind_restart=0 -param:subruntype=0 -param:process_hub_satellites=1 -param:process_hub_status_satellites=1 -param:process_hubs=1 -param:process_link_group_validity_satellites=1 -param:process_link_satellites=1 -param:process_link_status_satellites=1 -param:process_link_validity_satellites=1 -param:process_links=1 -param:process_ref_tables=1 -param:par_send_error_mail_ind=0 -param:par_send_success_mail_ind=0 -level=Basic >> $LOG_FILE2

# Uncomment this to mail the logs. The mail configuration must be valid, including user and password!
# . kitchen.sh  /rep:"pdi_file_repository_dv_demo_kff" -job=job_send_mail -dir=/management -param:par_mail_body="See attached staging log file"    -param:par_mail_subject="Log of the ETL staging job"    -param:par_file_to_attach=$LOG_FILE_NAME -level=Basic
# . kitchen.sh  /rep:"pdi_file_repository_dv_demo_kff" -job=job_send_mail -dir=/management -param:par_mail_body="See attached Data Vault log file" -param:par_mail_subject="Log of the ETL Data Vault job" -param:par_file_to_attach=$LOG_FILE2     -level=Basic

# Uncomment these lines if you want the logfiles to be deleted after a successful run
# if [ $? -eq 0 ] ; then
#  rm $LOG_FILE_NAME
#  rm $LOG_FILE2
# fi
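
If you want the batch to run unattended, the script can be scheduled with cron. A minimal sketch, assuming the script has been made executable (the 02:00 nightly schedule is only an example):

# m h dom mon dow  command
0 2 * * * /media/percona/disk2/pdi_dv_framework/run_job_complete_batch_data_warehouse.sh

The new table-size collection job (see step 4) could be scheduled in the same way, for example once a week.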


4:	Run the following SQL in pdi_meta_dv_demo (the foreign keys from the hist table must be deleted, because they cause a problem when deleting from the 'current' tables):


alter table ref_data_vault_link_sources_hist drop foreign key fk_ref_dv_link_sources_hist_source_links;
alter table ref_data_vault_link_sources_hist drop foreign key fk_ref_dv_link_sources_hist_source_tables;

create or replace view vw_adm_database_table_sizes as
select table_schema                          as table_schema
,      table_name                            as table_name
,      now()                                 as date_checked
,      ROUND(data_length  / (1024 * 1024),2) as data_mb
,      ROUND(index_length / (1024 * 1024),2) as index_mb
from   information_schema.tables
where  table_type = 'BASE TABLE';

insert into ref_runtypes (id_rtyp,description) values (6,'Refresh database metrics');

CREATE TABLE adm_database_table_sizes
(
    table_schema   VARCHAR(128)  NOT NULL,
    table_name     VARCHAR(128)  NOT NULL,
    date_checked   DATETIME      NOT NULL,
    id_run_checked INT,
    data_mb        DECIMAL(18,2) NOT NULL,
    index_mb       DECIMAL(18,2) NOT NULL,
    PRIMARY KEY (table_schema,table_name)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE TABLE adm_database_table_sizes_hist
(
    table_schema   VARCHAR(128)  NOT NULL,
    table_name     VARCHAR(128)  NOT NULL,
    date_checked   DATETIME      NOT NULL,
    id_run_checked INT,
    data_mb        DECIMAL(18,2) NOT NULL,
    index_mb       DECIMAL(18,2) NOT NULL,
    PRIMARY KEY (table_schema,table_name,date_checked)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;
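
Once the new job has run, the collected sizes can be inspected directly. Two illustrative queries (not part of the upgrade itself):

-- The 10 largest tables from the most recent check
select *
from   adm_database_table_sizes
order  by data_mb desc
limit  10;

-- Total size per check date, to follow growth over time
select date_checked
,      round(sum(data_mb + index_mb),2) as total_mb
from   adm_database_table_sizes_hist
group  by date_checked
order  by date_checked;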
    
5:	Replace the file shared.xml with the new one provided.
	The changes involve the zeroDateBehavior and zeroDateTimeBehavior settings (convertToNull) and support for the Timestamp data type (Y).
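
For reference, the relevant attributes in a shared.xml connection entry typically look like the fragment below. This is only an illustrative sketch; the provided shared.xml is authoritative, and attribute codes can differ between PDI versions:

<attributes>
    <attribute><code>EXTRA_OPTION_MYSQL.zeroDateTimeBehavior</code><attribute>convertToNull</attribute></attribute>
    <attribute><code>SUPPORTS_TIMESTAMP_DATA_TYPE</code><attribute>Y</attribute></attribute>
</attributes>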
Source: upgrade_to_version_2_3_readme.txt, updated 2014-11-24