
PARALLELIZATION MPI+PHONONS

Elk Users
Elio
2014-12-13
2015-02-14
  • Elio

    Elio - 2014-12-13

    Dear All,

    I really need some help with the parallelization of the Elk code for phonon calculations. I am not an expert in Fortran; however, I spent two full days trying to find solutions on the internet and trying all sorts of compiler combinations in the make.inc file, but in vain. For example, adding the flag -fopenmp to mpif90 produces an error while running Elk (namely about the charge density); on the other hand, mpif90 does not accept the flag -openmp and crashes with an error (use mpi, include paths...). Compiling with a combination of ifort and mpif90 with -fopenmp produces a segmentation fault. In brief, I managed to compile successfully with the following make.inc:

    MKLROOT=/opt/intel/Compiler/11.1/069/mkl
    MAKE = make
    F90 = mpif90
    F90_OPTS = -O3 -ip -unroll
    F77 = mpif90
    F77_OPTS = -O3 -ip -unroll
    AR = ar
    LIB_SYS =
    LIB_LPK = lapack.a blas.a /opt/intel/Compiler/11.1/069/mkl/lib/em64t/libmkl_lapack95_lp64.a -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -limf -lsvml -lirc -liomp5 -lpthread -lifcore -lgfortran
    LIB_FFT = fftlib.a

    I have also realized that I can run the phonon calculations in serial: if I open several cluster windows, I can manage to run one point per window. However, this will become utterly impractical for the 8-atom system that I have to run.

    Please, can anyone advise me (and in detail) how to incorporate parallelization for phonon calculations using mpiexec or any other method?

    Waiting for an answer... Thanks in advance.

    P.S. I run mpiexec using a SLURM batch script. I presume I need to change things in here:

    #SBATCH --nodelist=NODELIST
    #SBATCH -o elk.o
    #SBATCH -J elk
    #SBATCH --partition=superlong
    #SBATCH --nodes=6
    #SBATCH --ntasks=48
    #SBATCH --cpus-per-task=1
    #SBATCH --error=elk.err

    #-----------------elk----------------

    EXEC_DIR=/home_cluster/fis718/eliemouj/elk-2.3.22/src/

    # unused binaries left over from the original (parsec) script template
    #EXEC_BIN=parsec_arpack_seq.x
    #EXEC_BIN=parsec_arpack_mpi.x
    EXEC_BIN=elk
    INPUTFILE=elk.in
    OUTPUTFILE=elk-${SLURM_JOB_ID}.out

    PP_DIR=${HOME}/parsec_pp

    PP_FILES="C_POTRE.DAT"

    #-----------------parallel env-----------------

    # select the MPI environment (the alternatives are commented out)
    MPI=INTEL
    #MPI=BULL
    #MPI=SERIAL

    if [ "${MPI}" == "INTEL" ]; then
    source /opt/intel/Compiler/11.1/069/bin/iccvars.sh intel64
    source /opt/intel/Compiler/11.1/069/bin/ifortvars.sh intel64
    source /opt/intel/impi/4.0.0.028/intel64/bin/mpivars.sh
    export I_MPI_PMI_LIBRARY=/usr/lib64/libpmi.so
    export I_MPI_FABRICS=shm:dapl
    fi
    if [ "${MPI}" == "BULL" ]; then
    mpibull2_devices -d=ibmr_gen2
    fi
    ulimit -s unlimited
    ulimit -a

    if [ -z "$SLURM_NPROCS" ] ; then
        if [ -z "$SLURM_NTASKS_PER_NODE" ] ; then
            SLURM_NTASKS_PER_NODE=1
        fi
        SLURM_NPROCS=$(( $SLURM_JOB_NUM_NODES * $SLURM_NTASKS_PER_NODE ))
    fi
    echo "SLURM_TASKS_PER_NODE = " $SLURM_TASKS_PER_NODE
    echo "SLURM_NTASKS_PER_CORE = " $SLURM_NTASKS_PER_CORE
    echo "SLURM_NTASKS_PER_NODE = " $SLURM_NTASKS_PER_NODE
    echo "SLURM_NTASKS_PER_SOCKET = " $SLURM_NTASKS_PER_SOCKET
    echo "SLURM_JOB_NUM_NODES = " $SLURM_JOB_NUM_NODES
    echo "SLURM_NNODES = " $SLURM_NNODES
    echo "SLURM_CPUS_PER_TASK = " $SLURM_CPUS_PER_TASK
    echo "SLURM_NPROCS = " $SLURM_NPROCS
    echo "SLURM_JOB_NODELIST = " $SLURM_JOB_NODELIST

    queuedir=$(pwd | sed -r 's/^.{23}(.*)/\~\1/g')
    date=$(date +%d/%m/%Y-%H:%M)
    printf "%s %s %s %s %s %s %s \n" "$date " "elk " "${OUTPUTFILE} " "48 " "superlong " "elk " "$queuedir" >> job-inicio.dat

    cd ${SLURM_SUBMIT_DIR}

    # check if we need to restart the calculation

    ID=${SLURM_JOB_NAME}.${SLURM_JOB_ID}
    STORAGE_DIR=/storage/fis718/suapasta/${ID}

    stagein()
    {
    #----------------------------------------
    # DO NOT TRUST sbcast. USE pdcp instead
    #----------------------------------------

    echo 'starting stagein'
    srun --ntasks=${SLURM_JOB_NUM_NODES} mkdir -p ${STORAGE_DIR}
    for pp in ${PP_FILES}; do
        /bin/cp -f ${PP_DIR}/${pp} ${STORAGE_DIR}/${pp}
    done
    
    /bin/cp -f ${EXEC_DIR}/${EXEC_BIN}          ${STORAGE_DIR}/${EXEC_BIN}
    /bin/cp -f ${SLURM_SUBMIT_DIR}/${INPUTFILE} ${STORAGE_DIR}/${INPUTFILE}
    ln -s ${STORAGE_DIR}/ ${SLURM_SUBMIT_DIR}
    echo 'finalizing stagein'
    

    }

    early()
    {
    echo ' '
    echo ' '

    touch stop_scf
    sleep 120
    # and so parsec does a clean kill ...
    

    }

    trap 'early' SIGHUP SIGINT SIGUSR1 SIGUSR2
    cd ${STORAGE_DIR}

    if [ "${MPI}" == "BULL" ]; then
    srun ./${EXEC_BIN}
    fi

    if [ "${MPI}" == "INTEL" ]; then
    HOSTFILE=/tmp/hosts.$SLURM_JOB_ID
    srun hostname -s | sort -u > ${HOSTFILE}
    mpdboot -n ${SLURM_NNODES} -f ${HOSTFILE} -r ssh
    mpdtrace -l
    echo "start executable"
    mpiexec -np ${SLURM_NPROCS} ${EXEC_DIR}/./${EXEC_BIN} <${INPUTFILE}> ${OUTPUTFILE}
    mpdallexit
    fi

    rm -f ${EXEC_BIN}

    # send time checker daemon

    exit

    Elio

     
  • Anton F.

    Anton F. - 2014-12-17

    Hi, Elio,

    I just want to stress that in Elk phonons are not parallelized via MPI (see the manual). So the only thing you can do is use OpenMP with as many cores as possible, or run the calculations in serial from different machines which share a directory. For the phonon calculations you therefore need to compile Elk not with mpif90 but simply with ifort or gfortran and the -openmp flag. Of course, after that you have to run Elk not via mpirun but just by executing the Elk binary.
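
    A minimal make.inc sketch for such an OpenMP-only build, based on the variables in your file above (untested; adapt the library lines to your setup, and note that recent ifort versions want -qopenmp instead of -openmp):

    # OpenMP-only build: no MPI wrapper needed
    MAKE = make
    F90 = ifort
    F90_OPTS = -O3 -ip -unroll -openmp
    F77 = ifort
    F77_OPTS = -O3 -ip -unroll -openmp
    AR = ar
    LIB_SYS =
    LIB_LPK = lapack.a blas.a
    LIB_FFT = fftlib.a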

    Phonon calculations in Elk are not fast and, as the developers often say, if you need to calculate phonons for a large system it is better to consider a pseudopotential code.

    Best regards,
    Anton F.

     
  • Elio

    Elio - 2014-12-20

    Dear Anton,

    Thanks very much for your reply. In fact, I have been running the phonon calculations in serial, opening as many windows as possible. For now it is working, since I am testing on small systems such as monolayer graphene. My main aim is to calculate the electron-phonon coupling, since this is the only code I know of that can calculate the constant straightforwardly. Speaking of which, I noticed that the electron-phonon coupling constant I get is not correct and is not the same as the one in the examples (for example Al). Is there still a bug in the electron-phonon program? I tried to play with all sorts of parameters, but in vain: I get a value of lambda of 3.53, whereas the correct value should be 0.47. Do you know anything about that? If not, whom shall I ask? This is really important for my research.

    Thank you

    Elio

     
  • Anton F.

    Anton F. - 2014-12-21

    Dear Elio,
    Which value of ngridq do you use? The results often depend on this, and usually a minimum of 4x4x4 is required. Also, do you use DFPT or the supercell method? The former was implemented in Elk not long ago, so it is better to try both approaches and compare.
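
    For example, switching between the two methods in elk.in looks roughly like this (a sketch; if I recall correctly, task 200 is the supercell route and task 205 the DFPT route):

    ngridq
    4 4 4

    ! supercell method
    tasks
    200

    ! ...or instead, for DFPT:
    ! tasks
    ! 205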

    Good luck,
    Anton.

     
  • Elio

    Elio - 2014-12-25

    Dear Anton,

    Thanks for your reply. I will be doing that. I have a few questions about this:

    (1) A 4x4x4 grid means 16 phonons. Will the electron-phonon coupling lambda be calculated based on only these 16 phonons, or will this involve an extrapolation to other points?

    (2) Is there a way to parallelize the calculation of the el-ph matrices, that is, for task 240?

    (3) If the calculation crashes, is there a way to restart from where it stopped, or does it have to start all over again?

    Thanks for your help

    Regards

    Elio

     
  • Anton F.

    Anton F. - 2014-12-27

    Dear Elio,
    As far as I know, Elk will perform an interpolation for calculating the electron-phonon coupling.
    Task 240 is definitely parallelized via OpenMP, but from the code it seems like you can also try to start it via mpirun.
    If task 200 or 205 crashed, as I think you know, you will have to recalculate only those dynamical matrices which have not been calculated, while the already calculated ones can be retained (if you don't change parameters, of course). As for task 240, I'm afraid you'll have to start it all over again.
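
    A quick way to check the restart state (a sketch; if I remember correctly, Elk stores each finished dynamical-matrix piece in its own DYN_*.OUT file in the run directory and skips those that already exist on a rerun):

    # count the dynamical-matrix pieces completed so far
    ls DYN_*.OUT 2>/dev/null | wc -l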

    Best regards,
    Anton.

     
  • Elio

    Elio - 2014-12-28

    Dear Anton.

    First of all I want to thank you for your help, and I apologize for asking a lot of questions. I have tried a 6x6x6 phonon grid (16 points) for Al, but I am still getting a really high value of the electron-phonon coupling, something like 2.22, whereas the right value is 0.47. The thing is, I want to calculate lambda for a theoretical system that I have, so I must be sure that the results are reliable. Either there is still a bug in the electron-phonon code or something is wrong in the input. I have tried Fermi-Dirac smearing and Methfessel-Paxton, but neither gave correct results. The input is:

    avec
    1.0 1.0 0.0
    1.0 0.0 1.0
    0.0 1.0 1.0

    ! vertices for dispersion plot
    plot1d
    4 300 : nvp1d, npp1d
    0.0 0.0 0.0 : vlvp1d
    0.5 0.5 0.0
    1.0 0.0 0.0
    0.5 0.0 0.0

    ngridq
    6 6 6

    scale
    3.8267

    mixtype
    3

    ntemp
    40

    lmaxmat
    6

    epsforce
    1.e-6

    epsengy
    1.e-7

    epsstress
    1.e-5

    swidth
    0.005

    epspot
    1.e-9

    ! this is the relative path to the species files
    sppath
    '/home_cluster/fis718/eliemouj/elk-2.3.22/species/'
    atoms
    1 : nspecies
    'Al.in' : spfname
    1 : natoms
    0.0 0.0 0.0 0.0 0.0 0.0 : atposl, bfcmt

    ngridk
    24 24 24

    (2) Something else: I am getting a total energy of -242... Hartree for Al, whereas using other codes I get -4.1 Rydberg. What is the source of this discrepancy?

    I am sorry to bother you with this. It is just that I cannot figure out what is going wrong.

    Regards

    Elio

     
  • Anton F.

    Anton F. - 2014-12-30

    Dear Elio,

    Looks like the input is OK, so it indeed might be a bug in the electron-phonon code... I wonder why nobody else joins our conversation, especially somebody from the developers. I hope that eventually they'll say something about this.
    As for the total energy, I'm not sure, but I think the values of this quantity from different codes do not have to be the same. The things that should be more or less close are the band gaps, densities of states, etc.

    Best regards,
    Anton

     
  • supermarche

    supermarche - 2015-01-09

    Dear Elio,

    all-electron total energies per unit cell should be comparable if you choose the computational parameters as close as possible (apply the same approximations), i.e. the same E_xc, k-mesh, converged basis, and so on. However, pseudopotentials (PPs) are not all-electron, and thus the total energy depends on the PP.

    I am not an expert on the implementation of the e-ph part of Elk, but there should be two independent smearing variables: the smearing width A used to calculate the DOS (needed to determine the occupations in an scf cycle) and the smearing width B used for the e-ph integration. The first in principle depends on the k-point density (the result can be checked against the tetrahedron method). The second depends on the q-point and k-point density, and thus should be different. Try to redo the e-ph summation with a different swidth (this is the smearing width B used in the e-ph summation in the code, see ephcouple.f90, if I am not mistaken).
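
    In elk.in that would mean rerunning only the e-ph step, roughly like this (a sketch; the value is just illustrative, and the already finished phonon tasks need not be repeated, if task 240 is indeed where the summation happens):

    ! redo only the e-ph summation with a different broadening
    tasks
    240

    swidth
    0.01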

    Best Regards,
    Marc

     
  • Elio

    Elio - 2015-01-27

    Dear supermarche,

    Thanks for your e-mail, and sorry for the late reply: I was travelling and read your e-mail only yesterday. I repeated the electronic calculations using the same input (in this case with the Quantum Espresso code); however, I still get very different total energies. I also tried the linear-response method to calculate the phonons, which was implemented recently in the Elk code (but not for all exchange-correlation functionals, correct?), for Al; however, the calculation of the dynamical matrix elements was taking longer than with the supercell method. I thought it would take less time. Any comments on that? Has anyone actually tested the method, or even the electron-phonon code, and got the same results as in the example? I am really interested to know.

    Thank you

    Regards

    Elio

     
  • supermarche

    supermarche - 2015-01-28

    Dear Elio,

    a) the total energies from Quantum Espresso (QE) depend on the pseudopotentials used. Neither norm-conserving nor ultrasoft pseudopotentials are all-electron per se. Whether the PAW implementation in QE is all-electron, I don't know. However, in principle one can construct all-electron total energies with PAW (see P. Blöchl's original PRB).

    b) In order to compare total energies I suggest using FP-LMTO, wien2k, FPLO, fleur, or any other suitable all-electron code.

    c) Regarding the linear response code in elk: I didn't use it yet.

    As I wrote earlier, the result of the e-ph part depends very much on the parameters (BZ integration, smearing, phonon interpolation, ...). If I have time, I might post an example input file.

    Best Regards,
    Marc

     
  • supermarche

    supermarche - 2015-01-29

    Dear Elio,

    I get the following total energies for Al:

    elk: -241.9208260 Ha
    qe: -241.9177915 Ha (PAW / Al.pz-n-kjpaw_psl.0.1.UPF)

    Though I suppose I missed some numerical parameters, because the numbers should be closer. Be aware that you have to use an all-electron code / approximation in order to compare total energies (see my posts above).

    All the best,
    Marc

     
  • Elio

    Elio - 2015-01-29

    Dear Marc,

    Thanks very much for your help and the effort you are making to clarify things for me. I understand the total energy issue now. It was my mistake: I was still using pseudopotentials in QE, which is why I got very different results.

    As for the e-ph interaction, I would appreciate it if you could post an input file for this later, if you have time. Meanwhile, I will keep working on this to see if I can figure something out. As you previously posted, there must be an swidth for the el-ph calculation, and adjusting it should produce the correct lambda and Tc. However, how would we know which swidth to use (and thus produce reliable results) when the system is purely theoretical, for example, and no experimental backup exists?

    Regards

    Elio

     

    Last edit: Elio 2015-01-29
    • supermarche

      supermarche - 2015-01-30

      Dear Elio,

      the smearing width (swidth) for BZ integration is purely artificial (or, so to say, a computational parameter). The best way would be to use a tetrahedron method for BZ integration; however, without corrections the total energy is then no longer variational (for advanced BZ integration schemes see for example M. Kawamura, Y. Gohda, and S. Tsuneyuki, Phys. Rev. B 89, 094515, and the reference to P. Blöchl's paper therein). Checking the swidth for the DOS is easy, because in most cases you can compare it to a DOS calculated with a method based on tetrahedron integration: you choose the swidth which comes closest to the tetrahedron result without artificial wiggles. For e-ph you usually don't go below this combination of swidth and k-mesh. So if lambda is stable for a given swidth and a bigger k-mesh, I would consider the calculation converged for this swidth and k-mesh.

      So the way to go (a sketch of step 1 follows below):
      1) converge the DOS with the tetrahedron method
      2) choose your swidth according to 1)
      3) increase the k-mesh for this swidth until the lambdas are stable for the BZ e-ph integration at each single q-point
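
      For step 1 in Elk that would look roughly like this (a sketch; task 10 computes the density of states, and the values are only starting points), to be compared against a tetrahedron DOS from another code:

      ! smeared DOS run, to be repeated for a few swidth values
      tasks
      10

      ngridk
      24 24 24

      swidth
      0.005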

      All the best,
      Marc

       
  • Anton F.

    Anton F. - 2015-02-01

    Dear Marc,

    I really like your suggestion on how to choose swidth, since this parameter has always been difficult for me too. However, as far as I know, Elk doesn't support the tetrahedron method... or am I missing something?

    Best regards,
    Anton.

     
  • supermarche

    supermarche - 2015-02-02

    Dear Anton,

    you are correct: as far as I know, the tetrahedron method is not implemented in Elk due to the non-trivial corrections needed to conserve the variational principle (which is heavily used for spin-orbit coupling and non-collinear magnetism in Elk), as mentioned above. Since I usually cross-check results with other codes too, I compare the DOS and the swidth value against codes which support tetrahedron integration (e.g. QE, FPLO, LMTO, ...), even though one has to be very careful because of the different basis sets.

    I suppose the Elk developers would appreciate a first reference implementation of the tetrahedron method ; )

    All the best,
    Marc

     
  • Elio

    Elio - 2015-02-04

    Dear Marc,

    Thanks for your great suggestion. Just to confirm: for swidth, are you talking about the one in the electronic calculations or the one used in the el-ph calculations? swidth (beta in the Elk code?) is not used in the phonon calculations, correct? It is only involved, and chosen, in the el-ph calculations while integrating over the BZ. And would the DOS in this case be the electronic or the phononic one? I am a bit confused, sorry; maybe I missed something.

    Regards

    elio

     

    Last edit: Elio 2015-02-04
  • supermarche

    supermarche - 2015-02-09

    Dear Elio,

    I was talking about both when I mentioned swidth. In order to make this clear, I rephrase the schedule from above:

    1) the idea of smearing goes back to the problem of the discontinuity in the occupation numbers at T=0 K in metals. There are three solutions: an insanely dense k-mesh, a clever integration routine (e.g. the tetrahedron method), or smearing/broadening. Usually the last option is chosen; however, you have to choose your combination of k-mesh and smearing appropriately. I usually compare the DOS from tetrahedron integration to the DOS with smearing in order to set the swidth for a given k-mesh. -> fix kmesh/swidth

    2) Calculate the phonons.

    3) e-ph part: start with the swidth/kmesh from above and increase the k-mesh density until the e-ph coupling constant / phonon linewidths are converged at each single q-point.

    In Elk, as far as I know, the same parameter is used for the e-ph BZ integration and for the broadening in an scf run. If you compute the properties step by step, you can set swidth differently for the phonon calculation (scf runs) and for the e-ph integration.

    All the best,
    Marc

     
  • J. K. Dewhurst

    J. K. Dewhurst - 2015-02-14

    Dear All,

    Regarding the original post, supercell phonons are now running with MPI in Elk version 3.0.4.
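
    For example (a sketch, assuming an MPI-enabled build and an elk.in in the current directory), the whole phonon run can then simply be launched as:

    # distribute the supercell phonon run over MPI processes
    # (48 tasks here, matching the SLURM script earlier in this thread)
    mpirun -np 48 ./elk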

    Regards,
    Kay.

     
