nmr-relax-users Mailing List for relax
Molecular dynamics by NMR data analysis
Brought to you by:
edauvergne,
troelslinnet
You can subscribe to this list here.
2006 |
Jan
|
Feb
|
Mar
(6) |
Apr
(1) |
May
|
Jun
|
Jul
(4) |
Aug
|
Sep
(49) |
Oct
(48) |
Nov
(21) |
Dec
(21) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 |
Jan
(26) |
Feb
|
Mar
(7) |
Apr
(9) |
May
(14) |
Jun
(25) |
Jul
(7) |
Aug
(19) |
Sep
(22) |
Oct
(34) |
Nov
(5) |
Dec
(13) |
2008 |
Jan
(25) |
Feb
(18) |
Mar
(21) |
Apr
(28) |
May
(13) |
Jun
|
Jul
(2) |
Aug
(5) |
Sep
(4) |
Oct
(93) |
Nov
(38) |
Dec
(6) |
2009 |
Jan
(8) |
Feb
(23) |
Mar
(13) |
Apr
(2) |
May
(2) |
Jun
|
Jul
(12) |
Aug
(38) |
Sep
(11) |
Oct
(21) |
Nov
(5) |
Dec
(42) |
2010 |
Jan
(17) |
Feb
(33) |
Mar
(9) |
Apr
(2) |
May
(6) |
Jun
(2) |
Jul
(2) |
Aug
(1) |
Sep
(3) |
Oct
(2) |
Nov
(3) |
Dec
(17) |
2011 |
Jan
(31) |
Feb
(21) |
Mar
(6) |
Apr
(17) |
May
(21) |
Jun
(18) |
Jul
(13) |
Aug
(4) |
Sep
(3) |
Oct
(16) |
Nov
(1) |
Dec
|
2012 |
Jan
(7) |
Feb
(21) |
Mar
(67) |
Apr
(73) |
May
(28) |
Jun
(14) |
Jul
(20) |
Aug
(22) |
Sep
(47) |
Oct
(21) |
Nov
(21) |
Dec
(11) |
2013 |
Jan
(22) |
Feb
(29) |
Mar
(18) |
Apr
(4) |
May
(11) |
Jun
(38) |
Jul
(12) |
Aug
(34) |
Sep
|
Oct
|
Nov
|
Dec
(8) |
2014 |
Jan
(5) |
Feb
(23) |
Mar
(72) |
Apr
|
May
(30) |
Jun
(31) |
Jul
(13) |
Aug
(27) |
Sep
(24) |
Oct
(4) |
Nov
(15) |
Dec
(8) |
2015 |
Jan
(8) |
Feb
(5) |
Mar
(8) |
Apr
(5) |
May
(4) |
Jun
(8) |
Jul
|
Aug
|
Sep
(3) |
Oct
(10) |
Nov
(18) |
Dec
(5) |
2016 |
Jan
(4) |
Feb
(1) |
Mar
(3) |
Apr
(14) |
May
(3) |
Jun
(7) |
Jul
(4) |
Aug
(17) |
Sep
(79) |
Oct
(39) |
Nov
(10) |
Dec
|
2017 |
Jan
(5) |
Feb
(11) |
Mar
(2) |
Apr
(8) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2019 |
Jan
(1) |
Feb
(10) |
Mar
(10) |
Apr
(15) |
May
|
Jun
(12) |
Jul
(2) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2020 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(1) |
Jun
|
Jul
|
Aug
(5) |
Sep
(6) |
Oct
|
Nov
|
Dec
|
2021 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(3) |
Dec
(2) |
2022 |
Jan
(5) |
Feb
(8) |
Mar
|
Apr
|
May
|
Jun
(1) |
Jul
(3) |
Aug
|
Sep
|
Oct
(1) |
Nov
(1) |
Dec
|
2023 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(1) |
From: Mark B. <mar...@tu...> - 2023-12-16 16:58:05
|
Dear Edward, I'm trying to run a global fit on some 15N CPMG relaxation dispersion data. I have already run the residues with individual fits and am using a script (copied below), based on one in the relax example scripts folder. At the moment the script fails due to the size of the grid search. I thought that by loading existing single-residue fits and using the relax_disp.parameter_copy function, this should reduce the size of the grid search. Could you provide any suggestions? Many thanks, Mark --------------------------------------------------------------------------- # Python module imports. from os import getcwd, sep # relax module imports. from auto_analyses.relax_disp import Relax_disp from data_store import Relax_data_store; ds = Relax_data_store() from lib.dispersion.variables import MODEL_R2EFF, MODEL_CR72, MODEL_CR72_FULL, MODEL_TSMFK01, MODEL_IT99, MODEL_B14_FULL, MODEL_NS_CPMG_2SITE_EXPANDED, MODEL_NS_CPMG_2SITE_3D_FULL, MODEL_NS_CPMG_2SITE_STAR_FULL from pipe_control.mol_res_spin import spin_loop ######################################### #### Setup # The pipe names. if not (hasattr(ds, 'pipe_name') and hasattr(ds, 'pipe_bundle') and hasattr(ds, 'pipe_type') and hasattr(ds, 'pipe_bundle_cluster')): # Set pipe name, bundle and type. ds.pipe_name = 'base pipe' ds.pipe_bundle = 'relax_disp' ds.pipe_type = 'relax_disp' ds.pipe_bundle_cluster = 'cluster' # The data path if not hasattr(ds, 'data_path'): ds.data_path = getcwd() # The models to analyse. print(hasattr(ds,'models')) if not hasattr(ds, 'models'): if 0: ds.models = [MODEL_R2EFF, MODEL_CR72, MODEL_CR72_FULL] else: ds.models = [MODEL_CR72] print(ds.models) # The number of increments per parameter, to split up the search interval in grid search. # This is not used, when pointing to a previous result directory. # Then an average of the previous values will be used. 
if not hasattr(ds, 'grid_inc'): ds.grid_inc = 11 # The number of Monte-Carlo simulations for estimating the error of the parameters of the fitted models. if not hasattr(ds, 'mc_sim_num'): ds.mc_sim_num = 10 # The model selection technique. Either: 'AIC', 'AICc', 'BIC' if not hasattr(ds, 'modsel'): ds.modsel = 'AIC' # The previous result directory with R2eff values. if not hasattr(ds, 'pre_run_dir'): ds.pre_run_dir = './fit_CR72_final/' + sep + 'R2eff' print(ds.pre_run_dir) # The result directory. if not hasattr(ds, 'results_dir'): ds.results_dir = './fit_clustered_CR72/' ## The optimisation function tolerance. ## This is set to the standard value, and should not be changed. #if not hasattr(ds, 'opt_func_tol'): # ds.opt_func_tol = 1e-25 #Relax_disp.opt_func_tol = ds.opt_func_tol #if not hasattr(ds, 'opt_max_iterations'): # ds.opt_max_iterations = int(1e7) #Relax_disp.opt_max_iterations = ds.opt_max_iteration ######################################### # Create the data pipe. ini_pipe_name = '%s - %s' % (ds.models[0], ds.pipe_bundle) pipe.create(pipe_name=ini_pipe_name, bundle=ds.pipe_bundle, pipe_type=ds.pipe_type) # Load the previous results into the base pipe. results.read(file='results', dir=ds.pre_run_dir) # Create a new pipe, where the clustering analysis will happen. # We will copy the pipe to get all information. pipe.copy(pipe_from=ini_pipe_name, pipe_to=ds.pipe_name, bundle_to=ds.pipe_bundle_cluster) pipe.switch(ds.pipe_name) #pipe.display() #Spin clusters N15_cluster_remove = [":2@N",":3@N",":4@N"] for spin_id in N15_cluster_remove: relax_disp.cluster('N15_cluster', spin_id) # See the clustering in the current data pipe "cdp". for key in cdp.clustering: print(key, cdp.clustering[key]) # Print parameter kex before copying. #for cur_spin, mol_name, resi, resn, spin_id in spin_loop(full_info=True, return_id=True, skip_desel=True): # print(cur_spin.kex) ## Make advanced parameter copy. 
# It is more advanced than the value.copy user function, in that clustering is taken into account. # When the destination data pipe has spin clusters defined, then the new parameter values, when required, will be taken as the median value. relax_disp.parameter_copy(pipe_from=ini_pipe_name, pipe_to=ds.pipe_name) pipe.display() # Run the analysis. Relax_disp(pipe_name=ds.pipe_name, pipe_bundle=ds.pipe_bundle_cluster, results_dir=ds.results_dir, models=ds.models, grid_inc=ds.grid_inc, mc_sim_num=ds.mc_sim_num, modsel=ds.modsel) |
From: Edward d'A. <ed...@nm...> - 2022-11-30 09:32:18
|
Hi Paul, Great to hear from you again! I hope all is well over there in Vienna. Sorry for the super late reply - I've been flat out lately. As for your questions: On Wed, 26 Oct 2022 at 17:44, Paul SCHANDA <pau...@is...> wrote: > Hi, > > I am running into troubles with R1rho dispersion fits. In brief, we get totally unrealistic values for phi_ex (>10000) and k_ex (<1 s-1). > It seems that the default parameter limits for phi_ex are such that there is only a lower bound (0) but no upper bound. This can result in physically totally meaningless situations, like a strong R1rho dispersion profile, with a fitted exchange rate constants of 0.2 s-1…. > I would like to have some control over the grid search and the analysis. > > My questions: > * does anyone know how to set/change the grid parameter range? Has anyone run into similar issues? For the initial grid search, in a custom script you can simply pass in a list of numbers for the lower bounds and or upper bounds. This issue is quite common and most people end up reporting that this happens for peaks that are very weak at the very start, so that the fit simply fails no matter what you do. This may not be the case for your data. What would be best is to actually map out the parameter optimisation space and visualise it in 3D. If you install the ancient, yet still very powerful, OpenDX software (the code is still on SourceForge and a number of Linux distributions package it). You can then use the relax dx.map user function to visualise the optimisation space: https://www.nmr-relax.com/manual/dx_map.html We do need to implement other ways of visualising this in relax, for example with mathplotlib which is currently an optional dependency, but that has not materialised. For anyone interested, it should be a pretty easy task to duplicate dx.map to a new mathplotlib.map. > * does anyone have a script to do the analysis without going through the auto-analysis? The manual is (for me) not useful to design the analysis. 
The best way is to copy the auto-analysis file itself (auto_analyses/relax_disp.py) and modify that as you see fit. You can have this sitting next to your analysis script (or even copied and pasted inside it) and, instead of importing the auto-analysis with: # relax module imports. from auto_analyses.relax_disp import Relax_disp you can import your modified auto-analysis with: # My module imports. from my_relax_disp_auto_analysis import Relax_disp That way you can go in and add upper (and maybe lower) bounds where the grid search user function is called (self.interpreter.minimise.grid_search()). However I'd first recommend mapping out the optimisation space to see if you have a minimum anywhere in there first. This will save a lot of time. In relaxation dispersion, as far as anyone has seen that I am aware of, the minimisation space is very simple. You have a single very broad quadratic minimum. There are no multi-minima spaces, long twisting tunnels, or banana problems (that's a real thing) in the optimization space. So directly visualising the space is incredibly powerful and will shape your strategy for dealing with the data point. Regards, Edward P. S. If you need upper bounds on optimisation, for now you'll need to modify the relax source code itself. We had plans many years ago to open up this interface via new user functions, however that work never materialised. See the function linear_constraints() in the file specific_analyses/relax_disp/parameters.py for the hard-coded values. |
From: Paul S. <pau...@is...> - 2022-10-26 15:44:12
|
Hi, I am running into troubles with R1rho dispersion fits. In brief, we get totally unrealistic values for phi_ex (>10000) and k_ex (<1 s-1). It seems that the default parameter limits for phi_ex are such that there is only a lower bound (0) but no upper bound. This can result in physically totally meaningless situations, like a strong R1rho dispersion profile, with a fitted exchange rate constants of 0.2 s-1…. I would like to have some control over the grid search and the analysis. My questions: * does anyone know how to set/change the grid parameter range? Has anyone run into similar issues? * does anyone have a script to do the analysis without going through the auto-analysis? The manual is (for me) not useful to design the analysis. Thanks Paul 1. |
From: Edward d'A. <ed...@nm...> - 2022-07-28 19:46:13
|
On Thu, 28 Jul 2022 at 16:52, Johan Wallerstein <joh...@gu...> wrote: > > I appreciate these comments and thoughts on the subject, I’m soon back to working mode and will then go more into the topic. If I find something of interest I'll post it. No problems! Note that I'll soon be on summer holiday for the most of August on a beach in France, so don't be surprised if you don't find me very responsive ;) Regards, Edward |
From: Johan W. <joh...@gu...> - 2022-07-28 14:52:16
|
Hi, Thank you very much. I apologise for very late reply, the main reason for my delayed reply is that my summer holiday with kids/family started in beginning of July. For some reason I believe it is good for me to decouple from the job once in a while. See comments further down: On 2 Jul 2022, at 10:06, Edward d'Auvergne <ed...@nm...<mailto:ed...@nm...>> wrote: On Thu, 30 Jun 2022 at 08:19, Johan Wallerstein <joh...@gu...<mailto:joh...@gu...>> wrote: Hi, I perform CPMG-RD cluster fitting using relax, cluster refer to grouping several residues (between 3 to 14 residues) for data from a 45 kDa protein. The software is a good tool for doing this analysis. I marginally adjust the core protocol with the header """Script for performing a full relaxation dispersion analysis using CPMG-type data.""" I use only the CR72-model and I have a PRE_RUN_DIR from a run with individual residues. I use duplicates for error estimation, on both the 800 MHz and 900 MHz data set, and AIC for model selection. When I analyse the clustered data I’m curious to get R2eff_(back_calc) for each data point. I clarify my main question by attaching some of my data. ########### For residue 530, when I do individual fit I get this output. From the log-file: ——— The spin cluster [':530@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 2 25 21.11216 25.11216 CR72 - relax_disp 5 25 13.93686 23.93686 The model from the data pipe 'CR72 - relax_disp' has been selected. 
——— The file ‘disp_530_N.out’ in /final gives the following data table: # Experiment_name Field_strength_(MHz) Disp_point_(Hz) R2eff_(measured) R2eff_(back_calc) R2eff_errors 'SQ CPMG' 799.870000000 25.000000 17.523783179912268 16.953711340740483 0.831932502443187 'SQ CPMG' 799.870000000 50.000000 16.513029763549930 16.914478241596726 0.805586049587058 'SQ CPMG' 799.870000000 75.000000 16.920353186819355 16.875245142453196 0.816049323427317 'SQ CPMG' 799.870000000 100.000000 16.667402888129434 16.836012043882192 0.809527349094067 'SQ CPMG' 799.870000000 150.000000 16.454146002323920 16.757546676539960 0.804090431533660 'SQ CPMG' 799.870000000 200.000000 16.359623786385509 16.679111600438773 0.801698521274394 'SQ CPMG' 799.870000000 300.000000 15.525257427659495 16.523477804972345 0.781054888748662 'SQ CPMG' 799.870000000 350.000000 16.609858567997016 16.447662190184474 0.808054742944598 'SQ CPMG' 799.870000000 400.000000 16.844330710216166 16.374401478154368 0.814080812205130 'SQ CPMG' 799.870000000 500.000000 17.414128601521103 16.238705811895670 0.829011905615397 'SQ CPMG' 799.870000000 600.000000 16.093980388806685 16.120475644003818 0.795034804815920 'SQ CPMG' 799.870000000 800.000000 15.988036247232372 15.937187687218284 0.792401090807446 'SQ CPMG' 799.870000000 1000.000000 15.732649459437805 15.811741022120714 0.786107934589661 'SQ CPMG' 900.130000000 57.000000 19.386713898811351 20.163621643615215 0.801212497068354 'SQ CPMG' 900.130000000 114.000000 21.873502893081564 20.050473540803750 0.859660006101508 'SQ CPMG' 900.130000000 171.000000 19.133628964210569 19.937331394199191 0.795598311646227 'SQ CPMG' 900.130000000 228.000000 20.497316023709256 19.824330722189416 0.826567798566107 'SQ CPMG' 900.130000000 285.000000 20.091262254550443 19.712140304427066 0.817160298225920 'SQ CPMG' 900.130000000 400.000000 19.177817248045365 19.494278567900892 0.796574222459005 'SQ CPMG' 900.130000000 514.000000 19.111643299707755 19.300194513689348 0.795113430566997 'SQ 
CPMG' 900.130000000 628.000000 18.432363807026835 19.135138271300775 0.780352695478047 'SQ CPMG' 900.130000000 742.000000 19.383070346051138 18.999531230125285 0.801131245976946 'SQ CPMG' 900.130000000 857.000000 18.560791856291317 18.889165645990943 0.783110910382522 'SQ CPMG' 900.130000000 971.000000 18.810639108776328 18.801416118812085 0.788520121686263 'SQ CPMG' 900.130000000 1085.000000 18.943973311789268 18.730884832131551 0.791430360141496 ########### For a cluster fit (including residue 530) I get this output from the log-file: ——— The spin cluster [':530@N', ':536@N', ':537@N', ':538@N', ':550@N', ':551@N', ':552@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 14 175 458.66116 486.66116 CR72 - relax_disp 23 175 117.29418 163.29418 The model from the data pipe 'CR72 - relax_disp' has been selected. ——— This looks reasonable. This is 7 spins, so on average, 117.29/7 = 16.76, which is a little more than the single spin value of 13.94. But there is no corresponding data table. Do you mean that there is no ‘disp_530_N.out’ file for the clustered analysis? The file is there, I did not realise that the file disp_???.out in a clustered analysis contains the new data. I think I should have understood this… ########### QUESTION 1: Is it possible to get, or easily create a table with, in my case, 175 R2eff_(back_calc) for the cluster, so that I can get better resolution on the Chi2 = 117.29418 above ? And possibly study how a single residue affect the cluster fitting. Try the value.write() user function: https://www.nmr-relax.com/manual/value_write.html Make sure to set the 'bc' flag to True. Thanks! QUESTION 2: Are there any reference to methods used for doing efficient selection of residues included in the cluster? There is obviously an immense number of combinations of residues to make clusters in a normal size protein. I consider making a program/script for this process and would be curious to get some inspiration. 
As far as I am aware, human logic is used for this process. You identify a rigid moving unit in your system yourself with similar dispersion results and then use clustering on that. I would assume that an automated system to find clusters would be computationally very expensive, despite being able to run on a computer cluster via MPI. And that such a project would take up half or more of a PhD student's time. Then again, I wouldn't be surprised if there is now a publication exploring this concept. If you do find one, I'd be interested in hearing about it. I appreciate these comments and thoughts on the subject, I’m soon back to working mode and will then go more into the topic. If I find something of interest I'll post it. Before holiday I “somehow" managed to create a semi-automatic approach to run all possible combinations of a set of residues in relax, ie the number of combinations of N things taken K at a time. Lets say the cluster I want to investigate consists of 8 residues (N) but I’m not confident in that selection. The 8 residues is the core cluster and I let relax run the combinations of 6 (K) of these 8 core residues, it turns out to be 28 relax runs. I’m not sure this is a good approach, I need to evaluate it. A problem with the data is the low signal / noise ratio. It is a large protein. This has of course many consequences for my CPMG-RD analysis with relax. (28 comes from N!/K!(N-K)! ) Best regards Johan On 2 Jul 2022, at 10:06, Edward d'Auvergne <ed...@nm...<mailto:ed...@nm...>> wrote: On Thu, 30 Jun 2022 at 08:19, Johan Wallerstein <joh...@gu...<mailto:joh...@gu...>> wrote: Hi, I perform CPMG-RD cluster fitting using relax, cluster refer to grouping several residues (between 3 to 14 residues) for data from a 45 kDa protein. The software is a good tool for doing this analysis. 
I marginally adjust the core protocol with the header """Script for performing a full relaxation dispersion analysis using CPMG-type data.""" I use only the CR72-model and I have a PRE_RUN_DIR from a run with individual residues. I use duplicates for error estimation, on both the 800 MHz and 900 MHz data set, and AIC for model selection. When I analyse the clustered data I’m curious to get R2eff_(back_calc) for each data point. I clarify my main question by attaching some of my data. ########### For residue 530, when I do individual fit I get this output. From the log-file: ——— The spin cluster [':530@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 2 25 21.11216 25.11216 CR72 - relax_disp 5 25 13.93686 23.93686 The model from the data pipe 'CR72 - relax_disp' has been selected. ——— The file ‘disp_530_N.out’ in /final gives the following data table: # Experiment_name Field_strength_(MHz) Disp_point_(Hz) R2eff_(measured) R2eff_(back_calc) R2eff_errors 'SQ CPMG' 799.870000000 25.000000 17.523783179912268 16.953711340740483 0.831932502443187 'SQ CPMG' 799.870000000 50.000000 16.513029763549930 16.914478241596726 0.805586049587058 'SQ CPMG' 799.870000000 75.000000 16.920353186819355 16.875245142453196 0.816049323427317 'SQ CPMG' 799.870000000 100.000000 16.667402888129434 16.836012043882192 0.809527349094067 'SQ CPMG' 799.870000000 150.000000 16.454146002323920 16.757546676539960 0.804090431533660 'SQ CPMG' 799.870000000 200.000000 16.359623786385509 16.679111600438773 0.801698521274394 'SQ CPMG' 799.870000000 300.000000 15.525257427659495 16.523477804972345 0.781054888748662 'SQ CPMG' 799.870000000 350.000000 16.609858567997016 16.447662190184474 0.808054742944598 'SQ CPMG' 799.870000000 400.000000 16.844330710216166 16.374401478154368 0.814080812205130 'SQ CPMG' 799.870000000 500.000000 17.414128601521103 16.238705811895670 0.829011905615397 'SQ CPMG' 799.870000000 600.000000 16.093980388806685 16.120475644003818 0.795034804815920 
'SQ CPMG' 799.870000000 800.000000 15.988036247232372 15.937187687218284 0.792401090807446 'SQ CPMG' 799.870000000 1000.000000 15.732649459437805 15.811741022120714 0.786107934589661 'SQ CPMG' 900.130000000 57.000000 19.386713898811351 20.163621643615215 0.801212497068354 'SQ CPMG' 900.130000000 114.000000 21.873502893081564 20.050473540803750 0.859660006101508 'SQ CPMG' 900.130000000 171.000000 19.133628964210569 19.937331394199191 0.795598311646227 'SQ CPMG' 900.130000000 228.000000 20.497316023709256 19.824330722189416 0.826567798566107 'SQ CPMG' 900.130000000 285.000000 20.091262254550443 19.712140304427066 0.817160298225920 'SQ CPMG' 900.130000000 400.000000 19.177817248045365 19.494278567900892 0.796574222459005 'SQ CPMG' 900.130000000 514.000000 19.111643299707755 19.300194513689348 0.795113430566997 'SQ CPMG' 900.130000000 628.000000 18.432363807026835 19.135138271300775 0.780352695478047 'SQ CPMG' 900.130000000 742.000000 19.383070346051138 18.999531230125285 0.801131245976946 'SQ CPMG' 900.130000000 857.000000 18.560791856291317 18.889165645990943 0.783110910382522 'SQ CPMG' 900.130000000 971.000000 18.810639108776328 18.801416118812085 0.788520121686263 'SQ CPMG' 900.130000000 1085.000000 18.943973311789268 18.730884832131551 0.791430360141496 ########### For a cluster fit (including residue 530) I get this output from the log-file: ——— The spin cluster [':530@N', ':536@N', ':537@N', ':538@N', ':550@N', ':551@N', ':552@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 14 175 458.66116 486.66116 CR72 - relax_disp 23 175 117.29418 163.29418 The model from the data pipe 'CR72 - relax_disp' has been selected. ——— This looks reasonable. This is 7 spins, so on average, 117.29/7 = 16.76, which is a little more than the single spin value of 13.94. But there is no corresponding data table. Do you mean that there is no ‘disp_530_N.out’ file for the clustered analysis? 
########### QUESTION 1: Is it possible to get, or easily create a table with, in my case, 175 R2eff_(back_calc) for the cluster, so that I can get better resolution on the Chi2 = 117.29418 above ? And possibly study how a single residue affect the cluster fitting. Try the value.write() user function: https://www.nmr-relax.com/manual/value_write.html Make sure to set the 'bc' flag to True. QUESTION 2: Are there any reference to methods used for doing efficient selection of residues included in the cluster? There is obviously an immense number of combinations of residues to make clusters in a normal size protein. I consider making a program/script for this process and would be curious to get some inspiration. As far as I am aware, human logic is used for this process. You identify a rigid moving unit in your system yourself with similar dispersion results and then use clustering on that. I would assume that an automated system to find clusters would be computationally very expensive, despite being able to run on a computer cluster via MPI. And that such a project would take up half or more of a PhD student's time. Then again, I wouldn't be surprised if there is now a publication exploring this concept. If you do find one, I'd be interested in hearing about it. Regards, Edward |
From: Edward d'A. <ed...@nm...> - 2022-07-02 11:10:08
|
On Thu, 30 Jun 2022 at 08:19, Johan Wallerstein <joh...@gu...> wrote: > > Hi, > > I perform CPMG-RD cluster fitting using relax, cluster refer to grouping several residues (between 3 to 14 residues) for data from a 45 kDa protein. The software is a good tool for doing this analysis. I marginally adjust the core protocol with the header > > > > """Script for performing a full relaxation dispersion analysis using CPMG-type data.""" > > I use only the CR72-model and I have a PRE_RUN_DIR from a run with individual residues. I use duplicates for error estimation, on both the 800 MHz and 900 MHz data set, and AIC for model selection. > When I analyse the clustered data I’m curious to get R2eff_(back_calc) for each data point. I clarify my main question by attaching some of my data. > > ########### > > For residue 530, when I do individual fit I get this output. > > From the log-file: > > ——— > > The spin cluster [':530@N']. > # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion > No Rex - relax_disp 2 25 21.11216 25.11216 > CR72 - relax_disp 5 25 13.93686 23.93686 > The model from the data pipe 'CR72 - relax_disp' has been selected. 
> > ——— > > The file ‘disp_530_N.out’ in /final gives the following data table: > > # Experiment_name Field_strength_(MHz) Disp_point_(Hz) R2eff_(measured) R2eff_(back_calc) R2eff_errors > 'SQ CPMG' 799.870000000 25.000000 17.523783179912268 16.953711340740483 0.831932502443187 > 'SQ CPMG' 799.870000000 50.000000 16.513029763549930 16.914478241596726 0.805586049587058 > 'SQ CPMG' 799.870000000 75.000000 16.920353186819355 16.875245142453196 0.816049323427317 > 'SQ CPMG' 799.870000000 100.000000 16.667402888129434 16.836012043882192 0.809527349094067 > 'SQ CPMG' 799.870000000 150.000000 16.454146002323920 16.757546676539960 0.804090431533660 > 'SQ CPMG' 799.870000000 200.000000 16.359623786385509 16.679111600438773 0.801698521274394 > 'SQ CPMG' 799.870000000 300.000000 15.525257427659495 16.523477804972345 0.781054888748662 > 'SQ CPMG' 799.870000000 350.000000 16.609858567997016 16.447662190184474 0.808054742944598 > 'SQ CPMG' 799.870000000 400.000000 16.844330710216166 16.374401478154368 0.814080812205130 > 'SQ CPMG' 799.870000000 500.000000 17.414128601521103 16.238705811895670 0.829011905615397 > 'SQ CPMG' 799.870000000 600.000000 16.093980388806685 16.120475644003818 0.795034804815920 > 'SQ CPMG' 799.870000000 800.000000 15.988036247232372 15.937187687218284 0.792401090807446 > 'SQ CPMG' 799.870000000 1000.000000 15.732649459437805 15.811741022120714 0.786107934589661 > 'SQ CPMG' 900.130000000 57.000000 19.386713898811351 20.163621643615215 0.801212497068354 > 'SQ CPMG' 900.130000000 114.000000 21.873502893081564 20.050473540803750 0.859660006101508 > 'SQ CPMG' 900.130000000 171.000000 19.133628964210569 19.937331394199191 0.795598311646227 > 'SQ CPMG' 900.130000000 228.000000 20.497316023709256 19.824330722189416 0.826567798566107 > 'SQ CPMG' 900.130000000 285.000000 20.091262254550443 19.712140304427066 0.817160298225920 > 'SQ CPMG' 900.130000000 400.000000 19.177817248045365 19.494278567900892 0.796574222459005 > 'SQ CPMG' 900.130000000 514.000000 
19.111643299707755 19.300194513689348 0.795113430566997 > 'SQ CPMG' 900.130000000 628.000000 18.432363807026835 19.135138271300775 0.780352695478047 > 'SQ CPMG' 900.130000000 742.000000 19.383070346051138 18.999531230125285 0.801131245976946 > 'SQ CPMG' 900.130000000 857.000000 18.560791856291317 18.889165645990943 0.783110910382522 > 'SQ CPMG' 900.130000000 971.000000 18.810639108776328 18.801416118812085 0.788520121686263 > 'SQ CPMG' 900.130000000 1085.000000 18.943973311789268 18.730884832131551 0.791430360141496 > > ########### > > For a cluster fit (including residue 530) I get this output from the log-file: > > ——— > > The spin cluster [':530@N', ':536@N', ':537@N', ':538@N', ':550@N', ':551@N', ':552@N']. > # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion > No Rex - relax_disp 14 175 458.66116 486.66116 > CR72 - relax_disp 23 175 117.29418 163.29418 > The model from the data pipe 'CR72 - relax_disp' has been selected. > ——— This looks reasonable. This is 7 spins, so on average, 117.29/7 = 16.76, which is a little more than the single spin value of 13.94. > But there is no corresponding data table. Do you mean that there is no ‘disp_530_N.out’ file for the clustered analysis? > ########### > > QUESTION 1: > Is it possible to get, or easily create a table with, in my case, 175 R2eff_(back_calc) for the cluster, so that I can get better resolution on the Chi2 = 117.29418 above ? > And possibly study how a single residue affect the cluster fitting. Try the value.write() user function: https://www.nmr-relax.com/manual/value_write.html Make sure to set the 'bc' flag to True. > QUESTION 2: > Are there any reference to methods used for doing efficient selection of residues included in the cluster? There is obviously an immense number of combinations of residues to make clusters in a normal size protein. I consider making a program/script for this process and would be curious to get some inspiration. 
As far as I am aware, human logic is used for this process. You identify a rigid moving unit in your system yourself with similar dispersion results and then use clustering on that. I would assume that an automated system to find clusters would be computationally very expensive, despite being able to run on a computer cluster via MPI. And that such a project would take up half or more of a PhD student's time. Then again, I wouldn't be surprised if there is now a publication exploring this concept. If you do find one, I'd be interested in hearing about it. Regards, Edward |
From: Johan W. <joh...@gu...> - 2022-06-29 11:12:05
|
Hi, I perform CPMG-RD cluster fitting using relax, cluster refer to grouping several residues (between 3 to 14 residues) for data from a 45 kDa protein. The software is a good tool for doing this analysis. I marginally adjust the core protocol with the header """Script for performing a full relaxation dispersion analysis using CPMG-type data.""" I use only the CR72-model and I have a PRE_RUN_DIR from a run with individual residues. I use duplicates for error estimation, on both the 800 MHz and 900 MHz data set, and AIC for model selection. When I analyse the clustered data I’m curious to get R2eff_(back_calc) for each data point. I clarify my main question by attaching some of my data. ########### For residue 530, when I do individual fit I get this output. From the log-file: ——— The spin cluster [':530@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 2 25 21.11216 25.11216 CR72 - relax_disp 5 25 13.93686 23.93686 The model from the data pipe 'CR72 - relax_disp' has been selected. 
——— The file ‘disp_530_N.out’ in /final gives the following data table: # Experiment_name Field_strength_(MHz) Disp_point_(Hz) R2eff_(measured) R2eff_(back_calc) R2eff_errors 'SQ CPMG' 799.870000000 25.000000 17.523783179912268 16.953711340740483 0.831932502443187 'SQ CPMG' 799.870000000 50.000000 16.513029763549930 16.914478241596726 0.805586049587058 'SQ CPMG' 799.870000000 75.000000 16.920353186819355 16.875245142453196 0.816049323427317 'SQ CPMG' 799.870000000 100.000000 16.667402888129434 16.836012043882192 0.809527349094067 'SQ CPMG' 799.870000000 150.000000 16.454146002323920 16.757546676539960 0.804090431533660 'SQ CPMG' 799.870000000 200.000000 16.359623786385509 16.679111600438773 0.801698521274394 'SQ CPMG' 799.870000000 300.000000 15.525257427659495 16.523477804972345 0.781054888748662 'SQ CPMG' 799.870000000 350.000000 16.609858567997016 16.447662190184474 0.808054742944598 'SQ CPMG' 799.870000000 400.000000 16.844330710216166 16.374401478154368 0.814080812205130 'SQ CPMG' 799.870000000 500.000000 17.414128601521103 16.238705811895670 0.829011905615397 'SQ CPMG' 799.870000000 600.000000 16.093980388806685 16.120475644003818 0.795034804815920 'SQ CPMG' 799.870000000 800.000000 15.988036247232372 15.937187687218284 0.792401090807446 'SQ CPMG' 799.870000000 1000.000000 15.732649459437805 15.811741022120714 0.786107934589661 'SQ CPMG' 900.130000000 57.000000 19.386713898811351 20.163621643615215 0.801212497068354 'SQ CPMG' 900.130000000 114.000000 21.873502893081564 20.050473540803750 0.859660006101508 'SQ CPMG' 900.130000000 171.000000 19.133628964210569 19.937331394199191 0.795598311646227 'SQ CPMG' 900.130000000 228.000000 20.497316023709256 19.824330722189416 0.826567798566107 'SQ CPMG' 900.130000000 285.000000 20.091262254550443 19.712140304427066 0.817160298225920 'SQ CPMG' 900.130000000 400.000000 19.177817248045365 19.494278567900892 0.796574222459005 'SQ CPMG' 900.130000000 514.000000 19.111643299707755 19.300194513689348 0.795113430566997 'SQ 
CPMG' 900.130000000 628.000000 18.432363807026835 19.135138271300775 0.780352695478047 'SQ CPMG' 900.130000000 742.000000 19.383070346051138 18.999531230125285 0.801131245976946 'SQ CPMG' 900.130000000 857.000000 18.560791856291317 18.889165645990943 0.783110910382522 'SQ CPMG' 900.130000000 971.000000 18.810639108776328 18.801416118812085 0.788520121686263 'SQ CPMG' 900.130000000 1085.000000 18.943973311789268 18.730884832131551 0.791430360141496 ########### For a cluster fit (including residue 530) I get this output from the log-file: ——— The spin cluster [':530@N', ':536@N', ':537@N', ':538@N', ':550@N', ':551@N', ':552@N']. # Data pipe Num_params_(k) Num_data_sets_(n) Chi2 Criterion No Rex - relax_disp 14 175 458.66116 486.66116 CR72 - relax_disp 23 175 117.29418 163.29418 The model from the data pipe 'CR72 - relax_disp' has been selected. ——— But there is no corresponding data table. ########### QUESTION 1: Is it possible to get, or easily create a table with, in my case, 175 R2eff_(back_calc) for the cluster, so that I can get better resolution on the Chi2 = 117.29418 above ? And possibly study how a single residue affect the cluster fitting. QUESTION 2: Are there any reference to methods used for doing efficient selection of residues included in the cluster? There is obviously an immense number of combinations of residues to make clusters in a normal size protein. I consider making a program/script for this process and would be curious to get some inspiration. Best regards Johan Wallerstein, Sweden |
From: Hendrik K. <hen...@ru...> - 2022-02-22 12:34:38
|
Dear all, at first I would like to thank all of you for your fast responses! I really appreciate your assistance and I am really thankful that there is such a helpful and respectful NMR community out there! I did not respond earlier to your previous messages, as I wanted to make sure to not run into another problem. I could figure out the reason for the error of my previously described problem. The error with the error message: relax> spectrum.read_intensities(file='/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms', dir=None, spectrum_id='5', dim=1, int_method='height', int_col=None, spin_id_col=None, mol_name_col=1, res_num_col=2, res_name_col=3, spin_num_col=4, spin_name_col=5, sep=None, spin_id=None, ncproc=None) Opening the file '/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms' for reading. Sparky formatted data file. Number of header lines found: 1 2D peak list detected. Traceback (most recent call last): File "/usr/software/relax/gui/interpreter.py", line 109, in apply fn(*args, **kwds) File "/usr/software/relax/pipe_control/spectrum.py", line 610, in read peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) File "/usr/software/relax/lib/spectrum/peak_list.py", line 213, in read_peak_list sparky.read_list(peak_list=peak_list, file_data=file_data) File "/usr/software/relax/lib/spectrum/sparky.py", line 127, in read_list name2 = row2[-2] + row2[-1] IndexError: list index out of range was caused by a question mark character in my peak list, which originated in an unassigned nitrogen dimension of a residue in my CCPNMR project: Assignment w1 w2 Height F142H-? 
8.684 119.044 8.09E+04 So i could fix that problem by assign it and generate new peak lists for my spectra and go on with the R1, R2 and NOE analysis without any further issues. Then I tried to go on with a model free analysis. I loaded the spins for protons and nitrogen from my previous described sequence file and added the "element" attribute for each spin by hand. When I tried to prepare the dipole-dipole interactions I came across another error message which says: relax>interatom.unit_vectors(ave=True) Averaging all vectors. RelaxError: Positional information could not be found for any spins. I found an entry in the mailing list archive, where another user had this issue (https://www.mail-archive.com/rel...@gn.../msg01491.html). Is it right, that I have to use a 3D structure instead of a sequence file for the model free analysis, as only this will provide the necessary positional information or did I do something wrong with loading my sequence file? Best regards and thanks for your help! Hendrik Am 16.02.22 um 23:55 schrieb Edward d'Auvergne: > On Wed, 16 Feb 2022 at 19:25, Hendrik Kirschner via nmr-relax-users > <nmr...@li...> wrote: >> Dear all, >> >> I am new to Relax and wanted to analyze my relaxation data with it. I used CCPNMR V2.4.2 to export my peak lists from my T1, T2 and NOE data sets in sparky format. Also the sequence file was generated with CCPNMR V2.4.2. Then i started the analysis with relax 5.0.0 at NMRbox.org. The loading of the spins from the sequence file was successfull and spin container for all residues were generated with H and N spins in it each. 
>> >> My sequence file is formatted like this: >> >> res_num res_name spin_num spin_name >> -2 Gly -2 N >> -1 His -1 N >> 0 Met 0 N >> 1 Ser 1 N >> >> But when i tried to load my relaxation data ("peak intensity reading" window) i always get the following error message: >> >> relax> spectrum.read_intensities(file='/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms', dir=None, spectrum_id='5', dim=1, int_method='height', int_col=None, spin_id_col=None, mol_name_col=1, res_num_col=2, res_name_col=3, spin_num_col=4, spin_name_col=5, sep=None, spin_id=None, ncproc=None) >> Opening the file '/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms' for reading. >> Sparky formatted data file. >> >> Number of header lines found: 1 >> 2D peak list detected. >> >> Traceback (most recent call last): >> File "/usr/software/relax/gui/interpreter.py", line 109, in apply >> fn(*args, **kwds) >> File "/usr/software/relax/pipe_control/spectrum.py", line 610, in read >> peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) >> File "/usr/software/relax/lib/spectrum/peak_list.py", line 213, in read_peak_list >> sparky.read_list(peak_list=peak_list, file_data=file_data) >> File "/usr/software/relax/lib/spectrum/sparky.py", line 127, in read_list >> name2 = row2[-2] + row2[-1] >> IndexError: list index out of range >> >> The sparky format of the data looks like this: >> >> Assignment w1 w2 Height >> K18H-K18N 8.134 123.117 6.63E+05 >> >> >> I tried different settings in the "peak intensity reading" window (e.g. intensity column set to 3 or 4). 
I also searched in the mailing list and Wiki for a solution and found an entry which seems to describe my problem (-> bug #22961; found at:http://wiki.nmr-relax.com/Relax_release_bugfixes), but the description does not really helped me. I also tried a different Relax version (V4.1.3) and to load the spins via a PDB-file, but it leads to the same error. >> >> Is there anything wrong with my formats or did I do something wrong in the settings? > Hi Hendrik, > > Welcome to the relax mailing lists! That is an interesting problem > that I don't think anyone has encountered before. I wonder if it is > due to CCPN not producing a conformant Sparky formatted file. For the > example you gave, I can see that the 'K18H-K18N' assignment works ok. > Using the python3 interpreter and replicating the failing lines of > code: > >>>> from re import split >>>> assign2 = "K18N" >>>> row2 = split('([a-zA-Z]+)', assign2) >>>> row2[-2] + row2[-1] > 'N' > > relax correctly finds 'N' for the name of the 2nd dimension. What you > could do is to randomly delete blocks of lines from your > Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms file, running relax > to find which line is causing the error. You should be able to repeat > this until you only have a single failing line left. If you could > find that line and report it back, that would be appreciated. I can > then see if it is something that should be fixed in relax or in the > CCPN Sparky export. > > Cheers, > > Edward -- Hendrik Kirschner, M.Sc. Biomolecular NMR (NC5/173 Süd) Faculty of Chemistry and Biochemistry Ruhr-University Bochum Universitaetsstrasse 150 44801 Bochum Germany Hen...@ru... Tel.: +49 234 32-26246 |
From: Mark B. <mar...@tu...> - 2022-02-18 21:44:03
|
Dear Edward, I used CCPN for my spectra and exported a CCPN peak list, which contains all relaxation planes and peaks in the same file (my data is acquired as a pseudo 3D). I wrote a python converter, which writes out sparky format peak lists and the sequence file. I then use these for the input to relax. Best wishes, Mark On 16/02/2022 23:00, Edward d'Auvergne wrote: > On Wed, 16 Feb 2022 at 23:19, Mark Bostock <mar...@tu...> wrote: >> Dear Hendrik, >> >> I have not used Relax for T1/T2/NOE data (I assume you are planning to do model-free analysis). However, for CPMG analysis I have the following formats for my input files: >> >> Peak intensity files: >> >> Assignment w1 w2 Data Height >> ASP2N-HN 8.560 122.651 2.98553e+08 >> SER3N-HN 8.425 116.521 2.47809e+08 >> THR4N-HN 8.358 115.634 2.42621e+08 >> SER5N-HN 8.264 117.706 2.65439e+08 >> >> Sequence file (no headers): >> >> 2 Asp >> 3 Ser >> 4 Thr >> 5 Ser >> >> In the relax script this is loaded as: >> >> # Load the sequence. >> sequence.read('sequence.txt', dir=DATA_PATH, res_num_col=1, res_name_col=2) >> >> I hope this helps. The sequence file format seems to be quite different. The variable number of spaces between your first and second columns may cause problems. > Hi Mark, > > Thanks for replying! Did you use Sparky directly or CCPN export as > well? Maybe it is time to add support for CCPN peak lists. Note that > after 2 decades and hundreds of relax users throwing all sorts of data > at it, the handling of different file formats and conventions by relax > is now no problem at all and is mostly automated. The relax design > philosophy is that the user is never wrong, or never has incorrectly > formatted data ;) In the later case, a carefully crafted RelaxError > message is used to fully explain the issue. > > Cheers, > > Edward |
From: Edward d'A. <ed...@nm...> - 2022-02-16 23:01:06
|
On Wed, 16 Feb 2022 at 23:19, Mark Bostock <mar...@tu...> wrote: > > Dear Hendrik, > > I have not used Relax for T1/T2/NOE data (I assume you are planning to do model-free analysis). However, for CPMG analysis I have the following formats for my input files: > > Peak intensity files: > > Assignment w1 w2 Data Height > ASP2N-HN 8.560 122.651 2.98553e+08 > SER3N-HN 8.425 116.521 2.47809e+08 > THR4N-HN 8.358 115.634 2.42621e+08 > SER5N-HN 8.264 117.706 2.65439e+08 > > Sequence file (no headers): > > 2 Asp > 3 Ser > 4 Thr > 5 Ser > > In the relax script this is loaded as: > > # Load the sequence. > sequence.read('sequence.txt', dir=DATA_PATH, res_num_col=1, res_name_col=2) > > I hope this helps. The sequence file format seems to be quite different. The variable number of spaces between your first and second columns may cause problems. Hi Mark, Thanks for replying! Did you use Sparky directly or CCPN export as well? Maybe it is time to add support for CCPN peak lists. Note that after 2 decades and hundreds of relax users throwing all sorts of data at it, the handling of different file formats and conventions by relax is now no problem at all and is mostly automated. The relax design philosophy is that the user is never wrong, or never has incorrectly formatted data ;) In the later case, a carefully crafted RelaxError message is used to fully explain the issue. Cheers, Edward |
From: Edward d'A. <ed...@nm...> - 2022-02-16 22:55:19
|
On Wed, 16 Feb 2022 at 19:25, Hendrik Kirschner via nmr-relax-users <nmr...@li...> wrote: > > Dear all, > > I am new to Relax and wanted to analyze my relaxation data with it. I used CCPNMR V2.4.2 to export my peak lists from my T1, T2 and NOE data sets in sparky format. Also the sequence file was generated with CCPNMR V2.4.2. Then i started the analysis with relax 5.0.0 at NMRbox.org. The loading of the spins from the sequence file was successfull and spin container for all residues were generated with H and N spins in it each. > > My sequence file is formatted like this: > > res_num res_name spin_num spin_name > -2 Gly -2 N > -1 His -1 N > 0 Met 0 N > 1 Ser 1 N > > But when i tried to load my relaxation data ("peak intensity reading" window) i always get the following error message: > > relax> spectrum.read_intensities(file='/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms', dir=None, spectrum_id='5', dim=1, int_method='height', int_col=None, spin_id_col=None, mol_name_col=1, res_num_col=2, res_name_col=3, spin_num_col=4, spin_name_col=5, sep=None, spin_id=None, ncproc=None) > Opening the file '/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms' for reading. > Sparky formatted data file. > > Number of header lines found: 1 > 2D peak list detected. 
> > Traceback (most recent call last): > File "/usr/software/relax/gui/interpreter.py", line 109, in apply > fn(*args, **kwds) > File "/usr/software/relax/pipe_control/spectrum.py", line 610, in read > peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) > File "/usr/software/relax/lib/spectrum/peak_list.py", line 213, in read_peak_list > sparky.read_list(peak_list=peak_list, file_data=file_data) > File "/usr/software/relax/lib/spectrum/sparky.py", line 127, in read_list > name2 = row2[-2] + row2[-1] > IndexError: list index out of range > > The sparky format of the data looks like this: > > Assignment w1 w2 Height > K18H-K18N 8.134 123.117 6.63E+05 > > > I tried different settings in the "peak intensity reading" window (e.g. intensity column set to 3 or 4). I also searched in the mailing list and Wiki for a solution and found an entry which seems to describe my problem (-> bug #22961; found at: http://wiki.nmr-relax.com/Relax_release_bugfixes), but the description does not really helped me. I also tried a different Relax version (V4.1.3) and to load the spins via a PDB-file, but it leads to the same error. > > Is there anything wrong with my formats or did I do something wrong in the settings? Hi Hendrik, Welcome to the relax mailing lists! That is an interesting problem that I don't think anyone has encountered before. I wonder if it is due to CCPN not producing a conformant Sparky formatted file. For the example you gave, I can see that the 'K18H-K18N' assignment works ok. Using the python3 interpreter and replicating the failing lines of code: >>> from re import split >>> assign2 = "K18N" >>> row2 = split('([a-zA-Z]+)', assign2) >>> row2[-2] + row2[-1] 'N' relax correctly finds 'N' for the name of the 2nd dimension. 
What you could do is to randomly delete blocks of lines from your Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms file, running relax to find which line is causing the error. You should be able to repeat this until you only have a single failing line left. If you could find that line and report it back, that would be appreciated. I can then see if it is something that should be fixed in relax or in the CCPN Sparky export. Cheers, Edward |
From: Mark B. <mar...@tu...> - 2022-02-16 22:19:45
|
Dear Hendrik, I have not used Relax for T1/T2/NOE data (I assume you are planning to do model-free analysis). However, for CPMG analysis I have the following formats for my input files: _Peak intensity files:_ Assignment w1 w2 Data Height ASP2N-HN 8.560 122.651 2.98553e+08 SER3N-HN 8.425 116.521 2.47809e+08 THR4N-HN 8.358 115.634 2.42621e+08 SER5N-HN 8.264 117.706 2.65439e+08 _Sequence file (no headers):_ 2 Asp 3 Ser 4 Thr 5 Ser In the relax script this is loaded as: # Load the sequence. sequence.read('sequence.txt', dir=DATA_PATH, res_num_col=1, res_name_col=2) I hope this helps. The sequence file format seems to be quite different. The variable number of spaces between your first and second columns may cause problems. Best wishes, Mark On 16/02/2022 18:25, Hendrik Kirschner via nmr-relax-users wrote: > > Dear all, > > I am new to Relax and wanted to analyze my relaxation data with it. I > used CCPNMR V2.4.2 to export my peak lists from my T1, T2 and NOE data > sets in sparky format. Also the sequence file was generated with > CCPNMR V2.4.2. Then i started the analysis with relax 5.0.0 at > NMRbox.org. The loading of the spins from the sequence file was > successfull and spin container for all residues were generated with H > and N spins in it each. 
> > My sequence file is formatted like this: > > res_num res_name spin_num spin_name > -2 Gly -2 N > -1 His -1 N > 0 Met 0 N > 1 Ser 1 N > > But when i tried to load my relaxation data ("peak intensity reading" > window) i always get the following error message: > > relax> > spectrum.read_intensities(file='/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms', > dir=None, spectrum_id='5', dim=1, int_method='height', > int_col=None, spin_id_col=None, mol_name_col=1, res_num_col=2, > res_name_col=3, spin_num_col=4, spin_name_col=5, sep=None, > spin_id=None, ncproc=None) > Opening the file > '/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms' > for reading. > Sparky formatted data file. > > Number of header lines found: 1 > 2D peak list detected. > > Traceback (most recent call last): > File "/usr/software/relax/gui/interpreter.py", line 109, in apply > fn(*args, **kwds) > File "/usr/software/relax/pipe_control/spectrum.py", line 610, > in read > peak_list = read_peak_list(file=file[file_index], dir=dir, > int_col=int_col, spin_id_col=spin_id_col, > mol_name_col=mol_name_col, res_num_col=res_num_col, > res_name_col=res_name_col, spin_num_col=spin_num_col, > spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) > File "/usr/software/relax/lib/spectrum/peak_list.py", line 213, > in read_peak_list > sparky.read_list(peak_list=peak_list, file_data=file_data) > File "/usr/software/relax/lib/spectrum/sparky.py", line 127, in > read_list > name2 = row2[-2] + row2[-1] > IndexError: list index out of range > > The sparky format of the data looks like this: > > Assignment w1 w2 Height > K18H-K18N 8.134 123.117 6.63E+05 > > > I tried different settings in the "peak intensity reading" window > (e.g. intensity column set to 3 or 4). 
I also searched in the mailing > list and Wiki for a solution and found an entry which seems to > describe my problem (-> bug #22961; found at: > http://wiki.nmr-relax.com/Relax_release_bugfixes), but the description > does not really helped me. I also tried a different Relax version > (V4.1.3) and to load the spins via a PDB-file, but it leads to the > same error. > > Is there anything wrong with my formats or did I do something wrong in > the settings? > > Best regards, > > Hendrik > > -- > Hendrik Kirschner, M.Sc. > Biomolecular NMR (NC5/173 Süd) > Faculty of Chemistry and Biochemistry > Ruhr-University Bochum > Universitaetsstrasse 150 > 44801 Bochum > Germany > > Hen...@ru... > Tel.: +49 234 32-26246 > > > _______________________________________________ > nmr-relax-users mailing list > nmr...@li... > https://lists.sourceforge.net/lists/listinfo/nmr-relax-users |
From: Hendrik K. <hen...@ru...> - 2022-02-16 18:25:47
|
Dear all, I am new to Relax and wanted to analyze my relaxation data with it. I used CCPNMR V2.4.2 to export my peak lists from my T1, T2 and NOE data sets in sparky format. Also the sequence file was generated with CCPNMR V2.4.2. Then i started the analysis with relax 5.0.0 at NMRbox.org. The loading of the spins from the sequence file was successfull and spin container for all residues were generated with H and N spins in it each. My sequence file is formatted like this: res_num res_name spin_num spin_name -2 Gly -2 N -1 His -1 N 0 Met 0 N 1 Ser 1 N But when i tried to load my relaxation data ("peak intensity reading" window) i always get the following error message: relax> spectrum.read_intensities(file='/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms', dir=None, spectrum_id='5', dim=1, int_method='height', int_col=None, spin_id_col=None, mol_name_col=1, res_num_col=2, res_name_col=3, spin_num_col=4, spin_name_col=5, sep=None, spin_id=None, ncproc=None) Opening the file '/home/nmrbox/0001/hkirschner/Desktop/Apo_Dynamik_Auswertung/T1_600er_sparky/T1_600_5ms' for reading. Sparky formatted data file. Number of header lines found: 1 2D peak list detected. 
Traceback (most recent call last): File "/usr/software/relax/gui/interpreter.py", line 109, in apply fn(*args, **kwds) File "/usr/software/relax/pipe_control/spectrum.py", line 610, in read peak_list = read_peak_list(file=file[file_index], dir=dir, int_col=int_col, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, sep=sep, spin_id=spin_id) File "/usr/software/relax/lib/spectrum/peak_list.py", line 213, in read_peak_list sparky.read_list(peak_list=peak_list, file_data=file_data) File "/usr/software/relax/lib/spectrum/sparky.py", line 127, in read_list name2 = row2[-2] + row2[-1] IndexError: list index out of range The sparky format of the data looks like this: Assignment w1 w2 Height K18H-K18N 8.134 123.117 6.63E+05 I tried different settings in the "peak intensity reading" window (e.g. intensity column set to 3 or 4). I also searched in the mailing list and Wiki for a solution and found an entry which seems to describe my problem (-> bug #22961; found at: http://wiki.nmr-relax.com/Relax_release_bugfixes), but the description does not really helped me. I also tried a different Relax version (V4.1.3) and to load the spins via a PDB-file, but it leads to the same error. Is there anything wrong with my formats or did I do something wrong in the settings? Best regards, Hendrik -- Hendrik Kirschner, M.Sc. Biomolecular NMR (NC5/173 Süd) Faculty of Chemistry and Biochemistry Ruhr-University Bochum Universitaetsstrasse 150 44801 Bochum Germany Hen...@ru... Tel.: +49 234 32-26246 |
From: Edward d'A. <ed...@nm...> - 2022-02-01 22:02:26
|
On Mon, 24 Jan 2022 at 22:49, Jessica M. Gonzalez-Delgado via nmr-relax-users <nmr...@li...> wrote: > > Hi everyone, > > I was wondering if there is a way to know if the model-free-analysis is > running properly or if it is stuck? I've run dauvergne_protocol.py on a > cluster before for data of the same protein and it finished in less than > 48h. I can see that the local_tm/ and sphere/ directories have been > created, but the sphere directory only contains init/ and round_1/ and > it's been 24h. The sphere/round_1/ directory contains directories aic/ > and m0/ through m9/, so I'm guessing it's done with round_1 but I don't > know how to check. Thanks in advance! Hi Jessica, Welcome to the relax mailing lists! Sorry for not replying to you earlier. I have been stuck at home for the last 2.5 weeks looking after my 5 year old son due to local outbreaks of COVID-19 in his kindergarten. I'll respond to your other email here too: > After a lot of troubleshooting it seems like the dauvergne_protocol.py > only works when it is run with 5 processors. Is that right? I have > access to a cluster and was trying to lower the calculation time by > running it on more than 5 processors with no luck. I tried a variety of > combinations (up to 59 processors) but 5 was the only one that allowed > the calculation to not get stuck for days on round_1 of every model. This is a clear sign that at least one input data point is bad. The model-free calculations on Gary Thompson's uni-processor and multi-processor 'fabrics' will return the exact same result to machine precision. This is very thoroughly tested. What you see with 5 processors tells me that you are getting stuck in the Monte Carlo simulations for error analysis. The relax logs should show this. Monte Carlo simulations are random by design so that you do not get the same result to machine precision ever. Hence if you ran on 5 processors again, you might find yourself stuck. 
The reason for being stuck on Monte Carlo simulations is the same in all fields where it is used. This is a common problem often seen by those performing a model-free analysis with relax that normally requires removal of bad input data. It is not actually stuck, but rather some or many of the simulation data points are very difficult to optimise. This has been seen a lot with relax due to it's very high default accuracy and precision. But it has always been due to bad input data for one or more spin systems. You should be able to spot these in the relax log. Then go back to your peak intensity plots when you fit the R1 and R2 data and see if the curves for those spin systems look reasonable. The Grace graphs relax produces are very useful for this data trimming process. If the curve looks bad, exclude that data. I hope this helps. Regards, Edward |
From: Edward d'A. <ed...@nm...> - 2022-02-01 21:42:46
|
Hi Mark, Sorry for the late reply. I've been stuck at home for the last 2.5 weeks looking after my 5 year old son, due to local COVID-19 outbreaks in his Kindergarten. It is difficult to do anything at the moment! I received your data files in your private email but have not been able to even open them for now. My instinct is that this is input data related. If you were to plot the peak intensities (heights) in a HSQC, could it be that the problematic data points are those with the lowest intensity? Or those with the greatest intensity? Related to this, do you use the standard process of variance averaging across all peaks in the spectra? You can probe if the errors are underestimated by artificially increasing the errors and seeing if there is a point where relax can fit the data with no patterns left in the residuals. I hope these leeds might help. Regards, Edward On Fri, 21 Jan 2022 at 19:17, Bostock, Mark <mar...@tu...> wrote: > > Dear Edward, > > > Thanks for your reply - this issue occurs for a small subset of residues. Most of the data is well fitted, hence I don't think there are any systematic issues with the data. The errors are estimated based on repeat planes. The data is single-scan interleaved and the cpmg pulsing frequencies are randomised. > > > I have managed to get better fits for these residues on some occasions, however, it's not systematic. e.g. I tested LM63 and CR72 models just on one residue and the fit was much better. However, when I refitted all the residues the improved fit wasn't replicated. It looks like the fit gets stuck in a local-minimum and doesn't converge. Perhaps I could share the script and sample data with you to see if there are any parameters I can further optimise? > > > Many thanks again for your help, > > > Best wishes, > > > Mark > > ________________________________ > From: Edward d'Auvergne <ed...@nm...> > Sent: 21 January 2022 11:54:29 > To: Bostock, Mark > Cc: nmr...@li... 
> Subject: Re: [relax-users] Inconsistent relaxation dispersion fits. > > Hi Mark, > > Do you have this issue with all your data? The residuals are quite > small but probably statistically significant and there seems to be a > pattern to it, but these can sometimes be due to incorrect error > estimates and biases respectively. What techniques did you use for > temperature control and calibration > (https://www.nmr-relax.com/manual/Temperature_control_and_calibration.html)? > Improper control can lead to bias and "patterns" in residuals. And > how did you estimate the errors for each data point? If these are > out, the non-linear least squares fitting algorithms can fail. The > errors influence the curvature of the optimization space (rather than > topology) and incorrect errors can sometimes squeeze valleys in this > space creating false minima. > > Regards, > > Edward > > On Fri, 21 Jan 2022 at 00:06, Mark Bostock <mar...@tu...> wrote: > > > > Dear relax-users, > > > > I'm trying to fit some methyl-13C SQ CPMG data. I have a number of residues, which appear to have an exchange contribution, but result in poor fits e.g. > > > > I've tried a variety of different relaxation dispersion models (CR72 full, B14 full, NS CPMG 2-site expanded, IT99, TSMFK01) but the fit doesn't improve. I've also tried increasing the grid increment parameter from 11 to 21, but again this doesn't improve the fit. Very occasionaly when I have been testing conditions, a model has accurately fitted the data (in the following NS CPMG 2-site expanded) but I am unable to replicate this consistently. > > > > > > Any suggestions to improve the reliability of this fitting would be very much appreciated. > > > > Many thanks, > > > > Mark > > > > > > > > _______________________________________________ > > nmr-relax-users mailing list > > nmr...@li... > > https://lists.sourceforge.net/lists/listinfo/nmr-relax-users |
From: Jessica M. Gonzalez-D. <jmg...@nc...> - 2022-01-30 13:50:42
|
Hi everyone, After a lot of troubleshooting it seems like the dauvergne_protocol.py only works when it is run with 5 processors. Is that right? I have access to a cluster and was trying to lower the calculation time by running it on more than 5 processors with no luck. I tried a variety of combinations (up to 59 processors) but 5 was the only one that allowed the calculation to not get stuck for days on round_1 of every model. Best, Jessica On 2022-01-24 16:18, Jessica M. Gonzalez-Delgado wrote: > Hi everyone, > > I was wondering if there is a way to know if the model-free-analysis > is running properly or if it is stuck? I've run dauvergne_protocol.py > on a cluster before for data of the same protein and it finished in > less than 48h. I can see that the local_tm/ and sphere/ directories > have been created, but the sphere directory only contains init/ and > round_1/ and it's been 24h. The sphere/round_1/ directory contains > directories aic/ and m0/ through m9/, so I'm guessing it's done with > round_1 but I don't know how to check. Thanks in advance! > > Best, > Jessica > -- Jessica M. González-Delgado | she/her Ph.D. Candidate Department of Chemistry Franzen Research Group North Carolina State University |
From: Jessica M. Gonzalez-D. <jmg...@nc...> - 2022-01-24 21:49:02
|
Hi everyone, I was wondering if there is a way to know if the model-free-analysis is running properly or if it is stuck? I've run dauvergne_protocol.py on a cluster before for data of the same protein and it finished in less than 48h. I can see that the local_tm/ and sphere/ directories have been created, but the sphere directory only contains init/ and round_1/ and it's been 24h. The sphere/round_1/ directory contains directories aic/ and m0/ through m9/, so I'm guessing it's done with round_1 but I don't know how to check. Thanks in advance! Best, Jessica -- Jessica M. González-Delgado | she/her Ph.D. Candidate Department of Chemistry Franzen Research Group North Carolina State University |
From: Bostock, M. <mar...@tu...> - 2022-01-21 18:17:17
|
Dear Edward, Thanks for your reply - this issue occurs for a small subset of residues. Most of the data is well fitted, hence I don't think there are any systematic issues with the data. The errors are estimated based on repeat planes. The data is single-scan interleaved and the cpmg pulsing frequencies are randomised. I have managed to get better fits for these residues on some occasions, however, it's not systematic. e.g. I tested LM63 and CR72 models just on one residue and the fit was much better. However, when I refitted all the residues the improved fit wasn't replicated. It looks like the fit gets stuck in a local-minimum and doesn't converge. Perhaps I could share the script and sample data with you to see if there are any parameters I can further optimise? Many thanks again for your help, Best wishes, Mark ________________________________ From: Edward d'Auvergne <ed...@nm...> Sent: 21 January 2022 11:54:29 To: Bostock, Mark Cc: nmr...@li... Subject: Re: [relax-users] Inconsistent relaxation dispersion fits. Hi Mark, Do you have this issue with all your data? The residuals are quite small but probably statistically significant and there seems to be a pattern to it, but these can sometimes be due to incorrect error estimates and biases respectively. What techniques did you use for temperature control and calibration (https://www.nmr-relax.com/manual/Temperature_control_and_calibration.html)? Improper control can lead to bias and "patterns" in residuals. And how did you estimate the errors for each data point? If these are out, the non-linear least squares fitting algorithms can fail. The errors influence the curvature of the optimization space (rather than topology) and incorrect errors can sometimes squeeze valleys in this space creating false minima. Regards, Edward On Fri, 21 Jan 2022 at 00:06, Mark Bostock <mar...@tu...> wrote: > > Dear relax-users, > > I'm trying to fit some methyl-13C SQ CPMG data. 
I have a number of residues, which appear to have an exchange contribution, but result in poor fits e.g. > > I've tried a variety of different relaxation dispersion models (CR72 full, B14 full, NS CPMG 2-site expanded, IT99, TSMFK01) but the fit doesn't improve. I've also tried increasing the grid increment parameter from 11 to 21, but again this doesn't improve the fit. Very occasionaly when I have been testing conditions, a model has accurately fitted the data (in the following NS CPMG 2-site expanded) but I am unable to replicate this consistently. > > > Any suggestions to improve the reliability of this fitting would be very much appreciated. > > Many thanks, > > Mark > > > > _______________________________________________ > nmr-relax-users mailing list > nmr...@li... > https://lists.sourceforge.net/lists/listinfo/nmr-relax-users |
From: Edward d'A. <ed...@nm...> - 2022-01-21 12:33:45
|
Hi Mark, Do you have this issue with all your data? The residuals are quite small but probably statistically significant and there seems to be a pattern to it, but these can sometimes be due to incorrect error estimates and biases respectively. What techniques did you use for temperature control and calibration (https://www.nmr-relax.com/manual/Temperature_control_and_calibration.html)? Improper control can lead to bias and "patterns" in residuals. And how did you estimate the errors for each data point? If these are out, the non-linear least squares fitting algorithms can fail. The errors influence the curvature of the optimization space (rather than topology) and incorrect errors can sometimes squeeze valleys in this space creating false minima. Regards, Edward On Fri, 21 Jan 2022 at 00:06, Mark Bostock <mar...@tu...> wrote: > > Dear relax-users, > > I'm trying to fit some methyl-13C SQ CPMG data. I have a number of residues, which appear to have an exchange contribution, but result in poor fits e.g. > > I've tried a variety of different relaxation dispersion models (CR72 full, B14 full, NS CPMG 2-site expanded, IT99, TSMFK01) but the fit doesn't improve. I've also tried increasing the grid increment parameter from 11 to 21, but again this doesn't improve the fit. Very occasionaly when I have been testing conditions, a model has accurately fitted the data (in the following NS CPMG 2-site expanded) but I am unable to replicate this consistently. > > > Any suggestions to improve the reliability of this fitting would be very much appreciated. > > Many thanks, > > Mark > > > > _______________________________________________ > nmr-relax-users mailing list > nmr...@li... > https://lists.sourceforge.net/lists/listinfo/nmr-relax-users |
From: Mark B. <mar...@tu...> - 2022-01-20 23:06:42
|
Dear relax-users, I'm trying to fit some methyl-13C SQ CPMG data. I have a number of residues, which appear to have an exchange contribution, but result in poor fits e.g. I've tried a variety of different relaxation dispersion models (CR72 full, B14 full, NS CPMG 2-site expanded, IT99, TSMFK01) but the fit doesn't improve. I've also tried increasing the grid increment parameter from 11 to 21, but again this doesn't improve the fit. Very occasionally when I have been testing conditions, a model has accurately fitted the data (in the following NS CPMG 2-site expanded) but I am unable to replicate this consistently. Any suggestions to improve the reliability of this fitting would be very much appreciated. Many thanks, Mark |
From: Johan W. <joh...@gu...> - 2021-12-15 08:01:54
|
Hi Edward and relax user list! Thanks a lot for useful comments, advice and links for further reading. I'll continue using, arguing with, and hugging the relax software and because of this it is likely I'll return with more questions. Best regards Johan > On 4 Dec 2021, at 11:23, Edward d'Auvergne <ed...@nm...> wrote: > > On Mon, 29 Nov 2021 at 12:15, Johan Wallerstein <joh...@gu...> wrote: >> Thanks a lot for many clarifications and useful comments in your answer. Much appreciated. Most important for me, the value.write() command solved the main issues, now I easily get all s2, te, and chi2. > > No problems! > > >> One, quite small, question mark remains. How do I get an estimate of the global correlation time (tm)? > > For this, there is no *.write() user function. You can however use > the diffusion_tensor.display() user function and output to a log file > or read the values directly from screen. > > >> I use one magnetic field only, I have read in many comments on the mail list that one field is suboptimal, for many reasons, I’m aware of this, but since my protein (in)stability doesn’t admit more NMR-time, one field I hope is better than no field! > > The best resource for this is: > > http://wiki.nmr-relax.com/Model-free_analysis_single_field > > In summary, if you have lots of internal motions, internal motions > that are directionally clustered due to the 3D structure, multi-domain > motions, or a slightly anisotropic system, you must study and be > prepared for the motions to be absorbed into the diffusion tensor and > hidden. > > >> So based on my analysis, >> Protein ca 400 residues >> one field 800 MHz, >> T1-, T2- and NOE-data for most residues >> is there any neat way to get an estimate of the global correlation time (or diffusion tensor) from relax? > > See the above link. The local tm models from relax are essential for > your study. > > >> Or is this only possible using two or more fields? 
> > As the wiki article states, single field strength is possible. But > you must know your demons if you go down that route ;) > > >> (I could get a rough estimate of tm from other software, e.g. David Fushman’s Matlab-based ROTDIF.) > > relax will give you the exact value (but the single strength data > might mean that the diffusion tensor absorbs some of your internal > motions or 3D anisotropy). And it will give you the all important > local tm values for comparison. > > >> I apologise if the answer to my question is already clearly posted, I’ve searched through quite many posts on the mail list to try to find answers or protocols to get a rough estimate of tm, but I haven’t found any. > > The wiki article and all it's links is our best summary and covers > absolutely everything you'll need to know (that the field is currently > aware of). But if you can in any way measure it, multiple field > strength data is superior in every way. I hope this helps. > > Regards, > > Edward > > > _______________________________________________ > nmr-relax-users mailing list > nmr...@li... > https://lists.sourceforge.net/lists/listinfo/nmr-relax-users |
From: Edward d'A. <ed...@nm...> - 2021-12-04 10:23:56
|
On Mon, 29 Nov 2021 at 12:15, Johan Wallerstein <joh...@gu...> wrote: > Thanks a lot for many clarifications and useful comments in your answer. Much appreciated. Most important for me, the value.write() command solved the main issues, now I easily get all s2, te, and chi2. No problems! > One, quite small, question mark remains. How do I get an estimate of the global correlation time (tm)? For this, there is no *.write() user function. You can however use the diffusion_tensor.display() user function and output to a log file or read the values directly from screen. > I use one magnetic field only, I have read in many comments on the mail list that one field is suboptimal, for many reasons, I’m aware of this, but since my protein (in)stability doesn’t admit more NMR-time, one field I hope is better than no field! The best resource for this is: http://wiki.nmr-relax.com/Model-free_analysis_single_field In summary, if you have lots of internal motions, internal motions that are directionally clustered due to the 3D structure, multi-domain motions, or a slightly anisotropic system, you must study and be prepared for the motions to be absorbed into the diffusion tensor and hidden. > So based on my analysis, > Protein ca 400 residues > one field 800 MHz, > T1-, T2- and NOE-data for most residues > is there any neat way to get an estimate of the global correlation time (or diffusion tensor) from relax? See the above link. The local tm models from relax are essential for your study. > Or is this only possible using two or more fields? As the wiki article states, single field strength is possible. But you must know your demons if you go down that route ;) > (I could get a rough estimate of tm from other software, e.g. David Fushman’s Matlab-based ROTDIF.) relax will give you the exact value (but the single strength data might mean that the diffusion tensor absorbs some of your internal motions or 3D anisotropy). 
And it will give you the all important local tm values for comparison. > I apologise if the answer to my question is already clearly posted, I’ve searched through quite many posts on the mail list to try to find answers or protocols to get a rough estimate of tm, but I haven’t found any. The wiki article and all its links are our best summary and cover absolutely everything you'll need to know (that the field is currently aware of). But if you can in any way measure it, multiple field strength data is superior in every way. I hope this helps. Regards, Edward |
From: Johan W. <joh...@gu...> - 2021-11-29 11:14:58
|
Hi, Thanks a lot for many clarifications and useful comments in your answer. Much appreciated. Most important for me, the value.write() command solved the main issues, now I easily get all s2, te, and chi2. One, quite small, question mark remains. How do I get an estimate of the global correlation time (tm)? I use one magnetic field only, I have read in many comments on the mail list that one field is suboptimal, for many reasons, I’m aware of this, but since my protein (in)stability doesn’t admit more NMR-time, one field I hope is better than no field! So based on my analysis, Protein ca 400 residues one field 800 MHz, T1-, T2- and NOE-data for most residues is there any neat way to get an estimate of the global correlation time (or diffusion tensor) from relax? Or is this only possible using two or more fields? (I could get a rough estimate of tm from other software, e.g. David Fushman’s Matlab-based ROTDIF.) I apologise if the answer to my question is already clearly posted, I’ve searched through quite many posts on the mail list to try to find answers or protocols to get a rough estimate of tm, but I haven’t found any. Best regards Johan Wallerstein > On 26 Nov 2021, at 12:26, Edward d'Auvergne <ed...@nm...> wrote: > > On Fri, 26 Nov 2021 at 12:14, Johan Wallerstein <joh...@gu...> wrote: >> >> Hi Edward, >> >> Thanks for all efforts put into developing the relax-software. > > Hi Johan, > > You're welcome! And welcome to the relax mailing lists! > > >> I'm using the model-free script ‘single_model.py', and get as output an XML-file called ‘results’ ca 5 Mb. >> >> How do you suggest the user to best deal with that file? > > That file contains all data you would ever require. It is probably > best to use relax to extract what you need - just load that file into > a new instance of relax and use the appropriate relax users functions > to output text lists or graphs of the data. 
> > >> More specifically what confuses me is that I get many lists of ‘s2_sim’ (see below), are we supposed to collect all of these lists and compute mean and standard deviation? > > These are the values fitted for each of the Monte Carlo simulation > data sets. The relax script you used should already have calculated > the standard deviation for this and placed it into the *_err data > structures. Note that the average value of fitted Monte Carlo > simulations has no practical or statistical uses so we never calculate > it. > > >> Or do you suggest me to retrieve the s2-data from the log-file? >> (using command 'relax —log log.txt single_model.py’) > > It's best to simply load the file back into relax and use user > functions such as value.write(), grace.write(), pymol.macro_write(), > molmol.macro_write(), etc to output the data in a way you can use it - > text lists with values and errors, 2D graphs with error bars, or > values represented via a 3D structure. > > >> before when I've used the ‘dauvergne_protocol.py’ I got all output data in the folder ‘final', and as text files. > > There should also be Grace graph *.agr files in there, as well as > PyMOL macros. I hope this information helps. > > Regards, > > Edward |
From: Edward d'A. <ed...@nm...> - 2021-11-26 11:26:32
|
On Fri, 26 Nov 2021 at 12:14, Johan Wallerstein <joh...@gu...> wrote: > > Hi Edward, > > Thanks for all efforts put into developing the relax-software. Hi Johan, You're welcome! And welcome to the relax mailing lists! > I'm using the model-free script ‘single_model.py', and get as output an XML-file called ‘results’ ca 5 Mb. > > How do you suggest the user to best deal with that file? That file contains all data you would ever require. It is probably best to use relax to extract what you need - just load that file into a new instance of relax and use the appropriate relax users functions to output text lists or graphs of the data. > More specifically what confuses me is that I get many lists of ‘s2_sim’ (see below), are we supposed to collect all of these lists and compute mean and standard deviation? These are the values fitted for each of the Monte Carlo simulation data sets. The relax script you used should already have calculated the standard deviation for this and placed it into the *_err data structures. Note that the average value of fitted Monte Carlo simulations has no practical or statistical uses so we never calculate it. > Or do you suggest me to retrieve the s2-data from the log-file? > (using command 'relax —log log.txt single_model.py’) It's best to simply load the file back into relax and use user functions such as value.write(), grace.write(), pymol.macro_write(), molmol.macro_write(), etc to output the data in a way you can use it - text lists with values and errors, 2D graphs with error bars, or values represented via a 3D structure. > before when I've used the ‘dauvergne_protocol.py’ I got all output data in the folder ‘final', and as text files. There should also be Grace graph *.agr files in there, as well as PyMOL macros. I hope this information helps. Regards, Edward |
From: Johan W. <joh...@gu...> - 2021-11-26 10:52:12
|
Hi Edward, Thanks for all efforts put into developing the relax-software. I'm using the model-free script ‘single_model.py', and get as output an XML-file called ‘results’ ca 5 Mb. How do you suggest the user to best deal with that file? More specifically what confuses me is that I get many lists of ‘s2_sim’ (see below), are we supposed to collect all of these lists and compute mean and standard deviation? Or do you suggest me to retrieve the s2-data from the log-file? (using command 'relax —log log.txt single_model.py’) before when I've used the ‘dauvergne_protocol.py’ I got all output data in the folder ‘final', and as text files. Best regards Johan Wallerstein <s2_sim type="list"> <value>[0.95152860432291564, 0.9672981514452762, 0.95993059277057047, 0.95820377205217477, 0.96576414838251423, 0.96298967077335906, 0.9660400356093225, 0.95806003445541332, 0.96352640333163264, 0.96799452139576059, 0.95103783952720344, 0.93179489311192121, 0.94756550129829631, 0.93701232174596849, 0.96745568145390715, 0.95570689835190303, 0.94074080863593468, 0.96185047761905462, 0.9454986299660546, 0.95168467832510473, 0.96592806548011878, 0.9630206675640175, 0.97200373396509343, 0.92707180780737219, 0.96091550639027667, 0.95072286744860601, 0.95316000616480434, 0.96556410133854342, 0.97563138691827134, 0.92753299459301297, 0.94150377267173535, 0.975807615034611, 0.97138473322344343, 0.96464597713441258, 0.95074791457667795, 0.94129870309293173, 0.96438535936538972, 0.95058762836881538, 0.94685664104203915, 0.95873958667027759, 0.94133863455681022, 0.93584128880204898, 0.96511687366901122, 0.96637041894396469, 0.95577224255387916, 0.94573785921818221, 0.95374049176497777, 0.96792674745821594, 0.9501576357775543, 0.96030921627237686, 0.96022377233562939, 0.95112752976492076, 0.96590544989452254, 0.96677040462371044, 0.96277904160613581, 0.95574338297356842, 0.9570489237288633, 0.94970419098199654, 0.95282951387368708, 0.92975666513207811, 0.96908995371472195, 0.96737612885865887, 
0.96399552391524324, 0.94687317116297987, 0.95868506400099918, 0.96698337179090099, 0.95133324280051546, 0.90673134346370898, 0.9468765720830995, 0.96938221729680085, 0.96871934977650642, 0.95144510772897029, 0.9575589651269576, 0.91699114721408237, 0.95216041788641825, 0.93541671262345072, 0.9357367035937314, 0.92272266756073262, 0.95479292510879288, 0.96166197221550132, 0.96835165479833119, 0.9561238148772091, 0.95933124341405895, 0.96509420536939838, 0.96183841633384004, 0.95613277963448084, 0.94300185793219771, 0.96156913949502498, 0.97112257809868563, 0.92917153242950434, 0.949528023173974, 0.96707362385535922, 0.91089810814233851, 0.95893712178733659, 0.96576236746040656, 0.96730988292895037, 0.9679862738051378, 0.96964730547450639, 0.97138932817592882, 0.96658455004026855]</value> |