From: <mat...@or...> - 2014-04-14 15:11:48
|
osaf/tools/saflog/saflogger/saf_logger.c | 82 +++++++++++++++++++++++++------ 1 files changed, 66 insertions(+), 16 deletions(-) Currently the saflogger tool does not honour SA_AIS_ERR_TRY_AGAIN for the saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose() and saLogFinalize() APIs. This can create problems in the context of the overload protection scheme in the LOG server and the upcoming #793, which provides a flow control mechanism. The absence of a try-again mechanism can lead to discarded writes (depending, of course, on how the end user has integrated the saflogger tool) and other problems; e.g. it has been observed that not retrying (after some time) when streamOpen returns TRY_AGAIN can lead to other serious problems. This patch does the following: - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. - Waits for 100 ms before trying again, up to a worst case of 10 seconds. - Removes an undesired error message when try_again is hit. - Avoids an indefinite loop for log writes and instead re-attempts up to a worst case of 10 seconds. diff --git a/osaf/tools/saflog/saflogger/saf_logger.c b/osaf/tools/saflog/saflogger/saf_logger.c --- a/osaf/tools/saflog/saflogger/saf_logger.c +++ b/osaf/tools/saflog/saflogger/saf_logger.c @@ -48,6 +48,11 @@ #define DEFAULT_APP_LOG_FILE_SIZE 1024 #define VENDOR_ID 193 #define DEFAULT_MAX_FILES_ROTATED 4 +/* Try for 10 seconds before giving up on an API */ +#define TEN_SECONDS 60*1000*1000 +/* Sleep for 100 ms before retrying an API */ +#define HUNDRED_MS 100*1000 +/* To the reviewer: Should we increase either of the above two timeperiod? 
*/ static void logWriteLogCallbackT(SaInvocationT invocation, SaAisErrorT error); @@ -121,9 +126,9 @@ static SaAisErrorT write_log_record(SaLo SaAisErrorT errorCode; SaInvocationT invocation; int i = 0; - int try_agains = 0; struct pollfd fds[1]; int ret; + unsigned int wait_time = 0; i++; @@ -131,13 +136,15 @@ static SaAisErrorT write_log_record(SaLo retry: errorCode = saLogWriteLogAsync(logStreamHandle, invocation, SA_LOG_RECORD_WRITE_ACK, logRecord); - if (errorCode == SA_AIS_ERR_TRY_AGAIN) { - usleep(100000); /* 100 ms */ - try_agains++; + if (errorCode == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; goto retry; } if (errorCode != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogWriteLogAsync FAILED: %s\n", saf_error(errorCode)); return errorCode; } @@ -172,28 +179,25 @@ poll_retry: return errorCode; } - if (cb_error == SA_AIS_ERR_TRY_AGAIN) { - usleep(100000); /* 100 ms */ - try_agains++; + if (cb_error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; goto retry; } if (cb_error == SA_AIS_ERR_TIMEOUT) { - usleep(100000); /* 100 ms */ + usleep(HUNDRED_MS); fprintf(stderr, "got SA_AIS_ERR_TIMEOUT, retry\n"); goto retry; } if (cb_error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "logWriteLogCallbackT FAILED: %s\n", saf_error(cb_error)); return errorCode; } - if (try_agains > 0) { - fprintf(stderr, "got %u SA_AIS_ERR_TRY_AGAIN, waited %u secs\n", try_agains, try_agains / 10); - try_agains = 0; - } - return errorCode; } @@ -249,6 +253,7 @@ int main(int argc, char *argv[]) SaLogHandleT logHandle; SaLogStreamHandleT logStreamHandle; SaSelectionObjectT selectionObject; + unsigned int wait_time; srandom(getpid()); @@ -338,8 +343,17 @@ int main(int argc, char *argv[]) logRecord.logBuffer = &logBuffer; } + wait_time = 0; error 
= saLogInitialize(&logHandle, &logCallbacks, &logVersion); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogInitialize(&logHandle, &logCallbacks, &logVersion); + } + if (error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogInitialize FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } @@ -353,33 +367,69 @@ int main(int argc, char *argv[]) /* Try open the stream before creating it. It might be a configured app * stream with other attributes than we have causing open with default * attributes to fail */ + wait_time = 0; error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, SA_TIME_ONE_SECOND, &logStreamHandle); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, + SA_TIME_ONE_SECOND, &logStreamHandle); + } if (error == SA_AIS_ERR_NOT_EXIST) { + wait_time = 0; error = saLogStreamOpen_2(logHandle, &logStreamName, logFileCreateAttributes, logStreamOpenFlags, SA_TIME_ONE_SECOND, &logStreamHandle); - if (error != SA_AIS_OK) { - fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", saf_error(error)); - exit(EXIT_FAILURE); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamOpen_2(logHandle, &logStreamName, logFileCreateAttributes, + logStreamOpenFlags, SA_TIME_ONE_SECOND, &logStreamHandle); } } + if (error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); + fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", saf_error(error)); + exit(EXIT_FAILURE); + } + if (write_log_record(logHandle, logStreamHandle, selectionObject, &logRecord) != SA_AIS_OK) { exit(EXIT_FAILURE); } + wait_time = 0; error = saLogStreamClose(logStreamHandle); + while (error == 
SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamClose(logStreamHandle); + } + if (SA_AIS_OK != error) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogStreamClose FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } + wait_time = 0; error = saLogFinalize(logHandle); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogFinalize(logHandle); + } + if (SA_AIS_OK != error) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogFinalize FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); } + |
From: <mat...@or...> - 2014-04-14 15:11:46
|
Summary: saflogger tool shall TRY_AGAIN for all log apis [#839] Review request for Trac Ticket(s): #839 Peer Reviewer(s): Lennart Pull request to: <<LIST THE PERSON WITH PUSH ACCESS HERE>> Affected branch(es): opensaf-4.3.x, 4.4.x, default Development branch: <<IF ANY GIVE THE REPO URL>> -------------------------------- Impacted area Impact y/n -------------------------------- Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): --------------------------------------------- changeset 07b5a631ccb7a024ba0aafb38f6d3f95a8baefe6 Author: Mathivanan N.P.<mat...@or...> Date: Mon, 14 Apr 2014 19:14:46 -0400 log: saflogger tool shall TRY_AGAIN for all log apis [#839] Currently the saflogger tool does not honour SA_AIS_ERR_TRY_AGAIN for the saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose() and saLogFinalize() APIs. This can create problems in the context of the overload protection scheme in the LOG server and the upcoming #793, which provides a flow control mechanism. The absence of a try-again mechanism can lead to discarded writes (depending, of course, on how the end user has integrated the saflogger tool) and other problems; e.g. it has been observed that not retrying (after some time) when streamOpen returns TRY_AGAIN can lead to other serious problems. This patch does the following: - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. - Waits for 100 ms before trying again, up to a worst case of 10 seconds. - Removes an undesired error message when try_again is hit. - Avoids an indefinite loop for log writes and instead re-attempts up to a worst case of 10 seconds. Complete diffstat: ------------------ osaf/tools/saflog/saflogger/saf_logger.c | 82 +++++++++++++++++++++++++------ 1 files changed, 66 insertions(+), 16 deletions(-) Testing Commands: ----------------- It is very difficult to reproduce. 
I could only reproduce try_agains for the app streams (when configured with a smaller high limit). Testing, Expected Results: -------------------------- - Upon receiving try_again for any of the LOG apis, saflogger should retry every 100ms for a maximum of 10 seconds and then, if the call still has not succeeded, return failure to the invoker. - For try_agains involving writes, saflogger shall not loop infinitely trying again, but rather shall retry every 100ms for a maximum of 10 seconds and then return failure to the invoker. - Currently, there is an error message printed when try again is attempted. This should not be seen until all attempts (up to 10 seconds) for try_agains are finished and the command returns. Conditions of Submission: ------------------------- Ack from Lennart. Arch Built Started Linux distro ------------------------------------------- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: ------------------- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. 
___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. |
From: <mat...@or...> - 2014-04-14 15:21:47
|
Summary: saflogger tool shall TRY_AGAIN for all log apis: v2 [#839] Review request for Trac Ticket(s): #839 Peer Reviewer(s): Lennart Pull request to: <<LIST THE PERSON WITH PUSH ACCESS HERE>> Affected branch(es): opensaf-4.3.x, 4.4.x, default Development branch: <<IF ANY GIVE THE REPO URL>> -------------------------------- Impacted area Impact y/n -------------------------------- Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services y OpenSAF services n Core libraries n Samples n Tests n Other n Comments (indicate scope for each "y" above): --------------------------------------------- changeset 07b5a631ccb7a024ba0aafb38f6d3f95a8baefe6 Author: Mathivanan N.P.<mat...@or...> Date: Mon, 14 Apr 2014 19:14:46 -0400 log: saflogger tool shall TRY_AGAIN for all log apis [#839] Currently the saflogger tool does not honour SA_AIS_ERR_TRY_AGAIN for the saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose() and saLogFinalize() APIs. This can create problems in the context of the overload protection scheme in the LOG server and the upcoming #793, which provides a flow control mechanism. The absence of a try-again mechanism can lead to discarded writes (depending, of course, on how the end user has integrated the saflogger tool) and other problems; e.g. it has been observed that not retrying (after some time) when streamOpen returns TRY_AGAIN can lead to other serious problems. This patch does the following: - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. - Waits for 100 ms before trying again, up to a worst case of 10 seconds. - Removes an undesired error message when try_again is hit. - Avoids an indefinite loop for log writes and instead re-attempts up to a worst case of 10 seconds. 
Complete diffstat: ------------------ osaf/tools/saflog/saflogger/saf_logger.c | 82 +++++++++++++++++++++++++------ 1 files changed, 66 insertions(+), 16 deletions(-) Testing Commands: ----------------- It is very difficult to reproduce. I could only reproduce try_agains for the app streams (when configured with a smaller high limit). Testing, Expected Results: -------------------------- - Upon receiving try_again for any of the LOG apis, saflogger should retry every 100ms for a maximum of 10 seconds and then, if the call still has not succeeded, return failure to the invoker. - For try_agains involving writes, saflogger shall not loop infinitely trying again, but rather shall retry every 100ms for a maximum of 10 seconds and then return failure to the invoker. - Currently, there is an error message printed when try again is attempted. This should not be seen until all attempts (up to 10 seconds) for try_agains are finished and the command returns. Conditions of Submission: ------------------------- Ack from Lennart. Arch Built Started Linux distro ------------------------------------------- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: ------------------- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. 
Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.hgrc file (i.e. username, email etc) ___ Your computer have a badly configured date and time; confusing the the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. |
From: <mat...@or...> - 2014-04-14 15:21:49
|
osaf/tools/saflog/saflogger/saf_logger.c | 82 +++++++++++++++++++++++++------ 1 files changed, 66 insertions(+), 16 deletions(-) Currently the saflogger tool does not honour SA_AIS_ERR_TRY_AGAIN for the saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose() and saLogFinalize() APIs. This can create problems in the context of the overload protection scheme in the LOG server and the upcoming #793, which provides a flow control mechanism. The absence of a try-again mechanism can lead to discarded writes (depending, of course, on how the end user has integrated the saflogger tool) and other problems; e.g. it has been observed that not retrying (after some time) when streamOpen returns TRY_AGAIN can lead to other serious problems. This patch does the following: - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. - Waits for 100 ms before trying again, up to a worst case of 10 seconds. - Removes an undesired error message when try_again is hit. - Avoids an indefinite loop for log writes and instead re-attempts up to a worst case of 10 seconds. diff --git a/osaf/tools/saflog/saflogger/saf_logger.c b/osaf/tools/saflog/saflogger/saf_logger.c --- a/osaf/tools/saflog/saflogger/saf_logger.c +++ b/osaf/tools/saflog/saflogger/saf_logger.c @@ -48,6 +48,11 @@ #define DEFAULT_APP_LOG_FILE_SIZE 1024 #define VENDOR_ID 193 #define DEFAULT_MAX_FILES_ROTATED 4 +/* Try for 10 seconds before giving up on an API */ +#define TEN_SECONDS 10*1000*1000 +/* Sleep for 100 ms before retrying an API */ +#define HUNDRED_MS 100*1000 +/* To the reviewer: Should we increase either of the above two timeperiod? 
*/ static void logWriteLogCallbackT(SaInvocationT invocation, SaAisErrorT error); @@ -121,9 +126,9 @@ static SaAisErrorT write_log_record(SaLo SaAisErrorT errorCode; SaInvocationT invocation; int i = 0; - int try_agains = 0; struct pollfd fds[1]; int ret; + unsigned int wait_time = 0; i++; @@ -131,13 +136,15 @@ static SaAisErrorT write_log_record(SaLo retry: errorCode = saLogWriteLogAsync(logStreamHandle, invocation, SA_LOG_RECORD_WRITE_ACK, logRecord); - if (errorCode == SA_AIS_ERR_TRY_AGAIN) { - usleep(100000); /* 100 ms */ - try_agains++; + if (errorCode == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; goto retry; } if (errorCode != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogWriteLogAsync FAILED: %s\n", saf_error(errorCode)); return errorCode; } @@ -172,28 +179,25 @@ poll_retry: return errorCode; } - if (cb_error == SA_AIS_ERR_TRY_AGAIN) { - usleep(100000); /* 100 ms */ - try_agains++; + if (cb_error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; goto retry; } if (cb_error == SA_AIS_ERR_TIMEOUT) { - usleep(100000); /* 100 ms */ + usleep(HUNDRED_MS); fprintf(stderr, "got SA_AIS_ERR_TIMEOUT, retry\n"); goto retry; } if (cb_error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "logWriteLogCallbackT FAILED: %s\n", saf_error(cb_error)); return errorCode; } - if (try_agains > 0) { - fprintf(stderr, "got %u SA_AIS_ERR_TRY_AGAIN, waited %u secs\n", try_agains, try_agains / 10); - try_agains = 0; - } - return errorCode; } @@ -249,6 +253,7 @@ int main(int argc, char *argv[]) SaLogHandleT logHandle; SaLogStreamHandleT logStreamHandle; SaSelectionObjectT selectionObject; + unsigned int wait_time; srandom(getpid()); @@ -338,8 +343,17 @@ int main(int argc, char *argv[]) logRecord.logBuffer = &logBuffer; } + wait_time = 0; error 
= saLogInitialize(&logHandle, &logCallbacks, &logVersion); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogInitialize(&logHandle, &logCallbacks, &logVersion); + } + if (error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogInitialize FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } @@ -353,33 +367,69 @@ int main(int argc, char *argv[]) /* Try open the stream before creating it. It might be a configured app * stream with other attributes than we have causing open with default * attributes to fail */ + wait_time = 0; error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, SA_TIME_ONE_SECOND, &logStreamHandle); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, + SA_TIME_ONE_SECOND, &logStreamHandle); + } if (error == SA_AIS_ERR_NOT_EXIST) { + wait_time = 0; error = saLogStreamOpen_2(logHandle, &logStreamName, logFileCreateAttributes, logStreamOpenFlags, SA_TIME_ONE_SECOND, &logStreamHandle); - if (error != SA_AIS_OK) { - fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", saf_error(error)); - exit(EXIT_FAILURE); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamOpen_2(logHandle, &logStreamName, logFileCreateAttributes, + logStreamOpenFlags, SA_TIME_ONE_SECOND, &logStreamHandle); } } + if (error != SA_AIS_OK) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); + fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", saf_error(error)); + exit(EXIT_FAILURE); + } + if (write_log_record(logHandle, logStreamHandle, selectionObject, &logRecord) != SA_AIS_OK) { exit(EXIT_FAILURE); } + wait_time = 0; error = saLogStreamClose(logStreamHandle); + while (error == 
SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogStreamClose(logStreamHandle); + } + if (SA_AIS_OK != error) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogStreamClose FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } + wait_time = 0; error = saLogFinalize(logHandle); + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < TEN_SECONDS) { + usleep(HUNDRED_MS); + wait_time += HUNDRED_MS; + error = saLogFinalize(logHandle); + } + if (SA_AIS_OK != error) { + if (wait_time) + fprintf(stderr, "Waited for %u seconds.\n", wait_time/1000000); fprintf(stderr, "saLogFinalize FAILED: %s\n", saf_error(error)); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); } + |
From: Lennart L. <len...@er...> - 2014-04-23 07:15:44
|
Ack. See also related #442. I think this is also fixed with this patch Thanks Lennart > -----Original Message----- > From: mat...@or... [mailto:mat...@or...] > Sent: den 15 april 2014 02:56 > To: Lennart Lund > Cc: ope...@li... > Subject: [PATCH 1 of 1] log: saflogger tool to honour try again [#839] > > osaf/tools/saflog/saflogger/saf_logger.c | 82 > +++++++++++++++++++++++++------ > 1 files changed, 66 insertions(+), 16 deletions(-) > > > Currently the saflogger tool does not honours the > SA_AIS_ERR_TRY_AGAIN for the > saLogInitialize(), saLogStreamOpen_2(), saLogStreamClose(), > saLogFinalize() > APIs. This can create problems in the context of the overload protection > scheme in the LOG Server and the upcoming #793 that provides for a > flow > control mechanism. The absence of a try again mechanism can lead to > discarded writes(ofcourse depends on how the enduser has integrated > the > saflogger tool) and other problems for eg:- It has been observed that if a > TRY_AGAIN(after sometime) is not attmepted on streamOpen, can lead > to other > serious problems. This patch does the following: > - Enables the saflogger tool to TRY_AGAIN for all the LOG APIs. > - Waits for 10 ms before trying again, until a worstcase of 10 seconds. > - Removes an undesired error mesasge when try_again is hit. > - Avoids an indefinite loop for log Writes() and instead makes it re- > attempt > till a worstcase of 10 seconds. 
> > diff --git a/osaf/tools/saflog/saflogger/saf_logger.c > b/osaf/tools/saflog/saflogger/saf_logger.c > --- a/osaf/tools/saflog/saflogger/saf_logger.c > +++ b/osaf/tools/saflog/saflogger/saf_logger.c > @@ -48,6 +48,11 @@ > #define DEFAULT_APP_LOG_FILE_SIZE 1024 > #define VENDOR_ID 193 > #define DEFAULT_MAX_FILES_ROTATED 4 > +/* Try for 10 seconds before giving up on an API */ #define TEN_SECONDS > +10*1000*1000 > +/* Sleep for 100 ms before retrying an API */ #define HUNDRED_MS > +100*1000 > +/* To the reviewer: Should we increase either of the above two > +timeperiod? */ > > static void logWriteLogCallbackT(SaInvocationT invocation, SaAisErrorT > error); > > @@ -121,9 +126,9 @@ static SaAisErrorT write_log_record(SaLo > SaAisErrorT errorCode; > SaInvocationT invocation; > int i = 0; > - int try_agains = 0; > struct pollfd fds[1]; > int ret; > + unsigned int wait_time = 0; > > i++; > > @@ -131,13 +136,15 @@ static SaAisErrorT write_log_record(SaLo > > retry: > errorCode = saLogWriteLogAsync(logStreamHandle, invocation, > SA_LOG_RECORD_WRITE_ACK, logRecord); > - if (errorCode == SA_AIS_ERR_TRY_AGAIN) { > - usleep(100000); /* 100 ms */ > - try_agains++; > + if (errorCode == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > goto retry; > } > > if (errorCode != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogWriteLogAsync FAILED: %s\n", > saf_error(errorCode)); > return errorCode; > } > @@ -172,28 +179,25 @@ poll_retry: > return errorCode; > } > > - if (cb_error == SA_AIS_ERR_TRY_AGAIN) { > - usleep(100000); /* 100 ms */ > - try_agains++; > + if (cb_error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > goto retry; > } > > if (cb_error == SA_AIS_ERR_TIMEOUT) { > - usleep(100000); /* 100 ms */ > + usleep(HUNDRED_MS); > fprintf(stderr, "got SA_AIS_ERR_TIMEOUT, retry\n"); > 
goto retry; > } > > if (cb_error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "logWriteLogCallbackT FAILED: %s\n", > saf_error(cb_error)); > return errorCode; > } > > - if (try_agains > 0) { > - fprintf(stderr, "got %u SA_AIS_ERR_TRY_AGAIN, waited %u > secs\n", try_agains, try_agains / 10); > - try_agains = 0; > - } > - > return errorCode; > } > > @@ -249,6 +253,7 @@ int main(int argc, char *argv[]) > SaLogHandleT logHandle; > SaLogStreamHandleT logStreamHandle; > SaSelectionObjectT selectionObject; > + unsigned int wait_time; > > srandom(getpid()); > > @@ -338,8 +343,17 @@ int main(int argc, char *argv[]) > logRecord.logBuffer = &logBuffer; > } > > + wait_time = 0; > error = saLogInitialize(&logHandle, &logCallbacks, &logVersion); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogInitialize(&logHandle, &logCallbacks, > &logVersion); > + } > + > if (error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogInitialize FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > @@ -353,33 +367,69 @@ int main(int argc, char *argv[]) > /* Try open the stream before creating it. 
It might be a configured > app > * stream with other attributes than we have causing open with > default > * attributes to fail */ > + wait_time = 0; > error = saLogStreamOpen_2(logHandle, &logStreamName, NULL, 0, > SA_TIME_ONE_SECOND, &logStreamHandle); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamOpen_2(logHandle, &logStreamName, > NULL, 0, > + SA_TIME_ONE_SECOND, > &logStreamHandle); > + } > > if (error == SA_AIS_ERR_NOT_EXIST) { > + wait_time = 0; > error = saLogStreamOpen_2(logHandle, &logStreamName, > logFileCreateAttributes, > logStreamOpenFlags, > SA_TIME_ONE_SECOND, &logStreamHandle); > - if (error != SA_AIS_OK) { > - fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", > saf_error(error)); > - exit(EXIT_FAILURE); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamOpen_2(logHandle, > &logStreamName, logFileCreateAttributes, > + logStreamOpenFlags, > SA_TIME_ONE_SECOND, &logStreamHandle); > } > } > > + if (error != SA_AIS_OK) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > + fprintf(stderr, "saLogStreamOpen_2 FAILED: %s\n", > saf_error(error)); > + exit(EXIT_FAILURE); > + } > + > if (write_log_record(logHandle, logStreamHandle, selectionObject, > &logRecord) != SA_AIS_OK) { > exit(EXIT_FAILURE); > } > > + wait_time = 0; > error = saLogStreamClose(logStreamHandle); > + while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogStreamClose(logStreamHandle); > + } > + > if (SA_AIS_OK != error) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogStreamClose FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > > + wait_time = 0; > error = saLogFinalize(logHandle); > + 
while (error == SA_AIS_ERR_TRY_AGAIN && wait_time < > TEN_SECONDS) { > + usleep(HUNDRED_MS); > + wait_time += HUNDRED_MS; > + error = saLogFinalize(logHandle); > + } > + > if (SA_AIS_OK != error) { > + if (wait_time) > + fprintf(stderr, "Waited for %u seconds.\n", > wait_time/1000000); > fprintf(stderr, "saLogFinalize FAILED: %s\n", > saf_error(error)); > exit(EXIT_FAILURE); > } > > exit(EXIT_SUCCESS); > } > + |