From: <mm...@us...> - 2007-09-26 19:13:18
|
Revision: 11153
          http://bibdesk.svn.sourceforge.net/bibdesk/?rev=11153&view=rev
Author:   mmcc
Date:     2007-09-26 12:13:21 -0700 (Wed, 26 Sep 2007)

Log Message:
-----------
Alter WebParser interface to only create an NSXMLDocument once despite having several parsers that need it. Added two new parser types for Citeulike.org and acm.org/dl.

Modified Paths:
--------------
    trunk/bibdesk/BDSKHCiteParser.h
    trunk/bibdesk/BDSKHCiteParser.m
    trunk/bibdesk/BDSKWebParser.h
    trunk/bibdesk/BDSKWebParser.m

Modified: trunk/bibdesk/BDSKHCiteParser.h
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.h 2007-09-26 19:12:04 UTC (rev 11152)
+++ trunk/bibdesk/BDSKHCiteParser.h 2007-09-26 19:13:21 UTC (rev 11153)
@@ -37,7 +37,9 @@
 #import <Cocoa/Cocoa.h> #import "BDSKWebParser.h" +#import "NSXMLNode_BDSKExtensions.h" + @interface BDSKHCiteParser : BDSKWebParser @end

Modified: trunk/bibdesk/BDSKHCiteParser.m
===================================================================
--- trunk/bibdesk/BDSKHCiteParser.m 2007-09-26 19:12:04 UTC (rev 11152)
+++ trunk/bibdesk/BDSKHCiteParser.m 2007-09-26 19:13:21 UTC (rev 11153)
@@ -40,15 +40,7 @@
 #import "BibItem.h" #import "BDSKTypeManager.h" -@interface NSXMLNode (BDSKExtensions) -- (NSString *)stringValueOfAttribute:(NSString *)attrName; -- (NSArray *)descendantOrSelfNodesWithClassName:(NSString *)className error:(NSError **)err; -- (BOOL)hasParentWithClassName:(NSString *)class; -- (NSArray *)classNames; -- (NSString *)fullStringValueIfABBR; -@end - @interface BDSKHCiteParser (Private) + (NSCalendarDate *)dateFromNode:(NSXMLNode *)node; + (NSString *)BTAuthorStringFromVCardNode:(NSXMLNode *)node;
@@ -59,45 +51,29 @@
 @implementation BDSKHCiteParser -+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{ ++ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{ NSString *htmlString = [(id)[domDocument documentElement] 
outerHTML]; if (nil == htmlString) return NO; - NSError *error = nil; - NSXMLDocument *doc = [[NSXMLDocument alloc] initWithXMLString:htmlString - options:NSXMLDocumentTidyHTML error:&error]; - [doc autorelease]; - - if(doc == nil) + if(xmlDocument == nil) return NO; NSString *containsCitationPath = @".//*[contains(concat(' ', normalize-space(@class), ' '),' hcite ')]"; - - return [[[doc rootElement] nodesForXPath:containsCitationPath error:&error] count] > 0; + NSError *error = nil; + return [[[xmlDocument rootElement] nodesForXPath:containsCitationPath error:&error] count] > 0; } -+ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url error:(NSError **)outError{ - ++ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url error:(NSError **)outError{ + NSMutableArray *items = [NSMutableArray arrayWithCapacity:0]; - NSString *htmlString = [(id)[domDocument documentElement] outerHTML]; - if (nil == htmlString) - return nil; - NSError *error = nil; - NSXMLDocument *doc = [[NSXMLDocument alloc] initWithXMLString:htmlString - options:NSXMLDocumentTidyHTML error:&error]; - [doc autorelease]; - - if(doc == nil){ - if (outError) *outError = error; - return nil; - } - NSString *containsCitationPath = @".//*[contains(concat(' ', normalize-space(@class), ' '),' hcite ')]"; - - NSArray *mainNodes = [[doc rootElement] nodesForXPath:containsCitationPath + + NSError *error = nil; + + NSArray *mainNodes = [[xmlDocument rootElement] nodesForXPath:containsCitationPath error:&error]; unsigned int i, count = [mainNodes count]; @@ -386,81 +362,3 @@ @end - -@implementation NSXMLNode (BDSKExtensions) - -- (NSString *)stringValueOfAttribute:(NSString *)attrName{ - NSError *err = nil; - NSString *path = [NSString stringWithFormat:@"./@%@", attrName]; - NSArray *atts = [self nodesForXPath:path error:&err]; - if ([atts count] == 0) return nil; - return [[atts objectAtIndex:0] stringValue]; -} - -- (NSArray 
*)descendantOrSelfNodesWithClassName:(NSString *)className error:(NSError **)err{ - NSString *path = [NSString stringWithFormat:@".//*[contains(concat(' ', normalize-space(@class), ' '), ' %@ ')]", className]; - NSArray *ar = [self nodesForXPath:path error:err]; - return ar; -} - -- (BOOL)hasParentWithClassName:(NSString *)class{ - - NSXMLNode *parent = [self parent]; - - do{ - if([parent kind] != NSXMLElementKind) return NO; // handles root node - - NSArray *parentClassNames = [parent classNames]; - - if ([parentClassNames containsObject:class]){ - return YES; - } - - }while(parent = [parent parent]); - - return NO; -} - - -- (NSArray *)classNames{ - - if([self kind] != NSXMLElementKind) [NSException raise:NSInvalidArgumentException format:@"wrong node kind"]; - - NSMutableArray *a = [NSMutableArray arrayWithCapacity:0]; - - NSError *err = nil; - - NSArray *classNodes = [self nodesForXPath:@"@class" - error:&err]; - if([classNodes count] == 0) - return a; - - NSAssert ([classNodes count] == 1, @"too many nodes in classNodes"); - - NSXMLNode *classNode = [classNodes objectAtIndex:0]; - - [a addObjectsFromArray:[[classNode stringValue] componentsSeparatedByString:@" "]]; - - return a; -} - - -- (NSString *)fullStringValueIfABBR{ - NSError *err; - if([self kind] != NSXMLElementKind) [NSException raise:NSInvalidArgumentException format:@"wrong node kind"]; - - if([[self name] isEqualToString:@"abbr"]){ - //todo: will need more robust comparison for namespaced node titles. 
- - // return value of title attribute instead - NSArray *titleNodes = [self nodesForXPath:@"@title" - error:&err]; - if([titleNodes count] > 0){ - return [[titleNodes objectAtIndex:0] stringValue]; - } - } - - return [self stringValue]; -} - -@end \ No newline at end of file Modified: trunk/bibdesk/BDSKWebParser.h =================================================================== --- trunk/bibdesk/BDSKWebParser.h 2007-09-26 19:12:04 UTC (rev 11152) +++ trunk/bibdesk/BDSKWebParser.h 2007-09-26 19:13:21 UTC (rev 11153) @@ -11,17 +11,25 @@ enum { BDSKUnknownWebType = -1, - BDSKHCiteWebType + BDSKHCiteWebType, + BDSKCiteULikeWebType, + BDSKACMDLWebType }; @interface BDSKWebParser : NSObject -+ (int)webTypeOfDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url; +// IMPORTANT NOTE: +// Use this method as the main interface to this class. It will build the XMLDocument and figure out the type for you: ++ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url error:(NSError **)outError; -+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url ofType:(int)webType; -+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url; -+ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url ofType:(int)webType error:(NSError **)outError; -+ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url error:(NSError **)outError; ++ (int)webTypeOfDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url; ++ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url ofType:(int)webType; ++ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url; + ++ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url ofType:(int)webType error:(NSError **)outError; ++ (NSArray 
*)itemsFromDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url error:(NSError **)outError; + + @end Modified: trunk/bibdesk/BDSKWebParser.m =================================================================== --- trunk/bibdesk/BDSKWebParser.m 2007-09-26 19:12:04 UTC (rev 11152) +++ trunk/bibdesk/BDSKWebParser.m 2007-09-26 19:13:21 UTC (rev 11153) @@ -9,14 +9,21 @@ #import "BDSKWebParser.h" #import <OmniBase/OBUtilities.h> #import "BDSKHCiteParser.h" +#import "BDSKCiteULikeParser.h" +#import "BDSKACMDLParser.h" - @implementation BDSKWebParser static Class webParserClassForType(int stringType) { Class parserClass = Nil; switch(stringType){ + case BDSKACMDLWebType: + parserClass = [BDSKACMDLParser class]; + break; + case BDSKCiteULikeWebType: + parserClass = [BDSKCiteULikeParser class]; + break; case BDSKHCiteWebType: parserClass = [BDSKHCiteParser class]; break; @@ -26,32 +33,64 @@ return parserClass; } -+ (int)webTypeOfDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{ - if([BDSKHCiteParser canParseDocument:domDocument fromURL:url]) ++ (int)webTypeOfDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{ + if([BDSKCiteULikeParser canParseDocument:domDocument xmlDocument:xmlDocument fromURL:url]) + return BDSKCiteULikeWebType; + if([BDSKACMDLParser canParseDocument:domDocument xmlDocument:xmlDocument fromURL:url]) + return BDSKACMDLWebType; + if([BDSKHCiteParser canParseDocument:domDocument xmlDocument:xmlDocument fromURL:url]) return BDSKHCiteWebType; return BDSKUnknownWebType; } -+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url ofType:(int)webType{ ++ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url ofType:(int)webType{ Class parserClass = webParserClassForType(webType); - return parserClass != Nil ? 
[parserClass canParseDocument:domDocument fromURL:url] : NO; + return parserClass != Nil ? [parserClass canParseDocument:domDocument xmlDocument:xmlDocument fromURL:url] : NO; } -+ (BOOL)canParseDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url{ ++ (BOOL)canParseDocument:(DOMDocument *)domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url{ return NO; } -+ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url ofType:(int)webType error:(NSError **)outError{ ++ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument + xmlDocument:(NSXMLDocument *)xmlDocument + fromURL:(NSURL *)url + ofType:(int)webType error:(NSError **)outError{ + Class parserClass = Nil; if (webType == BDSKUnknownWebType) - webType = [self webTypeOfDocument:domDocument fromURL:url]; + webType = [self webTypeOfDocument:domDocument xmlDocument:xmlDocument fromURL:url]; + parserClass = webParserClassForType(webType); - return [parserClass itemsFromDocument:domDocument fromURL:url error:outError]; + + return [parserClass itemsFromDocument:domDocument xmlDocument:xmlDocument fromURL:url error:outError]; } + (NSArray *)itemsFromDocument:(DOMDocument *)domDocument fromURL:(NSURL *)url error:(NSError **)outError{ + NSError *error = nil; + + NSString *htmlString = [(id)[domDocument documentElement] outerHTML]; + if (nil == htmlString) + return nil; + + NSXMLDocument *xmlDoc = [[NSXMLDocument alloc] initWithXMLString:htmlString + options:NSXMLDocumentTidyHTML error:&error]; + [xmlDoc autorelease]; + if(xmlDoc == nil){ + if(outError) *outError = error; + return nil; + } + + return [self itemsFromDocument:domDocument xmlDocument:xmlDoc fromURL:url error:outError]; +} + ++ (NSArray *)itemsFromDocument:(DOMDocument *)domDocument + xmlDocument:(NSXMLDocument *)xmlDocument + fromURL:(NSURL *)url + error:(NSError **)outError{ + if([self class] == [BDSKWebParser class]){ - return [self itemsFromDocument:domDocument fromURL:(NSURL *)url ofType:BDSKUnknownWebType 
 error:outError];
+ return [self itemsFromDocument:domDocument xmlDocument:(NSXMLDocument *)xmlDocument fromURL:(NSURL *)url ofType:BDSKUnknownWebType error:outError];
 }else{
 OBRequestConcreteImplementation(self, _cmd);
 return nil;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|