From: SVN c. m. f. t. SWORD-A. p. <swo...@li...> - 2012-01-05 17:32:26
|
Revision: 417 http://sword-app.svn.sourceforge.net/sword-app/?rev=417&view=rev Author: richard-jones Date: 2012-01-05 17:32:17 +0000 (Thu, 05 Jan 2012) Log Message: ----------- rename sss.py to sss-1.0.py so that we can keep it around for back compat testing and also as a fully functioning sword server in a single file Added Paths: ----------- sss/branches/sss-2/sss/sss-1.0.py Removed Paths: ------------- sss/branches/sss-2/sss/sss.py Copied: sss/branches/sss-2/sss/sss-1.0.py (from rev 416, sss/branches/sss-2/sss/sss.py) =================================================================== --- sss/branches/sss-2/sss/sss-1.0.py (rev 0) +++ sss/branches/sss-2/sss/sss-1.0.py 2012-01-05 17:32:17 UTC (rev 417) @@ -0,0 +1,3477 @@ +""" SSS - Simple SWORD Server """ + +__version__ = "2.0" +__author__ = ["Richard Jones <ri...@co...>"] +__license__ = "bsd" + +import web, uuid, os, re, base64, hashlib, urllib, sys, logging, logging.config +from lxml import etree +from datetime import datetime +from zipfile import ZipFile +from web.wsgiserver import CherryPyWSGIServer + +# SERVER CONFIGURATION +############################################################################# +# Use this class to specify all the bits of configuration that will be used +# in the sword server + +# Whether to run using SSL. This uses a default self-signed certificate. 
Change the paths to +# use an alternative set of keys +ssl = False +if ssl: + CherryPyWSGIServer.ssl_certificate = "./ssl/cacert.pem" + CherryPyWSGIServer.ssl_private_key = "./ssl/privkey.pem" + +class SSSLogger(object): + def __init__(self): + self.logging_config = "./sss_logging.conf" # default + self.basic_config = """[loggers] +keys=root + +[handlers] +keys=consoleHandler + +[formatters] +keys=basicFormatting + +[logger_root] +level=INFO +handlers=consoleHandler + +[handler_consoleHandler] +class=StreamHandler +level=DEBUG +formatter=basicFormatting +args=(sys.stdout,) + +[formatter_basicFormatting] +format=%(asctime)s - %(name)s - %(levelname)s - %(message)s +""" + + if not os.path.isfile(self.logging_config): + self.create_logging_config(self.logging_config) + + logging.config.fileConfig(self.logging_config) + + def create_logging_config(self, pathtologgingconf): + fn = open(pathtologgingconf, "w") + fn.write(self.basic_config) + fn.close() + + def getLogger(self): + return logging.getLogger(__name__) + +class Configuration(object): + def __init__(self): + # The base url of the webservice where SSS is deployed + self.base_url = "http://localhost:%s/" % (sys.argv[1] if len(sys.argv) > 1 else '8080') + + # The number of collections that SSS will create and give to users to deposit content into + self.num_collections = 10 + + # The directory where the deposited content should be stored + self.store_dir = os.path.join(os.getcwd(), "store") + + # explicitly set the sword version, so if you're testing validation of + # service documents you can "break" it. + self.sword_version = "2.0" # SWORD 2.0! Oh yes! + + # user details; the user/password pair should be used for HTTP Basic Authentication, and the obo is the user + # to use for On-Behalf-Of requests. 
Set authenticate=False if you want to test the server without caring + # about authentication, set mediation=False if you want to test the server's errors on invalid attempts at + # mediation + self.authenticate = True + self.user = "sword" + self.password = "sword" + + self.mediation = True + self.obo = "obo" + + # What media ranges should the app:accept element in the Service Document support + self.app_accept = ["*/*"] + self.multipart_accept = ["*/*"] + self.accept_nothing = False + + # use these app_accept and multipart_accept values to create an invalid Service Document + #self.app_accept = None + #self.multipart_accept = None + + # should we provide sub-service urls + self.use_sub = True + + # What packaging formats should the sword:acceptPackaging element in the Service Document support + self.sword_accept_package = [ + "http://purl.org/net/sword/package/SimpleZip", + "http://purl.org/net/sword/package/Binary", + "http://purl.org/net/sword/package/METSDSpaceSIP" + ] + + # maximum upload size to be allowed, in bytes (this default is 16Mb) + self.max_upload_size = 16777216 + #self.max_upload_size = 0 # used to generate errors + + # list of package formats that SSS can provide when retrieving the Media Resource + self.sword_disseminate_package = [ + "http://purl.org/net/sword/package/SimpleZip" + ] + + # Supported package format disseminators; for the content type (dictionary key), the associated + # class will be used to package the content for dissemination + self.package_disseminators = { + ContentType("application", "zip", None, "http://purl.org/net/sword/package/SimpleZip").media_format() : DefaultDisseminator, + ContentType("application", "zip").media_format() : DefaultDisseminator, + ContentType("application", "atom+xml", "type=feed").media_format() : FeedDisseminator + } + + # Supported package format ingesters; for the Packaging header (dictionary key), the associated class will + # be used to unpackage deposited content + self.package_ingesters = { + 
"http://purl.org/net/sword/package/Binary" : BinaryIngester, + "http://purl.org/net/sword/package/SimpleZip" : SimpleZipIngester, + "http://purl.org/net/sword/package/METSDSpaceSIP" : METSDSpaceIngester + } + + self.entry_ingester = DefaultEntryIngester + + # supply this header in the Packaging header to generate a http://purl.org/net/sword/error/ErrorContent + # sword error + self.error_content_package = "http://purl.org/net/sword/package/error" + + # we can turn off updates and deletes in order to examine the behaviour of Method Not Allowed errors + self.allow_update = True + self.allow_delete = True + + # we can turn off deposit receipts, which is allowed by the specification + self.return_deposit_receipt = True + + # generate a UUID to represent this request, for logging purposes + self.rid = str(uuid.uuid4()) + +class CherryPyConfiguration(Configuration): + def __init__(self): + Configuration.__init__(self) + +class ApacheConfiguration(Configuration): + def __init__(self): + Configuration.__init__(self) + self.base_url = 'http://localhost/sss/' + self.store_dir = '/Users/richard/tmp/store' + self.authenticate = False + +class Namespaces(object): + """ + This class encapsulates all the namespace declarations that we will need + """ + def __init__(self): + # AtomPub namespace and lxml format + self.APP_NS = "http://www.w3.org/2007/app" + self.APP = "{%s}" % self.APP_NS + + # Atom namespace and lxml format + self.ATOM_NS = "http://www.w3.org/2005/Atom" + self.ATOM = "{%s}" % self.ATOM_NS + + # SWORD namespace and lxml format + self.SWORD_NS = "http://purl.org/net/sword/terms/" + self.SWORD = "{%s}" % self.SWORD_NS + + # Dublin Core namespace and lxml format + self.DC_NS = "http://purl.org/dc/terms/" + self.DC = "{%s}" % self.DC_NS + + # RDF namespace and lxml format + self.RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + self.RDF = "{%s}" % self.RDF_NS + + # ORE namespace and lxml format + self.ORE_NS = "http://www.openarchives.org/ore/terms/" + self.ORE 
= "{%s}" % self.ORE_NS + + # ORE ATOM + self.ORE_ATOM_NS = "http://www.openarchives.org/ore/atom/" + self.ORE_ATOM = "{%s}" % self.ORE_ATOM_NS + +# SWORD URLS +############################################################################# +# Define our URL mappings for the web service. We are using URL parts immediately after the base of the service +# which reflect the short-hand terms used in the SWORD documentation (sd-uri, col-uri, cont-uri, em-uri and edit-uri +# +urls = ( + '/', 'WebUI', # Home page, with an intro and some handy links + '/sd-uri', 'ServiceDocument', # From which to retrieve the service document + '/sd-uri/(.+)', 'ServiceDocument', # for sub-service documents + '/col-uri/(.+)', 'Collection', # Representing a Collection as listed in the service document + '/cont-uri/(.+)', 'MediaResourceContent', # The URI used in atom:content@src + '/em-uri/(.+)', 'MediaResource', # The URI used in atom:link@rel=edit-media + '/edit-uri/(.+)', 'Container', # The URI used in atom:link@rel=edit + '/state-uri/(.+)', 'StatementHandler', # The URI used in atom:link@rel=sword:statement + + '/agg-uri/(.+)', 'Aggregation', # The URI used to represent the ORE aggregation + + # NOT PART OF SWORD: sword says nothing about how components of the item are identified, but here we use the + # PART-URI prefix to denote parts of the object in the server + '/part-uri/(.+)', 'Part', + + # NOT PART OF SWORD: for convenience to supply HTML pages of deposited content + '/html/(.+)', 'WebUI' +) + +# HTTP HANDLERS +############################################################################# +# Define a set of handlers for the various URLs defined above to be used by web.py + +class SwordHttpHandler(object): + def authenticate(self, web): + auth = web.ctx.env.get('HTTP_AUTHORIZATION') + obo = web.ctx.env.get("HTTP_ON_BEHALF_OF") + + cfg = global_configuration + + # we may have turned authentication off for development purposes + if not cfg.authenticate: + ssslog.info("Authentication is 
turned OFF") + return Auth(cfg.user) + else: + ssslog.info("Authentication required") + + # if we want to authenticate, but there is no auth string then bounce with a 401 (realm SSS) + if auth is None: + ssslog.info("No authentication credentials supplied, requesting authentication") + web.header('WWW-Authenticate','Basic realm="SSS"') + web.ctx.status = '401 Unauthorized' + return Auth() + else: + # assuming Basic authentication, get the username and password + auth = re.sub('^Basic ','',auth) + username, password = base64.decodestring(auth).split(':') + ssslog.info("Authentication details: " + str(username) + ":" + str(password) + "; On Behalf Of: " + str(obo)) + + # if the username and password don't match, bounce the user with a 401 + # meanwhile if the obo header has been passed but doesn't match the config value also bounce + # witha 401 (I know this is an odd looking if/else but it's for clarity of what's going on + if username != cfg.user or password != cfg.password: + ssslog.info("Authentication Failed; returning 401") + web.ctx.status = '401 Unauthorized' + return Auth() + elif obo is not None and obo != cfg.obo: + ssslog.info("Authentication Failed with Target Owner Unknown") + # we throw a sword error for TargetOwnerUnknown + return Auth(cfg.user, obo, target_owner_unknown=True) + + user = cfg.user + if obo is not None: + return Auth(user, obo) + return Auth(user) + +class ServiceDocument(SwordHttpHandler): + """ + Handle all requests for Service documents (requests to SD-URI) + """ + def GET(self, sub=None): + """ GET the service document - returns an XML document """ + ssslog.debug("GET on Service Document; Incoming HTTP headers: " + str(web.ctx.environ)) + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + 
return error + return + + # if we get here authentication was successful and we carry on (we don't care who authenticated) + ss = SWORDServer() + web.header("Content-Type", "text/xml") + use_sub = global_configuration.use_sub if sub is None else False + return ss.service_document(use_sub) + +class Collection(SwordHttpHandler): + """ + Handle all requests to SWORD/ATOM Collections (these are the collections listed in the Service Document) - Col-URI + """ + def GET(self, collection): + """ + GET a representation of the collection in XML + Args: + - collection: The ID of the collection as specified in the requested URL + Returns an XML document with some metadata about the collection and the contents of that collection + """ + ssslog.debug("GET on Collection (list collection contents); Incoming HTTP headers: " + str(web.ctx.environ)) + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on (we don't care who authenticated) + ss = SWORDServer() + web.header("Content-Type", "text/xml") + return ss.list_collection(collection) + + def POST(self, collection): + """ + POST either an Atom Multipart request, or a simple package into the specified collection + Args: + - collection: The ID of the collection as specified in the requested URL + Returns a Deposit Receipt + """ + ssslog.debug("POST to Collection (create new item); Incoming HTTP headers: " + str(web.ctx.environ)) + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = 
"403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on + ss = SWORDServer() + spec = SWORDSpec() + + # check the validity of the request + invalid = spec.validate_deposit_request(web) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # take the HTTP request and extract a Deposit object from it + deposit = spec.get_deposit(web, auth) + if deposit.too_large: + error = ss.sword_error(spec.error_max_upload_size_exceeded, "Your deposit exceeds the maximum upload size limit") + web.header("Content-Type", "text/xml") + web.ctx.status = "413 Request Entity Too Large" + return error + result = ss.deposit_new(collection, deposit) + + if result is None: + return web.notfound() + + cfg = global_configuration + + # created, accepted, or error + if result.created: + print cfg.rid + " Item created" + web.header("Content-Type", "application/atom+xml;type=entry") + web.header("Location", result.location) + web.ctx.status = "201 Created" + if cfg.return_deposit_receipt: + print cfg.rid + " Returning deposit receipt" + return result.receipt + else: + print cfg.rid + " Omitting deposit receipt" + return + else: + print cfg.rid + " Returning Error" + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + +class MediaResourceContent(SwordHttpHandler): + """ + Class to represent the content of the media resource. This is the object which appears under atom:content@src, not + the EM-URI. It has its own class handler because it is a distinct resource, which does not necessarily resolve to + the same location as the EM-URI. See the Atom and SWORD specs for more details. 
+ """ + def GET(self, id): + """ + GET the media resource content in the requested format (web request will include content negotiation via + Accept header) + Args: + - id: the ID of the object in the store + Returns the content in the requested format + """ + + ssslog.debug("GET on MediaResourceContent; Incoming HTTP headers: " + str(web.ctx.environ)) + + # check to see if we're after the .atom version of the content + atom = False + if id.endswith(".atom"): + id = id[:-5] + atom = True + + # NOTE: this method is not authenticated - we imagine sharing this URL with end-users who will just want + # to retrieve the content. It's only for the purposes of example, anyway + ss = SWORDServer() + spec = SWORDSpec() + + # first thing we need to do is check that there is an object to return, because otherwise we may throw a + # 415 Unsupported Media Type without looking first to see if there is even any media to content negotiate for + # which would be weird from a client perspective + if not ss.exists(id): + return web.notfound() + + content_type = None + if not atom: + # do some content negotiation + cn = ContentNegotiator() + + # if no Accept header, then we will get this back + cn.default_type = "application" + cn.default_subtype = "zip" + cn.default_packaging = None + + # The list of acceptable formats (in order of preference). + # FIXME: ultimately to replace this with the negotiator + cn.acceptable = [ + ContentType("application", "zip", None, "http://purl.org/net/sword/package/SimpleZip"), + ContentType("application", "zip"), + ContentType("application", "atom+xml", "type=feed"), + ContentType("text", "html") + ] + + # do the negotiation + content_type = cn.negotiate(web.ctx.environ) + else: + content_type = ContentType("application", "atom+xml", "type=feed") + + # did we successfully negotiate a content type? 
+ if content_type is None: + error = ss.sword_error(spec.error_content_uri, "Requsted Accept-Packaging is not supported by this server") + web.header("Content-Type", "text/xml") + web.ctx.status = "406 Not Acceptable" + return error + + # if we did, we can get hold of the media resource + media_resource = ss.get_media_resource(id, content_type) + + # either send the client a redirect, or stream the content out + if media_resource.redirect: + return web.found(media_resource.url) + else: + web.header("Content-Type", content_type.mimetype()) + if media_resource.packaging is not None: + web.header("Packaging", media_resource.packaging) + f = open(media_resource.filepath, "r") + web.ctx.status = "200 OK" + return f.read() + +class MediaResource(MediaResourceContent): + """ + Class to represent the media resource itself (EM-URI). This extends from the MediaResourceContent class to take advantage + of the GET method available there. In a real implementation of AtomPub/SWORD the MediaResource and the + MediaResourceContent are allowed to be separate entities, which can behave differently (see the specs for more + details). For the purposes of SSS, we are treating them the same for convenience. 
+ """ + def PUT(self, id): + """ + PUT a new package onto the object identified by the supplied id + Args: + - id: the ID of the media resource as specified in the URL + Returns a Deposit Receipt + """ + ssslog.debug("PUT on Media Resource (replace); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if update is allowed + cfg = global_configuration + if not cfg.allow_update: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Update operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on + ss = SWORDServer() + spec = SWORDSpec() + + # check the validity of the request (note that multipart requests are not permitted in this method) + invalid = spec.validate_deposit_request(web, allow_multipart=False) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # next, before processing the request, let's check that the id is valid, and if not 404 the client + if not ss.exists(id): + return web.notfound() + + # get a deposit object. 
The PUT operation only supports a single binary deposit, not an Atom Multipart one + # so if the deposit object has an atom part we should return an error + deposit = spec.get_deposit(web, auth) + if deposit.too_large: + error = ss.sword_error(spec.error_max_upload_size_exceeded, "Your deposit exceeds the maximum upload size limit") + web.header("Content-Type", "text/xml") + web.ctx.status = "413 Request Entity Too Large" + return error + + # now replace the content of the container + result = ss.replace(id, deposit) + + # created, accepted or error + if result.created: + ssslog.info("Content replaced") + web.ctx.status = "204 No Content" # notice that this is different from the POST as per AtomPub + return + else: + ssslog.info("Returning Error") + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + + def DELETE(self, id): + """ + DELETE the contents of an object in the store (but not the object's container), leaving behind an empty + container for further use + Args: + - id: the ID of the object to have its content removed as per the requested URI + Return a Deposit Receipt + """ + ssslog.debug("DELETE on Media Resource (remove content, leave container); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if delete is allowed + cfg = global_configuration + if not cfg.allow_delete: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Delete operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on 
+ ss = SWORDServer() + spec = SWORDSpec() + + # check the validity of the request + invalid = spec.validate_delete_request(web) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # parse the delete request out of the HTTP request + delete = spec.get_delete(web.ctx.environ, auth) + + # next, before processing the request, let's check that the id is valid, and if not 404 the client + if not ss.exists(id): + return web.notfound() + + # carry out the delete + result = ss.delete_content(id, delete) + + # if there was an error, report it, otherwise return the deposit receipt + if result.error_code is not None: + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + else: + web.ctx.status = "204 No Content" # No Content + return + + def POST(self, id): + """ + POST a simple package into the specified media resource + Args: + - id: The ID of the media resource as specified in the requested URL + Returns a Deposit Receipt + """ + ssslog.debug("POST to Media Resource (add new file); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if update is allowed + cfg = global_configuration + if not cfg.allow_update: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Update operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on + ss = SWORDServer() + spec = SWORDSpec() + + # check 
the validity of the request + invalid = spec.validate_deposit_request(web) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # next, before processing the request, let's check that the id is valid, and if not 404 the client + if not ss.exists(id): + return web.notfound() + + # take the HTTP request and extract a Deposit object from it + deposit = spec.get_deposit(web, auth) + if deposit.too_large: + error = ss.sword_error(spec.error_max_upload_size_exceeded, "Your deposit exceeds the maximum upload size limit") + web.header("Content-Type", "text/xml") + web.ctx.status = "413 Request Entity Too Large" + return error + + result = ss.add_content(id, deposit) + + if result is None: + return web.notfound() + + cfg = global_configuration + + # created, accepted, or error + if result.created: + web.header("Content-Type", "application/atom+xml;type=entry") + web.header("Location", result.location) + web.ctx.status = "201 Created" + if cfg.return_deposit_receipt: + return result.receipt + else: + return + else: + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + +class Container(SwordHttpHandler): + """ + Class to deal with requests to the container, which is represented by the main Atom Entry document returned in + the deposit receipt (Edit-URI). 
+ """ + def GET(self, id): + """ + GET a representation of the container in the appropriate (content negotiated) format as identified by + the supplied id + Args: + - id: The ID of the container as supplied in the request URL + Returns a representation of the container: SSS will return either the Atom Entry identical to the one supplied + as a deposit receipt or the pure RDF/XML Statement depending on the Accept header + """ + ssslog.debug("GET on Container (retrieve deposit receipt or statement); Incoming HTTP headers: " + str(web.ctx.environ)) + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on (we don't care who authenticated) + ss = SWORDServer() + + # first thing we need to do is check that there is an object to return, because otherwise we may throw a + # 415 Unsupported Media Type without looking first to see if there is even any media to content negotiate for + # which would be weird from a client perspective + if not ss.exists(id): + return web.notfound() + + # do some content negotiation + cn = ContentNegotiator() + + # if no Accept header, then we will get this back + cn.default_type = "application" + cn.default_subtype = "atom+xml" + cn.default_params = "type=entry" + cn.default_packaging = None + + # The list of acceptable formats (in order of preference). The tuples list the type and + # the parameters section respectively + cn.acceptable = [ + ContentType("application", "atom+xml", "type=entry"), + ContentType("application", "atom+xml", "type=feed"), + ContentType("application", "rdf+xml") + ] + + # do the negotiation + content_type = cn.negotiate(web.ctx.environ) + + # did we successfully negotiate a content type? 
+ if content_type is None: + web.ctx.status = "415 Unsupported Media Type" + return + + # now actually get hold of the representation of the container and send it to the client + cont = ss.get_container(id, content_type) + return cont + + def PUT(self, id): + """ + PUT a new Entry over the existing entry, or a multipart request over + both the existing metadata and the existing content + """ + ssslog.debug("PUT on Container (replace); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if update is allowed + cfg = global_configuration + if not cfg.allow_update: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Update operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on + ss = SWORDServer() + spec = SWORDSpec() + + # check the validity of the request + invalid = spec.validate_deposit_request(web) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # take the HTTP request and extract a Deposit object from it + deposit = spec.get_deposit(web, auth) + if deposit.too_large: + error = ss.sword_error(spec.error_max_upload_size_exceeded, "Your deposit exceeds the maximum upload size limit") + web.header("Content-Type", "text/xml") + web.ctx.status = "413 Request Entity Too Large" + return error + result = ss.replace(id, deposit) + + # FIXME: this is no longer relevant + # take the HTTP request and extract a Deposit 
object from it + #deposit = spec.get_deposit(web, auth, atom_only=True) + #result = ss.update_metadata(id, deposit) + + if result is None: + return web.notfound() + + # created, accepted, or error + if result.created: + web.header("Location", result.location) + if cfg.return_deposit_receipt: + web.header("Content-Type", "application/atom+xml;type=entry") + web.ctx.status = "200 OK" + return result.receipt + else: + web.ctx.status = "204 No Content" + return + else: + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + + # NOTE: this POST action on the Container is represented in the specification + # by a POST to the SE-IRI (The SWORD Edit IRI), sections 6.7.2 and 6.7.3 and + # also to support completing unfinished deposits as per section 9.3 + def POST(self, id): + """ + POST some new content into the container identified by the supplied id, + or complete an existing deposit (using the In-Progress header) + Args: + - id: The ID of the container as contained in the URL + Returns a Deposit Receipt + """ + ssslog.debug("POST to Container (add new content and metadata); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if update is allowed + cfg = global_configuration + if not cfg.allow_update: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Update operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + auth = self.authenticate(web) + if not auth.success(): + if auth.target_owner_unknown: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_target_owner_unknown_uri, auth.obo) + web.header("Content-Type", "text/xml") + web.ctx.status = "403 Forbidden" + return error + return + + # if we get here authentication was successful and we carry on + ss = SWORDServer() + spec = SWORDSpec() + + # check the validity of the request + 
invalid = spec.validate_deposit_request(web) + if invalid is not None: + error = ss.sword_error(spec.error_bad_request_uri, invalid) + web.header("Content-Type", "text/xml") + web.ctx.status = "400 Bad Request" + return error + + # take the HTTP request and extract a Deposit object from it + deposit = spec.get_deposit(web, auth) + if deposit.too_large: + error = ss.sword_error(spec.error_max_upload_size_exceeded, "Your deposit exceeds the maximum upload size limit") + web.header("Content-Type", "text/xml") + web.ctx.status = "413 Request Entity Too Large" + return error + result = ss.deposit_existing(id, deposit) + + if result is None: + # we couldn't find the id + return web.notfound() + + # NOTE: spec says 201 Created for multipart and 200 Ok for metadata only + # we have implemented 200 OK across the board, in the understanding that + # in this case the spec is incorrect (correction need to be implemented + # asap) + + # created, accepted or error + if result.created: + web.header("Location", result.location) + web.ctx.status = "200 OK" + if cfg.return_deposit_receipt: + web.header("Content-Type", "application/atom+xml;type=entry") + return result.receipt + else: + return + else: + web.header("Content-Type", "text/xml") + web.ctx.status = result.error_code + return result.error + + def DELETE(self, id): + """ + DELETE the container (and everything in it) from the store, as identified by the supplied id + Args: + - id: the ID of the container + Returns nothing, as there is nothing to return (204 No Content) + """ + ssslog.debug("DELETE on Container (remove); Incoming HTTP headers: " + str(web.ctx.environ)) + + # find out if update is allowed + cfg = global_configuration + if not cfg.allow_delete: + spec = SWORDSpec() + ss = SWORDServer() + error = ss.sword_error(spec.error_method_not_allowed_uri, "Delete operations not currently permitted") + web.header("Content-Type", "text/xml") + web.ctx.status = "405 Method Not Allowed" + return error + + # authenticate + 
class StatementHandler(SwordHttpHandler):
    """
    Handler which serves the Statement of a container, either as RDF or as
    an Atom feed depending on the suffix of the requested id
    """
    def GET(self, id):
        """
        GET the statement for the container identified by the supplied id
        Args:
        - id:   The ID of the container with a format suffix ("rdf" or "atom")
                and its one-character separator appended
        Returns the statement in the requested format, a SWORD error document
        if the On-Behalf-Of user is unknown, or the web.py not-found response
        if the container does not exist or no known suffix was supplied
        """
        ssslog.debug("GET on Statement (retrieve); Incoming HTTP headers: " + str(web.ctx.environ))

        # authenticate; the only failure we report ourselves is an unknown
        # On-Behalf-Of target
        auth = self.authenticate(web)
        if not auth.success():
            if not auth.target_owner_unknown:
                return
            spec = SWORDSpec()
            server = SWORDServer()
            document = server.sword_error(spec.error_target_owner_unknown_uri, auth.obo)
            web.header("Content-Type", "text/xml")
            web.ctx.status = "403 Forbidden"
            return document

        # authentication was successful (we don't care who authenticated)
        server = SWORDServer()

        # the suffix of the id selects the return type; strip the suffix and
        # the single separator character in front of it to get the real id
        formats = (
            ("rdf", "application/rdf+xml"),
            ("atom", "application/atom+xml;type=feed"),
        )
        content_type = None
        for suffix, mimetype in formats:
            if id.endswith(suffix):
                content_type = mimetype
                id = id[:-(len(suffix) + 1)]
                break

        # check for the object before looking at the negotiated type, so that
        # a missing object is always reported as missing; an unrecognised
        # suffix is reported the same way
        if not server.exists(id) or content_type is None:
            return web.notfound()

        # hand the representation of the statement back to the client
        return server.get_statement(id, content_type)
class ContentType(object):
    """
    Class to represent a content type requested through content negotiation
    """
    def __init__(self, type=None, subtype=None, params=None, packaging=None):
        """
        Properties:
        type    - the main type of the content.  e.g. in text/html, the type is "text"
        subtype - the subtype of the content.  e.g. in text/html the subtype is "html"
        params  - as per the mime specification, this represents the parameter extension to the type, e.g. with
                    application/atom+xml;type=entry, the params are "type=entry"
        packaging - the packaging format identifier associated with the content, or None

        So, for example:
        application/atom+xml;type=entry => type="application", subtype="atom+xml", params="type=entry"
        """
        self.type = type
        self.subtype = subtype
        self.params = params
        self.packaging = packaging

    def from_mimetype(self, mimetype):
        """
        Populate type, subtype and params from a mimetype string of the form
        <supertype>/<subtype>[;<params>]

        Everything after the first ";" is kept verbatim as the params, so a
        mimetype carrying several parameters is parsed rather than ignored.
        If there is no ";" the existing params value is left untouched.

        Raises ValueError if the part before the first ";" contains no "/"
        """
        # split on the first ";" only: the params may themselves contain ";"
        head, separator, tail = mimetype.partition(";")
        self.type, self.subtype = head.split("/", 1)
        if separator:
            self.params = tail

    def mimetype(self):
        """
        Turn the content type into its mimetype representation
        """
        mt = self.type + "/" + self.subtype
        if self.params is not None:
            mt += ";" + self.params
        return mt

    # NOTE: we only use this to construct a canonical form which includes the package to do comparisons over
    def media_format(self):
        """
        Build the canonical string form of this content type, including the
        packaging when one is set.  Equality, hashing and str() all use it
        """
        pack = ""
        if self.packaging is not None:
            pack = "(packaging=\"" + self.packaging + "\") "
        return "(& (type=\"" + self.mimetype() + "\") " + pack + ")"

    def matches(self, other, packaging_wildcard=False):
        """
        Determine whether this ContentType and the supplied other ContentType are matches.  This includes full equality
        or whether the wildcards (*) which can be supplied for type or subtype properties are in place in either
        partner in the match.

        Args:
        - other: the ContentType to compare against
        - packaging_wildcard: if True a packaging of None on either side matches any packaging, otherwise
            the two packaging values must be equal
        Returns True if the two content types match, False otherwise
        """
        tmatch = self.type == "*" or other.type == "*" or self.type == other.type
        smatch = self.subtype == "*" or other.subtype == "*" or self.subtype == other.subtype
        # FIXME: there is some ambiguity in mime as to whether the omission of the params part is the same as
        # a wildcard.  For the purposes of convenience we have assumed here that it is, otherwise a request for
        # */* will not match any content type which has parameters
        pmatch = self.params is None or other.params is None or self.params == other.params

        # A similar problem exists for packaging.  We allow the user to tell us if packaging should be
        # wildcard sensitive
        packmatch = self.packaging == other.packaging
        if packaging_wildcard:
            packmatch = packmatch or self.packaging is None or other.packaging is None
        return tmatch and smatch and pmatch and packmatch

    def __eq__(self, other):
        # comparing with anything which is not a ContentType is "not equal", not an error
        if not isinstance(other, ContentType):
            return NotImplemented
        return self.media_format() == other.media_format()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # keep hashing consistent with __eq__, which compares the media format
        return hash(self.media_format())

    def __str__(self):
        return self.media_format()

    def __repr__(self):
        return str(self)
def analyse_accept(self, accept, packaging=None):
    # FIXME: we need to somehow handle q=0.0 in here and in other related methods
    # (an explicit q of 0 is currently recorded under the key 0.0 like any other value)
    """
    Analyse the Accept header string from the HTTP headers and return a structured dictionary with each
    content types grouped by their common q values, thus:

    dict = {
        1.0 : [<ContentType>, <ContentType>],
        0.8 : [<ContentType],
        0.5 : [<ContentType>, <ContentType>]
    }

    Every key is a float.  A content type which carries no q value of its own is given one between the
    highest explicitly requested q and 1.0, based on its position in the header.

    Args:
    - accept: the raw value of the Accept header
    - packaging: the packaging value to attach to every ContentType which is created
    Returns the dictionary described above.  Blank entries (e.g. from a trailing comma) and entries which
    are not of the form <type>/<subtype> are skipped, except for a bare "*" which is read as "*/*"
    """
    # first pass: pull each entry apart.  analysed holds (position, mimetype, params, q) where position
    # counts from 1 and q is None if the client did not supply one
    analysed = []
    highest_q = 0.0

    for part in accept.split(","):
        # an entry is "type" followed by any number of ";"-separated params, one of which may be "q=..."
        components = [component.strip() for component in part.split(";")]

        mimetype = components[0]
        if mimetype == "*":
            # some clients abbreviate the full wildcard
            mimetype = "*/*"
        if "/" not in mimetype:
            # blank or malformed entry; there is nothing we could negotiate over
            continue

        q = None
        params = []
        for component in components[1:]:
            if component.startswith("q="):
                try:
                    q = float(component[2:])
                except ValueError:
                    # an unreadable q value is treated as if none had been given
                    q = None
            elif component:
                params.append(component)

        # remember the highest q we were explicitly asked for
        if q is not None and q > highest_q:
            highest_q = q

        analysed.append((len(analysed) + 1, mimetype, ";".join(params) if params else None, q))

    # The entries without a q value are placed in the gap between the highest explicitly requested q
    # and 1.0.  Note that the gap may be 0.0.
    total = len(analysed)
    q_range = 1.0 - highest_q

    # second pass: build the dictionary keyed by q value
    by_q = {}
    for position, mimetype, params, q in analysed:
        supertype, subtype = mimetype.split("/", 1)
        if q is None:
            # the fraction of the way through the header (true division, even on Python 2), scaled
            # into the gap above the highest explicit q
            # NOTE(review): this ranks later entries above earlier ones, as the original equation
            # does; HTTP itself gives every entry without a q value a q of 1.0 - confirm intent
            q = highest_q + (float(position) / total) * q_range
        self.insert(by_q, q, ContentType(supertype, subtype, params, packaging))

    return by_q
def get_acceptable(self, client, server):
    """
    Take the client content negotiation requirements - as returned by analyse_accept() - and the server's
    array of supported types (in order of preference) and determine the most acceptable format to return.

    This method always returns the client's most preferred format if the server supports it, irrespective of the
    server's preference.  If the client has no discernable preference between two formats (i.e. they have the same
    q value) then the server's preference is taken into account.

    Args:
    - client: dictionary of q value => list of requested content types
    - server: list of supported content types, most preferred first
    Returns a ContentType object representing the mutually acceptable content type, or None if no agreement could
    be reached.
    """
    # "the client's preference always wins": walk the q values from the highest down and stop at the
    # first one for which the server can deliver something
    for q in sorted(client, reverse=True):
        # contains_match() hands back the server's own (concrete) content type for each request which
        # it can satisfy, so the result contains no unintentional wildcards
        allowable = []
        for requested in client[q]:
            match = self.contains_match(requested, server)
            if match is not None:
                allowable.append(match)

        if not allowable:
            # nothing the server can deliver at this q value, try the next one down
            continue
        if len(allowable) == 1:
            # exactly one candidate, so there is nothing to choose between
            return allowable[0]

        # several candidates which the client likes equally: the server list is in order of
        # preference, so the first entry which is allowable is the one to use
        for supported in server:
            if supported in allowable:
                return supported

    # the client and server can't come to an agreement on what content type they want and can deliver
    return None
+ Returns either the preferred ContentType as per the settings of the object, or None if no agreement could be + reached + """ + ssslog.debug("Fallback parameters are Accept: " + str(self.default_type) + "/" + str(self.default_subtype) + + ";" + str(self.default_params) + " and Accept-Packaging: " + str(self.default_packaging)) + + # get the accept header if available + accept = self.get_accept(dict) + packaging = self.get_packaging(dict) + ssslog.debug("Accept Header: " + str(accept)) + ssslog.debug("Packaging: "+ str(packaging)) + + if accept is None and pack... [truncated message content] |