[sword-app-changelog] SF.net SVN: sword-app:[416] sss/branches/sss-2/sss

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 416
          http://sword-app.svn.sourceforge.net/sword-app/?rev=416&view=rev
Author:   richard-jones
Date:     2012-01-05 17:30:49 +0000 (Thu, 05 Jan 2012)
Log Message:
-----------
first hack at dividing the original sss.py into multiple parts ready for refactoring and modularisation

Modified Paths:
--------------
    sss/branches/sss-2/sss/sss.py

Added Paths:
-----------
    sss/branches/sss-2/sss/config.py
    sss/branches/sss-2/sss/core.py
    sss/branches/sss-2/sss/ingesters_disseminators.py
    sss/branches/sss-2/sss/negotiator.py
    sss/branches/sss-2/sss/repository.py
    sss/branches/sss-2/sss/sss_logging.py
    sss/branches/sss-2/sss/webpy.py
    sss/branches/sss-2/sss/webui.py

Added: sss/branches/sss-2/sss/config.py
===================================================================

--- sss/branches/sss-2/sss/config.py	                        (rev 0)
+++ sss/branches/sss-2/sss/config.py	2012-01-05 17:30:49 UTC (rev 416)
@@ -0,0 +1,100 @@
+import os, uuid, sys
+from negotiator import ContentType
+from ingesters_disseminators import DefaultEntryIngester, DefaultDisseminator, FeedDisseminator, BinaryIngester, SimpleZipIngester, METSDSpaceIngester
+
+class Configuration(object):
+    def __init__(self):
+        # The base url of the webservice where SSS is deployed
+        self.base_url = "http://localhost:%s/" % (sys.argv[1] if len(sys.argv) > 1 else '8080')
+
+        # The number of collections that SSS will create and give to users to deposit content into
+        self.num_collections = 10
+
+        # The directory where the deposited content should be stored
+        self.store_dir = os.path.join(os.getcwd(), "store")
+
+        # explicitly set the sword version, so if you're testing validation of
+        # service documents you can "break" it.
+        self.sword_version = "2.0" # SWORD 2.0!  Oh yes!
+    
+        # user details; the user/password pair should be used for HTTP Basic Authentication, and the obo is the user
+        # to use for On-Behalf-Of requests.  Set authenticate=False if you want to test the server without caring
+        # about authentication, set mediation=False if you want to test the server's errors on invalid attempts at
+        # mediation
+        self.authenticate = True
+        self.user = "sword"
+        self.password = "sword"
+        
+        self.mediation = True
+        self.obo = "obo"
+
+        # What media ranges should the app:accept element in the Service Document support
+        self.app_accept = ["*/*"]
+        self.multipart_accept = ["*/*"]
+        self.accept_nothing = False
+        
+        # use these app_accept and multipart_accept values to create an invalid Service Document
+        #self.app_accept = None
+        #self.multipart_accept = None
+
+        # should we provide sub-service urls
+        self.use_sub = True
+
+        # What packaging formats should the sword:acceptPackaging element in the Service Document support
+        self.sword_accept_package = [
+                "http://purl.org/net/sword/package/SimpleZip",
+                "http://purl.org/net/sword/package/Binary",
+                "http://purl.org/net/sword/package/METSDSpaceSIP"
+            ]
+
+        # maximum upload size to be allowed, in bytes (this default is 16 MiB)
+        self.max_upload_size = 16777216
+        #self.max_upload_size = 0 # used to generate errors
+        
+        # list of package formats that SSS can provide when retrieving the Media Resource
+        self.sword_disseminate_package = [
+            "http://purl.org/net/sword/package/SimpleZip"
+        ]
+
+        # Supported package format disseminators; for the content type (dictionary key), the associated
+        # class will be used to package the content for dissemination
+        self.package_disseminators = {
+                ContentType("application", "zip", None, "http://purl.org/net/sword/package/SimpleZip").media_format() : DefaultDisseminator,
+                ContentType("application", "zip").media_format() : DefaultDisseminator,
+                ContentType("application", "atom+xml", "type=feed").media_format() : FeedDisseminator
+            }
+
+        # Supported package format ingesters; for the Packaging header (dictionary key), the associated class will
+        # be used to unpackage deposited content
+        self.package_ingesters = {
+                "http://purl.org/net/sword/package/Binary" : BinaryIngester,
+                "http://purl.org/net/sword/package/SimpleZip" : SimpleZipIngester,
+                "http://purl.org/net/sword/package/METSDSpaceSIP" : METSDSpaceIngester
+            }
+            
+        self.entry_ingester = DefaultEntryIngester
+
+        # supply this value in the Packaging header to generate a http://purl.org/net/sword/error/ErrorContent
+        # sword error
+        self.error_content_package = "http://purl.org/net/sword/package/error"
+
+        # we can turn off updates and deletes in order to examine the behaviour of Method Not Allowed errors
+        self.allow_update = True
+        self.allow_delete = True
+
+        # we can turn off deposit receipts, which is allowed by the specification
+        self.return_deposit_receipt = True
+        
+        # generate a UUID to represent this request, for logging purposes
+        self.rid = str(uuid.uuid4())
+        
+class CherryPyConfiguration(Configuration):
+    def __init__(self):
+        Configuration.__init__(self)
+
+class ApacheConfiguration(Configuration):
+    def __init__(self):
+        Configuration.__init__(self)
+        self.base_url = 'http://localhost/sss/'
+        self.store_dir = '/Users/richard/tmp/store'
+        self.authenticate = False

Added: sss/branches/sss-2/sss/core.py
===================================================================
--- sss/branches/sss-2/sss/core.py	                        (rev 0)
+++ sss/branches/sss-2/sss/core.py	2012-01-05 17:30:49 UTC (rev 416)
@@ -0,0 +1,575 @@
+import web, os, base64
+from lxml import etree
+from sss_logging import SSSLogger
+from datetime import datetime
+
+# get the global logger
+sssl = SSSLogger()
+ssslog = sssl.getLogger()
+
+# create the global configuration
+from config import CherryPyConfiguration
+global_configuration = CherryPyConfiguration()
+
+# FIXME: SWORDSpec has a lot of webpy stuff in it; needs to be cleaned and
+# divided
+
+
+class Namespaces(object):
+    """
+    This class encapsulates all the namespace declarations that we will need
+    """
+    def __init__(self):
+        # AtomPub namespace and lxml format
+        self.APP_NS = "http://www.w3.org/2007/app"
+        self.APP = "{%s}" % self.APP_NS
+
+        # Atom namespace and lxml format
+        self.ATOM_NS = "http://www.w3.org/2005/Atom"
+        self.ATOM = "{%s}" % self.ATOM_NS
+
+        # SWORD namespace and lxml format
+        self.SWORD_NS = "http://purl.org/net/sword/terms/"
+        self.SWORD = "{%s}" % self.SWORD_NS
+
+        # Dublin Core namespace and lxml format
+        self.DC_NS = "http://purl.org/dc/terms/"
+        self.DC = "{%s}" % self.DC_NS
+
+        # RDF namespace and lxml format
+        self.RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+        self.RDF = "{%s}" % self.RDF_NS
+
+        # ORE namespace and lxml format
+        self.ORE_NS = "http://www.openarchives.org/ore/terms/"
+        self.ORE = "{%s}" % self.ORE_NS
+
+        # ORE ATOM
+        self.ORE_ATOM_NS = "http://www.openarchives.org/ore/atom/"
+        self.ORE_ATOM = "{%s}" % self.ORE_ATOM_NS
+
+# REQUEST/RESPONSE CLASSES
+#######################################################################
+# These classes are used as the glue between the web.py web interface layer and the underlying sword server, allowing
+# them to exchange messages agnostically to the interface
+
+class Auth(object):
+    def __init__(self, by=None, obo=None, target_owner_unknown=False):
+        self.by = by
+        self.obo = obo
+        self.target_owner_unknown = target_owner_unknown
+
+    def success(self):
+        return self.by is not None and not self.target_owner_unknown
+
+class SWORDRequest(object):
+    """
+    General class to represent any sword request (such as deposit or delete)
+    """
+    def __init__(self):
+        """
+        There are 4 HTTP sourced properties:
+        - on_behalf_of  - On-Behalf-Of in HTTP; the user being deposited on behalf of
+        - packaging     - Packaging in HTTP; the packaging format being used
+        - in_progress   - In-Progress in HTTP; whether the deposit is complete or not from a client perspective
+        - metadata_relevant - Metadata-Relevant; whether or not the deposit contains relevant metadata
+        """
+
+        self.on_behalf_of = None
+        self.packaging = "http://purl.org/net/sword/package/Binary" # if this isn't populated externally, use the default
+        self.in_progress = False
+        self.metadata_relevant = True # the server MAY assume that it is True
+        self.auth = None
+        self.content_md5 = None
+        self.slug = None
+
+    def set_by_header(self, key, value):
+        # FIXME: this is a webpy thing....
+        """
+        Convenience method to take a relevant HTTP header and its value and add it to this object.
+        e.g. set_by_header("HTTP_ON_BEHALF_OF", "richard")  Notice that the format of the headers used
+        here is the web.py format, which is all upper case, prefixed with HTTP_, with all - converted to _
+        (for some unknown reason)
+        """
+        ssslog.debug("Setting Header %s : %s" % (key, value))
+        if key == "HTTP_ON_BEHALF_OF":
+            self.on_behalf_of = value
+        elif key == "HTTP_PACKAGING" and value is not None:
+            self.packaging = value
+        elif key == "HTTP_IN_PROGRESS":
+            self.in_progress = (value.strip() == "true")
+        elif key == "HTTP_METADATA_RELEVANT":
+            self.metadata_relevant = (value.strip() == "true")
+        elif key == "HTTP_CONTENT_MD5":
+            self.content_md5 = value
+        elif key == "HTTP_SLUG":
+            self.slug = value
+
+class DepositRequest(SWORDRequest):
+    """
+    Class to represent a request to deposit some content onto the server
+    """
+    def __init__(self):
+        """
+        There are 3 content related properties:
+        - content   -   the incoming content file to be deposited
+        - atom      -   the incoming atom document to be deposited (may be None)
+        - filename  -   the desired name of the incoming content
+        """
+        SWORDRequest.__init__(self)
+
+        # content related
+        self.content_type = "application/octet-stream"
+        self.content = None
+        self.atom = None
+        self.filename = "unnamed.file"
+        self.too_large = False
+
+class DepositResponse(object):
+    """
+    Class to represent the response to a deposit request
+    """
+    def __init__(self):
+        """
+        Properties:
+        - created   - was the resource created on the server
+        - accepted  -   was the resource accepted by the server (but not yet created)
+        - error_code    -   if there was an error, what HTTP status code
+        - error     -   sword error document if relevant
+        - receipt   -   deposit receipt if successful deposit
+        - location  -   the Edit-URI which will be supplied to the client as the Location header in responses
+        """
+        self.created = False
+        self.accepted = False
+        self.error_code = None
+        self.error = None
+        self.receipt = None
+        self.location = None
+
+class MediaResourceResponse(object):
+    """
+    Class to represent the response to a request to retrieve the Media Resource
+    """
+    def __init__(self):
+        """
+        There are three properties:
+        redirect    -   boolean, does the client need to be redirected to another URL for the media resource
+        url         -   If redirect, then this is the URL to redirect the client to
+        filepath    -   If not redirect, then this is the path to the file that the server should serve
+        """
+        self.redirect = False
+        self.url = None
+        self.filepath = None
+        self.packaging = None
+
+class DeleteRequest(SWORDRequest):
+    """
+    Class Representing a request to delete either the content or the container itself.
+    """
+    def __init__(self):
+        """
+        The properties of this class are as per SWORDRequest
+        """
+        SWORDRequest.__init__(self)
+
+class DeleteResponse(object):
+    """
+    Class to represent the response to a request to delete the content or the container
+    """
+    def __init__(self):
+        """
+        There are 3 properties:
+        error_code  -   if there was an error, the http code associated
+        error       -   the sworderror if appropriate
+        receipt     -   if successful and a request for deleting content (not container) the deposit receipt
+        """
+        self.error_code = None
+        self.error = None
+        self.receipt = None
+        
+# Operational SWORD Classes
+#############################################################################
+# Classes which carry out the grunt work of the SSS
+
+class SWORDSpec(object):
+    """
+    Class which attempts to represent the specification itself.  Instead of being operational like the SWORDServer
+    class, it attempts to just be able to interpret the supplied http headers and content bodies and turn them into
+    the entities with which SWORD works.  The jury is out, in my mind, whether this class is a useful separation, but
+    for what it's worth, here it is ...
+    """
+    def __init__(self):
+        # FIXME: this is a webpy thing ...
+        # The HTTP headers that are part of the specification (from a web.py perspective - don't be fooled, these
+        # aren't the real HTTP header names - see the spec)
+        self.sword_headers = [
+            "HTTP_ON_BEHALF_OF", "HTTP_PACKAGING", "HTTP_IN_PROGRESS", "HTTP_METADATA_RELEVANT",
+            "HTTP_CONTENT_MD5", "HTTP_SLUG", "HTTP_ACCEPT_PACKAGING"
+        ]
+
+        self.error_content_uri = "http://purl.org/net/sword/error/ErrorContent"
+        self.error_checksum_mismatch_uri = "http://purl.org/net/sword/error/ErrorChecksumMismatch"
+        self.error_bad_request_uri = "http://purl.org/net/sword/error/ErrorBadRequest"
+        self.error_target_owner_unknown_uri = "http://purl.org/net/sword/error/TargetOwnerUnknown"
+        self.error_mediation_not_allowed_uri = "http://purl.org/net/sword/error/MediationNotAllowed"
+        self.error_method_not_allowed_uri = "http://purl.org/net/sword/error/MethodNotAllowed"
+        self.error_max_upload_size_exceeded = "http://purl.org/net/sword/error/MaxUploadSizeExceeded"
+
+    def validate_deposit_request(self, web, allow_multipart=True):
+        dict = web.ctx.environ
+
+        # check each of the SWORD headers that can be validated, returning an error message if one is invalid
+        ip = dict.get("HTTP_IN_PROGRESS")
+        if ip is not None and ip != "true" and ip != "false":
+            return "In-Progress must be 'true' or 'false'"
+
+        sm = dict.get("HTTP_METADATA_RELEVANT")
+        if sm is not None and sm != "true" and sm != "false":
+            return "Metadata-Relevant must be 'true' or 'false'"
+
+        # there must be both an "atom" and "payload" input or data in web.data()
+        webin = web.input()
+        if len(webin) != 2 and len(webin) > 0:
+            return "Multipart request does not contain exactly 2 parts"
+        if len(webin) >= 2 and not webin.has_key("atom") and not webin.has_key("payload"):
+            return "Multipart request must contain Content-Dispositions with names 'atom' and 'payload'"
+        if len(webin) > 0 and not allow_multipart:
+            return "Multipart request not permitted in this context"
+
+        # if we get to here then we have a valid multipart or no multipart
+        if len(webin) != 2: # if it is not multipart
+            if web.data() is None: # and there is no content
+                return "No content sent to the server"
+
+        # validates
+        return None
+
+    def validate_delete_request(self, web):
+        dict = web.ctx.environ
+
+        # check each of the SWORD headers that can be validated, returning an error message if one is invalid
+        ip = dict.get("HTTP_IN_PROGRESS")
+        if ip is not None and ip != "true" and ip != "false":
+            return "In-Progress must be 'true' or 'false'"
+
+        sm = dict.get("HTTP_METADATA_RELEVANT")
+        if sm is not None and sm != "true" and sm != "false":
+            return "Metadata-Relevant must be 'true' or 'false'"
+        
+        # validates
+        return None
+
+    def get_deposit(self, web, auth=None, atom_only=False):
+        # FIXME: this reads files into memory, and therefore does not scale
+        # FIXME: this does not deal with the Media Part headers on a multipart deposit
+        """
+        Take a web.py web object and extract from it the parameters and content required for a SWORD deposit.  This
+        includes determining whether this is an Atom Multipart request or not, and extracting the atom/payload where
+        appropriate.  It also includes extracting the HTTP headers which are relevant to deposit, and for those not
+        supplied providing their defaults in the returned DepositRequest object
+        """
+        d = DepositRequest()
+
+        # now go through the headers and populate the Deposit object
+        dict = web.ctx.environ
+
+        # get the headers that have been provided.  Any headers which have not been provided have default values
+        # supplied in the DepositRequest object's constructor
+        ssslog.debug("Incoming HTTP headers: " + str(dict))
+        empty_request = False
+        for head in dict.keys():
+            if head in self.sword_headers:
+                d.set_by_header(head, dict[head])
+            if head == "HTTP_CONTENT_DISPOSITION":
+                ssslog.debug("Reading Header %s : %s" % (head, dict[head]))
+                d.filename = self.extract_filename(dict[head])
+                ssslog.debug("Extracted filename %s from %s" % (d.filename, dict[head]))
+            if head == "CONTENT_TYPE":
+                ssslog.debug("Reading Header %s : %s" % (head, dict[head]))
+                ct = dict[head]
+                d.content_type = ct
+                if ct.startswith("application/atom+xml"):
+                    atom_only = True
+            if head == "CONTENT_LENGTH":
+                ssslog.debug("Reading Header %s : %s" % (head, dict[head]))
+                if dict[head] == "0":
+                    empty_request = True
+                cl = int(dict[head]) # content length as an integer
+                if cl > global_configuration.max_upload_size:
+                    d.too_large = True
+                    return d
+
+        # first we need to find out if this is a multipart or not
+        webin = web.input()
+        if len(webin) == 2:
+            ssslog.info("Received multipart deposit request")
+            d.atom = webin['atom']
+            # read the zip file from the base64 encoded string
+            d.content = base64.decodestring(webin['payload'])
+        elif not empty_request:
+            # if this wasn't a multipart, and isn't an empty request, then the data is in web.data().  This could be a binary deposit or
+            # an atom entry deposit - rely on the passed/determined atom_only argument to determine which
+            if atom_only:
+                ssslog.info("Received Entry deposit request")
+                d.atom = web.data()
+            else:
+                ssslog.info("Received Binary deposit request")
+                d.content = web.data()
+
+        # now just attach the authentication data and return
+        d.auth = auth
+        return d
+
+    def extract_filename(self, cd):
+        """ get the filename out of the content disposition header """
+        # ok, this is a bit obtuse, but it was fun making it.  It's not hard to understand really, if you break
+        # it down
+        return cd[cd.find("filename=") + len("filename="):cd.find(";", cd.find("filename=")) if cd.find(";", cd.find("filename=")) > -1 else len(cd)]
+
+    def get_delete(self, dict, auth=None):
+        """
+        Take a dictionary of HTTP headers (keyed in the web.py format) and extract from it the parameters required for a
+        SWORD delete request.  It extracts the HTTP headers which are relevant to delete, and for those not supplied
+        provides their defaults in the returned DeleteRequest object
+        """
+        d = DeleteRequest()
+
+        # we just want to parse out the headers that are relevant
+        for head in dict.keys():
+            if head in self.sword_headers:
+                d.set_by_header(head, dict[head])
+
+        # now just attach the authentication data and return
+        d.auth = auth
+        return d
+        
+class Statement(object):
+    """
+    Class representing the Statement; a description of the object as it appears on the server
+    """
+    def __init__(self):
+        """
+        The statement has 5 important properties:
+        - aggregation_uri   -   The URI of the aggregation in ORE terms
+        - rem_uri           -   The URI of the Resource Map in ORE terms
+        - original_deposits -   The list of original packages uploaded to the server (set with original_deposit())
+        - in_progress       -   Is the submission in progress (boolean)
+        - aggregates        -   the non-original deposit files associated with the item
+        """
+        self.aggregation_uri = None
+        self.rem_uri = None
+        self.original_deposits = []
+        self.aggregates = []
+        self.in_progress = False
+
+        # URIs to use for the two supported states in SSS
+        self.in_progress_uri = "http://purl.org/net/sword/state/in-progress"
+        self.archived_uri = "http://purl.org/net/sword/state/archived"
+
+        # the descriptions associated with the two supported states in SSS
+        self.states = {
+            self.in_progress_uri : "The work is currently in progress, and has not passed to a reviewer",
+            self.archived_uri : "The work has passed through review and is now in the archive"
+        }
+
+        # Namespace map for XML serialisation
+        self.ns = Namespaces()
+        self.smap = {"rdf" : self.ns.RDF_NS, "ore" : self.ns.ORE_NS, "sword" : self.ns.SWORD_NS}
+        self.asmap = {"oreatom" : self.ns.ORE_ATOM_NS, "atom" : self.ns.ATOM_NS, "rdf" : self.ns.RDF_NS, "ore" : self.ns.ORE_NS, "sword" : self.ns.SWORD_NS}
+        self.fmap = {"atom" : self.ns.ATOM_NS, "sword" : self.ns.SWORD_NS}
+
+    def __str__(self):
+        return str(self.aggregation_uri) + ", " + str(self.rem_uri) + ", " + str(self.original_deposits)
+        
+    def original_deposit(self, uri, deposit_time, packaging_format, by, obo):
+        """
+        Add an original deposit to the statement
+        Args:
+        - uri:  The URI to the original deposit
+        - deposit_time:     When the deposit was originally made
+        - packaging_format:     The package format of the deposit, as supplied in the Packaging header
+        """
+        self.original_deposits.append((uri, deposit_time, packaging_format, by, obo))
+
+    def add_normalised_aggregations(self, aggs):
+        for agg in aggs:
+            if agg not in self.aggregates:
+                self.aggregates.append(agg)
+
+    def load(self, filepath):
+        """
+        Populate this statement object from the XML serialised statement to be found at the specified filepath
+        """
+        f = open(filepath, "r")
+        rdf = etree.fromstring(f.read())
+        
+        aggs = []
+        ods = []
+        for desc in rdf.getchildren():
+            packaging = None
+            depositedOn = None
+            deposit_by = None
+            deposit_obo = None
+            about = desc.get(self.ns.RDF + "about")
+            for element in desc.getchildren():
+                if element.tag == self.ns.ORE + "aggregates":
+                    resource = element.get(self.ns.RDF + "resource")
+                    aggs.append(resource)
+                if element.tag == self.ns.ORE + "describes":
+                    resource = element.get(self.ns.RDF + "resource")
+                    self.aggregation_uri = resource
+                    self.rem_uri = about
+                if element.tag == self.ns.SWORD + "state":
+                    state = element.get(self.ns.RDF + "resource")
+                    self.in_progress = state == "http://purl.org/net/sword/state/in-progress"
+                if element.tag == self.ns.SWORD + "packaging":
+                    packaging = element.get(self.ns.RDF + "resource")
+                if element.tag == self.ns.SWORD + "depositedOn":
+                    deposited = element.text
+                    depositedOn = datetime.strptime(deposited, "%Y-%m-%dT%H:%M:%SZ")
+                if element.tag == self.ns.SWORD + "depositedBy":
+                    deposit_by = element.text
+                if element.tag == self.ns.SWORD + "depositedOnBehalfOf":
+                    deposit_obo = element.text
+            if packaging is not None:
+                ods.append(about)
+                self.original_deposit(about, depositedOn, packaging, deposit_by, deposit_obo)
+        
+        # sort out the ordinary aggregations from the original deposits
+        self.aggregates = []
+        for agg in aggs:
+            if agg not in ods:
+                self.aggregates.append(agg)
+
+    def serialise(self):
+        """
+        Serialise this statement into an RDF/XML string
+        """
+        rdf = self.get_rdf_xml()
+        return etree.tostring(rdf, pretty_print=True)
+
+    def serialise_atom(self):
+        """
+        Serialise this statement to an Atom Feed document
+        """
+        # create the root atom feed element
+        feed = etree.Element(self.ns.ATOM + "feed", nsmap=self.fmap)
+
+        # create the sword:state term in the root of the feed
+        state_uri = self.in_progress_uri if self.in_progress else self.archived_uri
+        state = etree.SubElement(feed, self.ns.SWORD + "state")
+        state.set("href", state_uri)
+        meaning = etree.SubElement(state, self.ns.SWORD + "stateDescription")
+        meaning.text = self.states[state_uri]
+
+        # now do an entry for each original deposit
+        for (uri, datestamp, format_uri, by, obo) in self.original_deposits:
+            # FIXME: this is not an official atom entry yet
+            entry = etree.SubElement(feed, self.ns.ATOM + "entry")
+
+            category = etree.SubElement(entry, self.ns.ATOM + "category")
+            category.set("scheme", self.ns.SWORD_NS)
+            category.set("term", self.ns.SWORD_NS + "originalDeposit")
+            category.set("label", "Orignal Deposit")
+
+            # Media Resource Content URI (Cont-URI)
+            content = etree.SubElement(entry, self.ns.ATOM + "content")
+            content.set("type", "application/zip")
+            content.set("src", uri)
+
+            # add all the foreign markup
+
+            format = etree.SubElement(entry, self.ns.SWORD + "packaging")
+            format.text = format_uri
+
+            deposited = etree.SubElement(entry, self.ns.SWORD + "depositedOn")
+            deposited.text = datestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+            deposit_by = etree.SubElement(entry, self.ns.SWORD + "depositedBy")
+            deposit_by.text = by
+
+            if obo is not None:
+                deposit_obo = etree.SubElement(entry, self.ns.SWORD + "depositedOnBehalfOf")
+                deposit_obo.text = obo
+
+        # finally do an entry for all the ordinary aggregated resources
+        for uri in self.aggregates:
+            entry = etree.SubElement(feed, self.ns.ATOM + "entry")
+            content = etree.SubElement(entry, self.ns.ATOM + "content")
+            content.set("type", "application/octet-stream")
+            content.set("src", uri)
+
+        return etree.tostring(feed, pretty_print=True)
+
+    def get_rdf_xml(self):
+        """
+        Get an lxml Element object back representing this statement
+        """
+
+        # we want to create an ORE resource map, and also add on the sword specific bits for the original deposits and the state
+
+        # create the RDF root
+        rdf = etree.Element(self.ns.RDF + "RDF", nsmap=self.smap)
+
+        # in the RDF root create a Description for the REM which ore:describes the Aggregation
+        description1 = etree.SubElement(rdf, self.ns.RDF + "Description")
+        description1.set(self.ns.RDF + "about", self.rem_uri)
+        describes = etree.SubElement(description1, self.ns.ORE + "describes")
+        describes.set(self.ns.RDF + "resource", self.aggregation_uri)
+
+        # in the RDF root create a Description for the Aggregation which is ore:isDescribedBy the REM
+        description = etree.SubElement(rdf, self.ns.RDF + "Description")
+        description.set(self.ns.RDF + "about", self.aggregation_uri)
+        idb = etree.SubElement(description, self.ns.ORE + "isDescribedBy")
+        idb.set(self.ns.RDF + "resource", self.rem_uri)
+
+        # Create ore:aggregates for all ordinary aggregated files
+        for uri in self.aggregates:
+            aggregates = etree.SubElement(description, self.ns.ORE + "aggregates")
+            aggregates.set(self.ns.RDF + "resource", uri)
+
+        # Create ore:aggregates and sword:originalDeposit relations for the original deposits
+        for (uri, datestamp, format, by, obo) in self.original_deposits:
+            # standard ORE aggregates statement
+            aggregates = etree.SubElement(description, self.ns.ORE + "aggregates")
+            aggregates.set(self.ns.RDF + "resource", uri)
+
+            # assert that this is an original package
+            original = etree.SubElement(description, self.ns.SWORD + "originalDeposit")
+            original.set(self.ns.RDF + "resource", uri)
+
+        # now do the state information
+        state_uri = self.in_progress_uri if self.in_progress else self.archived_uri
+        state = etree.SubElement(description, self.ns.SWORD + "state")
+        state.set(self.ns.RDF + "resource", state_uri)
+
+        # Build the Description elements for the original deposits, with their sword:depositedOn and sword:packaging
+        # relations
+        for (uri, datestamp, format_uri, by, obo) in self.original_deposits:
+            desc = etree.SubElement(rdf, self.ns.RDF + "Description")
+            desc.set(self.ns.RDF + "about", uri)
+
+            format = etree.SubElement(desc, self.ns.SWORD + "packaging")
+            format.set(self.ns.RDF + "resource", format_uri)
+
+            deposited = etree.SubElement(desc, self.ns.SWORD + "depositedOn")
+            deposited.set(self.ns.RDF + "datatype", "http://www.w3.org/2001/XMLSchema#dateTime")
+            deposited.text = datestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+            deposit_by = etree.SubElement(desc, self.ns.SWORD + "depositedBy")
+            deposit_by.set(self.ns.RDF + "datatype", "http://www.w3.org/2001/XMLSchema#string")
+            deposit_by.text = by
+
+            if obo is not None:
+                deposit_obo = etree.SubElement(desc, self.ns.SWORD + "depositedOnBehalfOf")
+                deposit_obo.set(self.ns.RDF + "datatype", "http://www.w3.org/2001/XMLSchema#string")
+                deposit_obo.text = obo
+
+        # finally do a description for the state
+        sdesc = etree.SubElement(rdf, self.ns.RDF + "Description")
+        sdesc.set(self.ns.RDF + "about", state_uri)
+        meaning = etree.SubElement(sdesc, self.ns.SWORD + "stateDescription")
+        meaning.text = self.states[state_uri]
+
+        return rdf
+        

Added: sss/branches/sss-2/sss/ingesters_disseminators.py
===================================================================
--- sss/branches/sss-2/sss/ingesters_disseminators.py	                        (rev 0)
+++ sss/branches/sss-2/sss/ingesters_disseminators.py	2012-01-05 17:30:49 UTC (rev 416)
@@ -0,0 +1,281 @@
+from zipfile import ZipFile
+from lxml import etree
+
+# get the global logger
+from sss_logging import SSSLogger
+sssl = SSSLogger()
+ssslog = sssl.getLogger()
+
+# FIXME: this is a duplicate of the one in core.  We need to sort out our
+# circular imports *urgh*
+class Namespaces(object):
+    """
+    Registry of the XML namespaces used by SSS.
+
+    Each namespace is exposed twice: <NAME>_NS holds the bare namespace URI,
+    and <NAME> holds the same URI wrapped in braces ("{uri}"), which is the
+    form lxml expects in front of a tag or attribute name
+    """
+    def __init__(self):
+        # (attribute name, namespace URI) for every vocabulary we declare
+        declarations = (
+            ("APP", "http://www.w3.org/2007/app"),                  # AtomPub
+            ("ATOM", "http://www.w3.org/2005/Atom"),                # Atom
+            ("SWORD", "http://purl.org/net/sword/terms/"),          # SWORD
+            ("DC", "http://purl.org/dc/terms/"),                    # Dublin Core
+            ("RDF", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), # RDF
+            ("ORE", "http://www.openarchives.org/ore/terms/"),      # ORE
+            ("ORE_ATOM", "http://www.openarchives.org/ore/atom/"),  # ORE ATOM
+        )
+
+        for name, uri in declarations:
+            # the bare namespace, e.g. self.ATOM_NS
+            setattr(self, name + "_NS", uri)
+            # the lxml format, e.g. self.ATOM
+            setattr(self, name, "{%s}" % uri)
+
+class DisseminationPackager(object):
+    """
+    Interface for all classes wishing to provide dissemination packaging services to the SSS
+    """
+    def __init__(self, dao, uri_manager):
+        """
+        The interface itself does nothing with the dao or the uri_manager; the arguments only fix the
+        constructor signature which implementations share
+        """
+        pass
+
+    def package(self, collection, id):
+        """
+        Package up all the content in the specified container.  This method must be implemented by the extender.  The
+        method should create a package in the store directory, and then return to the caller the path to that file
+        so that it can be served back to the client
+        """
+        pass
+
+    def get_uri(self):
+        """
+        Return the URI identifying the packaging format.  Unless overridden this is the SimpleZip package URI
+        """
+        return "http://purl.org/net/sword/package/SimpleZip"
+        
+class IngestPackager(object):
+    """
+    Interface for classes which unpack deposited packages.  This base implementation ignores the dao it is
+    given and unpacks nothing
+    """
+    def __init__(self, dao):
+        pass
+
+    def ingest(self, collection, id, filename, metadata_relevant):
+        """
+        Called once the package with the supplied filename has been placed in the identified container, so that
+        it can be inspected and unpackaged.  Note for implementers: the container may also hold an atom document,
+        available from DAO.get_atom_content(), which may need to be inspected.  When metadata_relevant is False
+        the metadata already extracted into the container should be left unchanged.
+
+        Returns a list; the base implementation always returns an empty one
+        """
+        nothing_unpacked = list()
+        return nothing_unpacked
+   
+class DefaultDisseminator(DisseminationPackager):
+    """
+    Basic default packager, this just zips up everything except the SSS specific files in the container and stores
+    them in a file called sword-default-package.zip.
+    """
+    def __init__(self, dao, uri_manager):
+        # the uri_manager is part of the common packager signature but is not needed here
+        self.dao = dao
+
+    def package(self, collection, id):
+        """
+        Package up the content of the container identified by collection and id, and return the path to the
+        resulting zip file
+        """
+
+        # get a list of the relevant content files (excluding any previously generated package)
+        files = self.dao.list_content(collection, id, exclude=["sword-default-package.zip"])
+
+        # create a zip file with all the original files in it
+        zpath = self.dao.get_store_path(collection, id, "sword-default-package.zip")
+        z = ZipFile(zpath, "w")
+        try:
+            for filename in files:
+                z.write(self.dao.get_store_path(collection, id, filename), filename)
+        finally:
+            # always release the file handle, even if one of the writes fails
+            z.close()
+
+        # return the path to the package to the caller
+        return zpath
+
+class FeedDisseminator(DisseminationPackager):
+    """
+    Packager which represents the content of a container as an Atom feed with one entry per content file.  The
+    feed is written into the container as mediaresource.feed.xml
+    """
+    def __init__(self, dao, uri_manager):
+        self.dao = dao
+        self.ns = Namespaces()
+        self.um = uri_manager
+        # atom is the default (un-prefixed) namespace of the generated document
+        self.nsmap = {None: self.ns.ATOM_NS}
+
+    def package(self, collection, id):
+        """
+        Create a feed representation of the container identified by collection and id, write it into the
+        store and return the path to the feed file
+        """
+        # get a list of the relevant content files (the feed itself is excluded)
+        files = self.dao.list_content(collection, id, exclude=["mediaresource.feed.xml"])
+
+        # create a feed object with all the files as entries
+        feed = etree.Element(self.ns.ATOM + "feed", nsmap=self.nsmap)
+
+        for filename in files:
+            # every reference in the entry is derived from the uri of the part
+            part_uri = self.um.part_uri(collection, id, filename)
+
+            entry = etree.SubElement(feed, self.ns.ATOM + "entry")
+
+            em = etree.SubElement(entry, self.ns.ATOM + "link")
+            em.set("rel", "edit-media")
+            em.set("href", part_uri)
+
+            edit = etree.SubElement(entry, self.ns.ATOM + "link")
+            edit.set("rel", "edit")
+            edit.set("href", part_uri + ".atom")
+
+            # type/src are attributes of atom:content (an atom:link takes rel/href), so this must be a
+            # content element for the entry to be meaningful to an Atom client
+            content = etree.SubElement(entry, self.ns.ATOM + "content")
+            content.set("type", "application/octet-stream") # FIXME: we're not storing content types, so we don't know
+            content.set("src", part_uri)
+
+        fpath = self.dao.get_store_path(collection, id, "mediaresource.feed.xml")
+        f = open(fpath, "wb")
+        try:
+            f.write(etree.tostring(feed, pretty_print=True))
+        finally:
+            # don't leak the file handle if serialisation or the write fails
+            f.close()
+
+        return fpath
+
+    def get_uri(self):
+        """ This disseminator does not identify itself with a packaging URI: always returns None """
+        return None
+        
+class BinaryIngester(IngestPackager):
+    """
+    Ingester for opaque binary deposits.  No unpacking is attempted, and the dao is not retained
+    """
+    def __init__(self, dao):
+        pass
+
+    def ingest(self, collection, id, filename, metadata_relevant):
+        """ Leave the binary deposit exactly as it is; always returns an empty list """
+        # we don't try to unpack binary deposits
+        untouched = list()
+        return untouched
+
+class SimpleZipIngester(IngestPackager):
+    """
+    Ingester for zip packages: the zip is extracted into the container and, where there is an atom document in
+    the container and the metadata is relevant, metadata is extracted from that document and stored
+    """
+    def __init__(self, dao):
+        self.dao = dao
+        self.ns = Namespaces()
+
+    def ingest(self, collection, id, filename, metadata_relevant=True):
+        """
+        Unpack the zip with the supplied filename into the container identified by collection and id.
+
+        If metadata_relevant is True and the container has an atom document, the atom and dcterms metadata in
+        that document is extracted and stored via the dao (replacing whatever store_metadata replaces).
+
+        Returns the list of names of the files in the zip (the derived resources)
+        """
+        # First, let's just extract all the contents of the zip
+        z = ZipFile(self.dao.get_store_path(collection, id, filename))
+        try:
+            # keep track of the names of the files in the zip, as these will become
+            # our derived resources
+            derived_resources = z.namelist()
+
+            # FIXME: what we do here is intrinsically insecure (member names are not checked
+            # before extraction), but SSS is not a production service, so we're not worrying about it!
+            path = self.dao.get_store_path(collection, id)
+            z.extractall(path)
+        finally:
+            # release the file handle whether or not the extraction worked
+            z.close()
+
+        # check for the atom document.  If there is none there's no metadata to extract, and if the
+        # metadata is not relevant we don't need to continue
+        atom = self.dao.get_atom_content(collection, id)
+        if atom is None or not metadata_relevant:
+            return derived_resources
+
+        metadata = {}
+        entry = etree.fromstring(atom)
+
+        # go through each element in the atom entry and just process the ones we care about
+        # explicitly retrieve the atom based metadata first
+        for element in entry.getchildren():
+            if element.tag == self.ns.ATOM + "title":
+                self._insert_text(metadata, "title", element)
+            elif element.tag == self.ns.ATOM + "updated":
+                self._insert_text(metadata, "date", element)
+            elif element.tag == self.ns.ATOM + "author":
+                # all the text-bearing children of the author are joined into a single creator string
+                names = [n.text.strip() for n in element.getchildren() if n.text is not None]
+                self.a_insert(metadata, "creator", " ".join(names).strip())
+            elif element.tag == self.ns.ATOM + "summary":
+                self._insert_text(metadata, "abstract", element)
+
+        # now go through and retrieve the dcterms from the entry
+        for element in entry.getchildren():
+            # comments and processing instructions do not have string tags
+            if not isinstance(element.tag, basestring):
+                continue
+
+            # we operate an additive policy with metadata.  Duplicate
+            # keys are allowed, but duplicate key/value pairs are not.
+            if element.tag.startswith(self.ns.DC):
+                key = element.tag[len(self.ns.DC):]
+                self._insert_text(metadata, key, element)
+
+        self.dao.store_metadata(collection, id, metadata)
+
+        return derived_resources
+
+    def _insert_text(self, d, key, element):
+        """ a_insert the stripped text of the element under key; elements with no text at all are skipped """
+        if element.text is not None:
+            self.a_insert(d, key, element.text.strip())
+
+    def a_insert(self, d, key, value):
+        """
+        Additive insert: append value to the list held in d under key unless it is already in that list,
+        creating the list if the key is new
+        """
+        if key in d:
+            if value not in d[key]:
+                d[key].append(value)
+        else:
+            d[key] = [value]
+
+class METSDSpaceIngester(IngestPackager):
+    """
+    Example ingester only: it is not implemented and unpacks nothing
+    """
+    def ingest(self, collection, id, filename, metadata_relevant):
+        """ Not implemented; always returns an empty list """
+        # we don't need to implement this, it is just for example.  it would unzip the file and import the metadata
+        # in the zip file
+        nothing_ingested = list()
+        return nothing_ingested
+
+class DefaultEntryIngester(object):
+    """
+    Ingester for atom entry documents: stores the atom document in the container and extracts the atom and
+    dcterms metadata from it
+    """
+    def __init__(self, dao):
+        self.dao = dao
+        self.ns = Namespaces()
+
+    def ingest(self, collection, id, atom, additive=False):
+        """
+        Store the supplied atom document (a string of XML) in the container identified by collection and id,
+        then extract its metadata and store that too.
+
+        If additive is True the extracted metadata is added to the metadata the dao already holds for the
+        container, otherwise the extracted metadata is stored on its own
+        """
+        ssslog.debug("Ingesting Metadata; Additive? " + str(additive))
+
+        # store the atom
+        self.dao.store_atom(collection, id, atom)
+
+        # now extract/augment the metadata
+        metadata = {}
+        if additive:
+            # start with any existing metadata
+            metadata = self.dao.get_metadata(collection, id)
+
+        ssslog.debug("Existing Metadata (before new ingest): " + str(metadata))
+
+        entry = etree.fromstring(atom)
+
+        # go through each element in the atom entry and just process the ones we care about
+        # explicitly retrieve the atom based metadata first
+        for element in entry.getchildren():
+            if element.tag == self.ns.ATOM + "title":
+                self._insert_text(metadata, "title", element)
+            elif element.tag == self.ns.ATOM + "updated":
+                self._insert_text(metadata, "date", element)
+            elif element.tag == self.ns.ATOM + "author":
+                # all the text-bearing children of the author are joined into a single creator string
+                names = [n.text.strip() for n in element.getchildren() if n.text is not None]
+                self.a_insert(metadata, "creator", " ".join(names).strip())
+            elif element.tag == self.ns.ATOM + "summary":
+                self._insert_text(metadata, "abstract", element)
+
+        # now go through and retrieve the dcterms from the entry
+        for element in entry.getchildren():
+            # comments and processing instructions do not have string tags
+            if not isinstance(element.tag, basestring):
+                continue
+
+            # we operate an additive policy with metadata.  Duplicate
+            # keys are allowed, but duplicate key/value pairs are not.
+            if element.tag.startswith(self.ns.DC):
+                key = element.tag[len(self.ns.DC):]
+                self._insert_text(metadata, key, element)
+
+        ssslog.debug("Current Metadata (extracted + previously existing): " + str(metadata))
+
+        self.dao.store_metadata(collection, id, metadata)
+
+    def _insert_text(self, d, key, element):
+        """ a_insert the stripped text of the element under key; elements with no text at all are skipped """
+        if element.text is not None:
+            self.a_insert(d, key, element.text.strip())
+
+    def a_insert(self, d, key, value):
+        """
+        Additive insert: append value to the list held in d under key unless it is already in that list,
+        creating the list if the key is new
+        """
+        if key in d:
+            if value not in d[key]:
+                d[key].append(value)
+        else:
+            d[key] = [value]

Added: sss/branches/sss-2/sss/negotiator.py
===================================================================
--- sss/branches/sss-2/sss/negotiator.py	                        (rev 0)
+++ sss/branches/sss-2/sss/negotiator.py	2012-01-05 17:30:49 UTC (rev 416)
@@ -0,0 +1,342 @@
+# get the global logger
+from sss_logging import SSSLogger
+sssl = SSSLogger()
+ssslog = sssl.getLogger()
+
+# CONTENT NEGOTIATION
+#######################################################################
+# A sort of generic tool for carrying out content negotiation tasks with the web interface
+
+class ContentType(object):
+    """
+    Class to represent a content type requested through content negotiation
+    """
+    def __init__(self, type=None, subtype=None, params=None, packaging=None):
+        """
+        Properties:
+        type      - the main type of the content.  e.g. in text/html, the type is "text"
+        subtype   - the subtype of the content.  e.g. in text/html the subtype is "html"
+        params    - as per the mime specification, this represents the parameter extension to the type, e.g. with
+                    application/atom+xml;type=entry, the params are "type=entry"
+        packaging - the packaging format associated with the content, or None if there is none
+
+        So, for example:
+        application/atom+xml;type=entry => type="application", subtype="atom+xml", params="type=entry"
+        """
+        self.type = type
+        self.subtype = subtype
+        self.params = params
+        self.packaging = packaging
+
+    def from_mimetype(self, mimetype):
+        """
+        Populate type, subtype and (if present) params from a mimetype string of the form
+        <supertype>/<subtype>[;<params>].  Everything after the first ";" is treated as the params, so
+        mimetypes with several parameters are supported.  Surrounding whitespace is removed from each part.
+
+        Raises ValueError if the part before the params contains no "/"
+        """
+        essence, separator, params = mimetype.partition(";")
+        self.type, self.subtype = [part.strip() for part in essence.split("/", 1)]
+        if separator:
+            self.params = params.strip()
+
+    def mimetype(self):
+        """
+        Turn the content type into its mimetype representation
+        """
+        mt = self.type + "/" + self.subtype
+        if self.params is not None:
+            mt += ";" + self.params
+        return mt
+
+    # NOTE: we only use this to construct a canonical form which includes the package to do comparisons over
+    def media_format(self):
+        """
+        Canonical string form of the content type, including the packaging if there is any
+        """
+        mime = self.mimetype()
+        pack = ""
+        if self.packaging is not None:
+            pack = "(packaging=\"" + self.packaging + "\") "
+        mf = "(& (type=\"" + mime + "\") " + pack + ")"
+        return mf
+
+    def matches(self, other, packaging_wildcard=False):
+        """
+        Determine whether this ContentType and the supplied other ContentType are matches.  This includes full equality
+        or whether the wildcards (*) which can be supplied for type or subtype properties are in place in either
+        partner in the match.
+
+        If packaging_wildcard is True, a packaging of None on either side matches any packaging on the other;
+        otherwise the packaging of the two must be equal
+        """
+        tmatch = self.type == "*" or other.type == "*" or self.type == other.type
+        smatch = self.subtype == "*" or other.subtype == "*" or self.subtype == other.subtype
+        # FIXME: there is some ambiguity in mime as to whether the omission of the params part is the same as
+        # a wildcard.  For the purposes of convenience we have assumed here that it is, otherwise a request for
+        # */* will not match any content type which has parameters
+        pmatch = self.params is None or other.params is None or self.params == other.params
+
+        # A similar problem exists for packaging.  We allow the user to tell us if packaging should be
+        # wildcard sensitive
+        if packaging_wildcard:
+            packmatch = self.packaging is None or other.packaging is None or self.packaging == other.packaging
+        else:
+            packmatch = self.packaging == other.packaging
+        return tmatch and smatch and pmatch and packmatch
+
+    def __eq__(self, other):
+        # anything which is not a ContentType is left to the other operand to compare
+        if not isinstance(other, ContentType):
+            return NotImplemented
+        return self.media_format() == other.media_format()
+
+    def __ne__(self, other):
+        # must be explicit: python 2 does not derive != from __eq__
+        result = self.__eq__(other)
+        if result is NotImplemented:
+            return result
+        return not result
+
+    def __str__(self):
+        return self.media_format()
+
+    def __repr__(self):
+        return str(self)
+
+class ContentNegotiator(object):
+    """
+    Class to manage content negotiation.  Given its input parameters it will provide a ContentType object which
+    the server can use to locate its resources
+    """
+    def __init__(self):
+        """
+        Set up an unconfigured negotiator.  The following properties should be set before negotiating
+        - acceptable        -   What ContentType objects are acceptable to return (in order of preference)
+        - default_type      -   The type to fall back to if the request has no Accept header
+        - default_subtype   -   The subtype to fall back to if the request has no Accept header
+        - default_params    -   The params to fall back to if the request has no Accept header
+        - default_packaging -   The packaging to fall back to if the request has no Accept-Packaging header
+        """
+        # nothing is acceptable until the caller tells us what is
+        self.acceptable = list()
+
+        # no fallbacks are configured to begin with
+        self.default_type = self.default_subtype = None
+        self.default_params = self.default_packaging = None
+
+    def get_accept(self, dict):
+        """
+        Get the Accept header out of the web.py HTTP dictionary.  Return None if no accept header exists
+        """
+        # NOTE: the parameter name shadows the builtin, but is kept so keyword callers continue to work
+        return dict.get("HTTP_ACCEPT")
+
+    def get_packaging(self, dict):
+        """
+        Get the Accept-Packaging header out of the web.py HTTP dictionary.  Return None if no such header exists
+        """
+        # NOTE: the parameter name shadows the builtin, but is kept so keyword callers continue to work
+        return dict.get('HTTP_ACCEPT_PACKAGING')
+
+    def analyse_accept(self, accept, packaging=None):
+        """
+        Analyse the Accept header string from the HTTP headers and return a structured dictionary with each
+        content types grouped by their common q values, thus:
+
+        dict = {
+            1.0 : [<ContentType>, <ContentType>],
+            0.8 : [<ContentType],
+            0.5 : [<ContentType>, <ContentType>]
+        }
+
+        The keys of the dictionary are always floats.  Every ContentType created is given the supplied packaging.
+
+        This method will guarantee that every content type has some q value associated with it, even if this was not
+        supplied in the original Accept header; it will be inferred based on the rules of content negotiation
+
+        Raises ValueError if a q value is not a number, or if a content type does not contain a "/"
+        """
+        # FIXME: we need to somehow handle q=0.0 in here and in other related methods
+
+        # analysed will hold each part of the accept header following its analysis, but without respect to its
+        # position in the preferences list.  highest_q and counter are recorded during this first run so that
+        # we can use them to position the parts which have no q value later
+        analysed = []
+        highest_q = 0.0
+        counter = 0
+
+        # accept headers are a list of content types and q values, in a comma separated list
+        for part in accept.split(","):
+            # count the part number that we are working on, starting from 1
+            counter += 1
+
+            # the components of the part can be "type;params;q" "type;params", "type;q" or just "type"
+            components = [component.strip() for component in part.split(";")]
+
+            # the first component is always the type (see above comment)
+            mimetype = components[0]
+
+            # every other component is either the q value or a parameter.  q stays None if the client did
+            # not supply one, which is how we later tell explicit q values (including 0.0) from inferred ones
+            params = []
+            q = None
+            for component in components[1:]:
+                if component.startswith("q="):
+                    # strip the "q=" from the start of the q value, and keep it as a number so that all the
+                    # keys of the result are comparable with each other
+                    q = float(component[2:])
+                elif component:
+                    params.append(component)
+
+            # if the q value is the highest one we've seen so far, record it
+            if q is not None and q > highest_q:
+                highest_q = q
+
+            # no params is represented by None, not by the empty string
+            analysed.append((mimetype, ";".join(params) or None, q, counter))
+
+        # once we've finished the analysis we know what the highest explicitly requested q is.  This may leave
+        # us with a gap between 1.0 and the highest requested q, into which we will want to put the content types which
+        # did not have explicitly assigned q values.  Here we calculate the size of that gap, so that we can use it
+        # in positioning those elements.  Note that the gap may be 0.0.
+        q_range = 1.0 - highest_q
+
+        # the dictionary to hold our results, keyed with the q value.  The value of each key is an array of
+        # ContentType objects (in the order in which they appeared in the header)
+        by_q = {}
+
+        for (mimetype, params, q, position) in analysed:
+            # break the type into super and sub types for the ContentType constructor
+            supertype, subtype = mimetype.split("/", 1)
+            if q is None:
+                # we have to calculate the q value using the following equation which creates a q value
+                # within "q_range" of 1.0 [the first part of the eqn] based on the fraction of the way through the total
+                # accept header list scaled by the q_range [the second part of the eqn].  The fraction must be
+                # calculated as a float, as integer division would collapse it to 0 or 1.
+                # NOTE(review): this ranks later-listed types above earlier ones - confirm that is intended
+                q = (1.0 - q_range) + ((float(position) / counter) * q_range)
+            self.insert(by_q, q, ContentType(supertype, subtype, params, packaging))
+
+        # now we have a dictionary keyed by q value which we can return
+        return by_q
+
+    def insert(self, d, q, v):
+        """
+        Utility method: if dict d contains key q, then append value v to the array which is identified by that key
+        otherwise create a new key with the value of an array with a single value v
+        """
+        d.setdefault(q, []).append(v)
+
+    def contains_match(self, source, target):
+        """
+        Look through the target list of ContentType objects for the first one which the source matches
+        Args:
+        - source:   The ContentType object we are trying to find a match for
+        - target:   The list of ContentType objects to test the source against, in order
+        Returns the first matching ContentType from the target list, or None if nothing matches
+        """
+        # the comparison is done with source.matches(candidate).  What we hand back is the entry from the
+        # target list rather than the source, as the target is considered the definitive list of allowed
+        # content types, while the source may contain wildcards
+        candidates = (candidate for candidate in target if source.matches(candidate))
+        return next(candidates, None)
+
+    def get_acceptable(self, client, server):
+        """
+        Take the client content negotiation requirements - as returned by analyse_accept() - and the server's
+        array of supported types (in order of preference) and determine the most acceptable format to return.
+
+        This method always returns the client's most preferred format if the server supports it, irrespective of the
+        server's preference.  If the client has no discernable preference between two formats (i.e. they have the same
+        q value) then the server's preference is taken into account.
+
+        Returns a ContentType object representing the mutually acceptable content type, or None if no agreement could
+        be reached.
+        """
+
+        # the rule for determining what to return is that "the client's preference always wins", so we look for the
+        # highest q ranked item that the server is capable of returning.  We only take into account the server's
+        # preference when the client has two equally weighted preferences - in that case we take the server's
+        # preferred content type.
+        #
+        # So: go through the client requirement keys sorted with the highest q first (the server is a list which
+        # should be in order of preference already)
+        for q in sorted(client.keys(), reverse=True):
+            # find out if each possibility with this q value matches anything in the server.  This uses the
+            # ContentType's matches() method which will take into account wildcards, so content types like */*
+            # will match appropriately.  We get back from this the concrete ContentType as specified by the
+            # server if there is a match, so we know the result contains no unintentional wildcards
+            allowable = []
+            for possibility in client[q]:
+                match = self.contains_match(possibility, server)
+                if match is not None:
+                    # if there is a match, register it
+                    allowable.append(match)
+
+            # we now know if there are 0, 1 or many allowable content types at this q value
+            if len(allowable) == 0:
+                # we didn't find anything, so keep looking at the next q value
+                continue
+            elif len(allowable) == 1:
+                # we found exactly one match, so this is our content type to use
+                return allowable[0]
+            else:
+                # we found multiple supported content types at this q value, so now we need to choose the server's
+                # preference: the first of the server's content types which is in the allowable list is the
+                # highest ranked one that is allowable, so this is our type
+                for supported in server:
+                    if supported in allowable:
+                        return supported
+
+        # we've got to here without returning anything, which means that the client and server can't come to
+        # an agreement on what content type they want and can deliver.  There's nothing more we can do!
+        return None
+
+    def negotiate(self, dict):
+        """
+        Main method for carrying out content negotiation over the supplied HTTP headers dictionary.
+        Returns either the preferred ContentType as per the settings of the object, or None if no agreement could be
+        reached
+        """
+        ssslog.debug("Fallback parameters are Accept: " + str(self.default_type) + "/" + str(self.default_subtype) + 
+                        ";" + str(self.default_params) + " and Accept-Packaging: " + str(self.default_packaging))
+        
+        # get the accept header if available
+        accept = self.get_accept(dict)
+        packaging = self.get_packaging(dict)
+        ssslog.debug("Accept Header: " + str(accept))
+        ssslog.debug("Packaging: "+ str(packaging))
+
+        if accept is None and packaging is None:
+            # if it is not available just return the defaults
+            return ContentType(self.default_type, self.default_subtype, self.default_params, self.default_packaging)
+
+        if packaging is None:
+            packaging = self.default_packaging
+        
+        if accept is None:
+            accept = self.default_type + "/" + self.default_subtype
+            if self.default_params is not None:
+                accept += ";" + self.default_params
+        
+        ssslog.debug("Negotiating on Accept: " + str(accept) + " and Accept-Packaging: " + str(packaging))
+        
+        # get us back a dictionary keyed by q value which tells us the order of preference that the client has
+        # requested
+        analysed = self.analyse_accept(accept, packaging)
+...
 
[truncated message content]