# -*- coding: utf-8 -*-
#
# This file is part of DataCite.
#
# Copyright (C) 2016 CERN.
# Copyright (C) 2019 Caltech.
#
# DataCite is free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.
"""DataCite v4.2 JSON to XML transformations."""
import importlib.resources as importlib_resources
from lxml import etree
from lxml.builder import E
from .jsonutils import validator_factory
from .xmlutils import (
Rules,
dump_etree_helper,
etree_to_string,
set_elem_attr,
set_non_empty_attr,
)
# Registry of transformation rules. Each ``@rules.rule("<json key>")``
# decorator in this module registers one function under that JSON key.
rules = Rules()
# Namespace prefix map passed to ``dump_etree_helper`` and reused when
# building ``title`` elements.
ns = {
    # ``None`` is the default (unprefixed) namespace.
    None: "http://datacite.org/schema/kernel-4",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    # NOTE(review): language attributes are set below as "{xml}lang", which
    # matches this placeholder namespace value; presumably it ends up
    # serialized as ``xml:lang`` -- confirm against ``xmlutils``.
    "xml": "xml",
}
# Attributes handed to ``dump_etree_helper`` together with ``ns``; declares
# the ``xsi:schemaLocation`` pair for the kernel-4.2 XSD.
root_attribs = {
    "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation": "http://datacite.org/schema/kernel-4 "
    "http://schema.datacite.org/meta/kernel-4.2/metadata.xsd",
}
# Validator built at import time from the v4.2 JSON schema file that ships
# inside the ``datacite`` package; ``validate()`` delegates to it.
validator = validator_factory(
    importlib_resources.files("datacite") / "schemas/datacite-v4.2.json"
)
[docs]
def dump_etree(data):
    """Build the DataCite v4.2 XML tree for a JSON dictionary.

    Delegates to ``dump_etree_helper`` with this module's rule registry,
    namespace map and root attributes.

    :param data: JSON-style dictionary describing the record.
    :returns: Whatever ``dump_etree_helper`` produces for the record.
    """
    tree = dump_etree_helper(data, rules, ns, root_attribs)
    return tree
[docs]
def tostring(data, **kwargs):
    """Serialize a JSON dictionary to a DataCite v4.2 XML string.

    :param data: JSON-style dictionary describing the record.
    :param kwargs: Forwarded unchanged to ``etree_to_string``.
    :returns: The result of ``etree_to_string`` for the generated tree.
    """
    tree = dump_etree(data)
    return etree_to_string(tree, **kwargs)
[docs]
def validate(data):
    """Check a JSON dictionary against the DataCite v4.2 JSON schema.

    :param data: JSON-style dictionary describing the record.
    :returns: The result of ``validator.is_valid(data)``.
    """
    outcome = validator.is_valid(data)
    return outcome
@rules.rule("identifiers")
def identifiers(path, values):
    """Transform identifiers to alternateIdentifiers and identifier.

    The record is assumed to carry at most one DOI, which becomes the
    ``identifier`` element. Every other entry is collected, in input
    order, under a single ``alternateIdentifiers`` element.

    :param path: Unused by this rule.
    :param values: Sequence of dicts, each with ``identifier`` and
        ``identifierType`` keys.
    :returns: The ``identifier`` element when only a DOI is present, the
        ``alternateIdentifiers`` element when only non-DOI entries are
        present, an ``(identifier, alternateIdentifiers)`` tuple when both
        are present, or ``None`` when ``values`` is empty (matching the
        other rules in this module).
    :raises TypeError: If more than one entry has ``identifierType`` "DOI".
    """
    if not values:
        return None

    # ``None`` marks "not seen yet"; the elements are created lazily so an
    # absent DOI or an absent alternate list never yields an empty element.
    doi = None
    alt = None
    for value in values:
        if value["identifierType"] == "DOI":
            if doi is not None:
                # Two DOIs are ambiguous: there is no way to tell which one
                # is the record's actual identifier.
                raise TypeError(
                    "Multiple DOI identifiers supplied; only one is allowed"
                )
            doi = E.identifier(value["identifier"], identifierType="DOI")
        else:
            if alt is None:
                alt = E.alternateIdentifiers()
            elem = E.alternateIdentifier(value["identifier"])
            elem.set("alternateIdentifierType", value["identifierType"])
            alt.append(elem)

    if alt is None:
        # Only the DOI was supplied.
        return doi
    if doi is None:
        # Only alternate identifiers were supplied.
        return alt
    return doi, alt
def affiliations(root, values):
    """Append an ``affiliation`` child to *root* for each listed affiliation.

    :param root: Parent element that receives the new children.
    :param values: Dict whose optional ``affiliations`` key holds a list of
        dicts, each with an ``affiliation`` text entry.
    """
    for entry in values.get("affiliations", []):
        root.append(E.affiliation(entry["affiliation"]))
def familyname(root, value):
    """Append a ``familyName`` child to *root* when one is provided.

    :param root: Parent element that receives the child.
    :param value: Dict that may contain a ``familyName`` entry; missing or
        falsy entries are skipped.
    """
    family = value.get("familyName")
    if not family:
        return
    root.append(E.familyName(family))
def givenname(root, value):
    """Append a ``givenName`` child to *root* when one is provided.

    :param root: Parent element that receives the child.
    :param value: Dict that may contain a ``givenName`` entry; missing or
        falsy entries are skipped.
    """
    given = value.get("givenName")
    if not given:
        return
    root.append(E.givenName(given))
def person_or_org_name(root, value, xml_tagname, json_tagname):
    """Append the creator/contributor name element to *root*.

    The element is named ``xml_tagname`` and its text is taken from
    ``value[json_tagname]``. ``nameType`` and the language attribute are
    applied through the ``xmlutils`` helpers.

    :param root: Parent element that receives the name element.
    :param value: Dict describing the person or organisation.
    :param xml_tagname: Tag name of the element to create.
    :param json_tagname: Key in *value* holding the name text.
    """
    name_node = E(xml_tagname, value[json_tagname])
    set_elem_attr(name_node, "nameType", value)
    language_code = value.get("lang")
    set_non_empty_attr(name_node, "{xml}lang", language_code)
    root.append(name_node)
def nameidentifiers(root, values):
    """Append a ``nameIdentifier`` child to *root* per usable identifier.

    Entries without a truthy ``nameIdentifier`` are skipped.

    :param root: Parent element that receives the children.
    :param values: Dict whose optional ``nameIdentifiers`` key holds a list
        of dicts with ``nameIdentifier`` and ``nameIdentifierScheme``
        entries (``schemeURI`` is handed to ``set_elem_attr``).
    """
    for entry in values.get("nameIdentifiers", []):
        if not entry.get("nameIdentifier"):
            continue
        node = E.nameIdentifier(entry["nameIdentifier"])
        node.set("nameIdentifierScheme", entry["nameIdentifierScheme"])
        set_elem_attr(node, "schemeURI", entry)
        root.append(node)
@rules.rule("creators")
def creators(path, values):
    """Transform the ``creators`` list into a ``creators`` element.

    :param path: Unused by this rule.
    :param values: List of creator dicts; each needs a ``name`` entry.
    :returns: The ``creators`` element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    # Child order after the name: given name, family name, name
    # identifiers, affiliations.
    part_builders = (givenname, familyname, nameidentifiers, affiliations)
    root = E.creators()
    for entry in values:
        node = E.creator()
        person_or_org_name(node, entry, "creatorName", "name")
        for add_part in part_builders:
            add_part(node, entry)
        root.append(node)
    return root
@rules.rule("titles")
def titles(path, values):
    """Transform the ``titles`` list into a ``titles`` element.

    :param path: Unused by this rule.
    :param values: List of title dicts; each needs a ``title`` entry and may
        carry ``lang``, ``titleType`` and the legacy ``type`` key.
    :returns: The ``titles`` element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    root = E.titles()
    for entry in values:
        node = etree.Element("title", nsmap=ns)
        node.text = entry["title"]
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        # 'type' was a mistake in the 4.0 serializer and is kept for
        # backwards compatibility until kernel 5 is released. It is applied
        # first so that 'titleType', applied second, supersedes it.
        for source_key in ("type", "titleType"):
            set_non_empty_attr(node, "titleType", entry.get(source_key))
        root.append(node)
    return root
@rules.rule("publisher")
def publisher(path, value):
    """Transform ``publisher`` into a ``publisher`` element.

    :param path: Unused by this rule.
    :param value: Publisher text.
    :returns: The ``publisher`` element, or ``None`` when *value* is falsy.
    """
    return E.publisher(value) if value else None
@rules.rule("publicationYear")
def publication_year(path, value):
    """Transform ``publicationYear`` into a ``publicationYear`` element.

    :param path: Unused by this rule.
    :param value: Publication year; converted with ``str()`` for the text.
    :returns: The element, or ``None`` when *value* is falsy.
    """
    return E.publicationYear(str(value)) if value else None
@rules.rule("subjects")
def subjects(path, values):
    """Transform the ``subjects`` list into a ``subjects`` element.

    :param path: Unused by this rule.
    :param values: List of subject dicts; each needs a ``subject`` entry.
    :returns: The ``subjects`` element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    # Applied in this order after the language attribute.
    attr_names = ("subjectScheme", "schemeURI", "valueURI")
    root = E.subjects()
    for entry in values:
        node = E.subject(entry["subject"])
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        for attr_name in attr_names:
            set_elem_attr(node, attr_name, entry)
        root.append(node)
    return root
@rules.rule("contributors")
def contributors(path, values):
    """Transform the ``contributors`` list into a ``contributors`` element.

    :param path: Unused by this rule.
    :param values: List of contributor dicts; each needs a ``name`` entry.
    :returns: The ``contributors`` element, or ``None`` when *values* is
        empty.
    """
    if not values:
        return
    # Child order after the name: given name, family name, name
    # identifiers, affiliations.
    part_builders = (givenname, familyname, nameidentifiers, affiliations)
    root = E.contributors()
    for entry in values:
        node = E.contributor()
        person_or_org_name(node, entry, "contributorName", "name")
        set_elem_attr(node, "contributorType", entry)
        for add_part in part_builders:
            add_part(node, entry)
        root.append(node)
    return root
@rules.rule("dates")
def dates(path, values):
    """Transform the ``dates`` list into a ``dates`` element.

    :param path: Unused by this rule.
    :param values: List of date dicts; each needs ``date`` and ``dateType``
        entries.
    :returns: The ``dates`` element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    root = E.dates()
    for entry in values:
        node = E.date(entry["date"], dateType=entry["dateType"])
        set_elem_attr(node, "dateInformation", entry)
        root.append(node)
    return root
@rules.rule("language")
def language(path, value):
    """Transform ``language`` into a ``language`` element.

    :param path: Unused by this rule.
    :param value: Language text.
    :returns: The ``language`` element, or ``None`` when *value* is falsy.
    """
    return E.language(value) if value else None
@rules.rule("types")
def resource_type(path, value):
    """Transform ``types`` into a ``resourceType`` element.

    :param path: Unused by this rule.
    :param value: Dict with ``resourceTypeGeneral`` (becomes the attribute)
        and ``resourceType`` (becomes the element text); both are read with
        direct indexing, so both keys must be present.
    :returns: The ``resourceType`` element.
    """
    node = E.resourceType()
    node.set("resourceTypeGeneral", value["resourceTypeGeneral"])
    node.text = value["resourceType"]
    return node
@rules.rule("relatedIdentifiers")
def related_identifiers(path, values):
    """Transform ``relatedIdentifiers`` into a ``relatedIdentifiers`` element.

    :param path: Unused by this rule.
    :param values: List of dicts; each needs ``relatedIdentifier``,
        ``relatedIdentifierType`` and ``relationType`` entries.
    :returns: The ``relatedIdentifiers`` element, or ``None`` when *values*
        is empty.
    """
    if not values:
        return
    # Attributes read with direct indexing, then those delegated to
    # ``set_elem_attr``; both groups are applied in the order listed.
    indexed_attrs = ("relatedIdentifierType", "relationType")
    delegated_attrs = (
        "relatedMetadataScheme",
        "schemeURI",
        "schemeType",
        "resourceTypeGeneral",
    )
    root = E.relatedIdentifiers()
    for entry in values:
        node = E.relatedIdentifier()
        node.text = entry["relatedIdentifier"]
        for attr_name in indexed_attrs:
            node.set(attr_name, entry[attr_name])
        for attr_name in delegated_attrs:
            set_elem_attr(node, attr_name, entry)
        root.append(node)
    return root
def free_text_list(plural, singular, values):
    """Build a container element holding one text child per value.

    :param plural: Tag name of the container element.
    :param singular: Tag name used for every child element.
    :param values: Iterable of texts, one per child.
    :returns: The container element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    container = etree.Element(plural)
    for text in values:
        child = etree.SubElement(container, singular)
        child.text = text
    return container
@rules.rule("sizes")
def sizes(path, values):
    """Transform the ``sizes`` list into a ``sizes`` element of ``size`` children.

    :param path: Unused by this rule.
    :param values: List of size texts.
    :returns: Whatever ``free_text_list`` returns for these values.
    """
    return free_text_list(plural="sizes", singular="size", values=values)
@rules.rule("formats")
def formats(path, values):
    """Transform the ``formats`` list into a ``formats`` element of ``format`` children.

    :param path: Unused by this rule.
    :param values: List of format texts.
    :returns: Whatever ``free_text_list`` returns for these values.
    """
    return free_text_list(plural="formats", singular="format", values=values)
@rules.rule("version")
def version(path, value):
    """Transform ``version`` into a ``version`` element.

    :param path: Unused by this rule.
    :param value: Version text.
    :returns: The ``version`` element, or ``None`` when *value* is falsy.
    """
    return E.version(value) if value else None
@rules.rule("rightsList")
def rights(path, values):
    """Transform ``rightsList`` into a ``rightsList`` element.

    :param path: Unused by this rule.
    :param values: List of rights dicts; the ``rights`` text is optional.
    :returns: The ``rightsList`` element, or ``None`` when *values* is empty.
    """
    if not values:
        return
    # Applied in this order, before the language attribute.
    attr_names = (
        "rightsURI",
        "rightsIdentifierScheme",
        "rightsIdentifier",
        "schemeURI",
    )
    root = E.rightsList()
    for entry in values:
        # An entry may carry only attributes and no rights text; it then
        # gets an element without text.
        node = E.rights(entry["rights"]) if "rights" in entry else E.rights()
        for attr_name in attr_names:
            set_elem_attr(node, attr_name, entry)
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        root.append(node)
    return root
@rules.rule("descriptions")
def descriptions(path, values):
    """Transform the ``descriptions`` list into a ``descriptions`` element.

    :param path: Unused by this rule.
    :param values: List of dicts; each needs ``description`` and
        ``descriptionType`` entries and may carry ``lang``.
    :returns: The ``descriptions`` element, or ``None`` when *values* is
        empty.
    """
    if not values:
        return
    root = E.descriptions()
    for entry in values:
        node = E.description(
            entry["description"],
            descriptionType=entry["descriptionType"],
        )
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        root.append(node)
    return root
def geopoint(root, value):
    """Append ``pointLongitude`` then ``pointLatitude`` children to *root*.

    Used for both ``geoLocationPoint`` and polygon points.

    :param root: Parent element that receives the two children.
    :param value: Dict with ``pointLongitude`` and ``pointLatitude``
        entries; each is converted with ``str()`` for the element text.
    """
    for coordinate in ("pointLongitude", "pointLatitude"):
        root.append(E(coordinate, str(value[coordinate])))
@rules.rule("geoLocations")
def geolocations(path, values):
    """Transform the ``geoLocations`` list into a ``geoLocations`` element.

    Each entry becomes a ``geoLocation`` element whose children are emitted
    in this order: place, point, box, then one element per polygon.

    :param path: Unused by this rule.
    :param values: List of geolocation dicts.
    :returns: The ``geoLocations`` element, or ``None`` when *values* is
        empty.
    """
    if not values:
        return
    box_bounds = (
        "westBoundLongitude",
        "eastBoundLongitude",
        "southBoundLatitude",
        "northBoundLatitude",
    )
    root = E.geoLocations()
    for entry in values:
        location = E.geoLocation()

        place = entry.get("geoLocationPlace")
        if place:
            location.append(E.geoLocationPlace(place))

        point = entry.get("geoLocationPoint")
        if point:
            point_node = E.geoLocationPoint()
            geopoint(point_node, point)
            location.append(point_node)

        box = entry.get("geoLocationBox")
        if box:
            box_node = E.geoLocationBox()
            for bound in box_bounds:
                box_node.append(E(bound, str(box[bound])))
            location.append(box_node)

        for polygon in entry.get("geoLocationPolygons", []):
            polygon_node = E.geoLocationPolygon()
            for vertex in polygon["polygonPoints"]:
                vertex_node = E.polygonPoint()
                geopoint(vertex_node, vertex)
                polygon_node.append(vertex_node)
            # The optional interior point follows the boundary points.
            inner = polygon.get("inPolygonPoint")
            if inner:
                inner_node = E.inPolygonPoint()
                geopoint(inner_node, inner)
                polygon_node.append(inner_node)
            location.append(polygon_node)

        root.append(location)
    return root
@rules.rule("fundingReferences")
def fundingreferences(path, values):
    """Transform ``fundingReferences`` into a ``fundingReferences`` element.

    :param path: Unused by this rule.
    :param values: List of funding dicts. ``funderName`` is always passed
        to the element builder; ``funderIdentifier`` (with optional
        ``funderIdentifierType``), ``awardNumber`` (with optional
        ``awardURI``) and ``awardTitle`` are added only when truthy.
    :returns: The ``fundingReferences`` element, or ``None`` when *values*
        is empty.
    """
    if not values:
        return
    root = E.fundingReferences()
    for entry in values:
        reference = E.fundingReference()
        # NOTE(review): a missing funderName reaches the builder as None --
        # confirm how the element builder treats that.
        reference.append(E.funderName(entry.get("funderName")))

        funder_id = entry.get("funderIdentifier")
        if funder_id:
            id_node = E.funderIdentifier(funder_id)
            id_type = entry.get("funderIdentifierType")
            if id_type:
                id_node.set("funderIdentifierType", id_type)
            reference.append(id_node)

        award_number = entry.get("awardNumber")
        if award_number:
            award_node = E.awardNumber(award_number)
            # awardURI is only emitted as an attribute of awardNumber.
            award_uri = entry.get("awardURI")
            if award_uri:
                award_node.set("awardURI", award_uri)
            reference.append(award_node)

        award_title = entry.get("awardTitle")
        if award_title:
            reference.append(E.awardTitle(award_title))

        # Only references with at least one child are kept; funderName has
        # already been appended above by the time this runs.
        if len(reference) > 0:
            root.append(reference)
    return root