# -*- coding: utf-8 -*-
#
# This file is part of DataCite.
#
# Copyright (C) 2016 CERN.
# Copyright (C) 2019 Caltech.
# Copyright (C) 2024 IBT Czech Academy of Sciences.
#
# DataCite is free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.
"""DataCite v4.5 JSON to XML transformations."""
import importlib.resources as importlib_resources
from lxml import etree
from lxml.builder import E
from .jsonutils import validator_factory
from .xmlutils import (
Rules,
dump_etree_helper,
etree_to_string,
set_elem_attr,
set_non_empty_attr,
)
# Rule collection: its ``rule(key)`` decorator is applied to the transformation
# functions in this module, and the collection itself is handed to
# ``dump_etree_helper`` by ``dump_etree``.
rules = Rules()

# Namespace prefix map. It is passed to ``dump_etree_helper`` and is also used
# as ``nsmap`` when ``title`` elements are created.
ns = {
    None: "http://datacite.org/schema/kernel-4",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    "xml": "xml",
}

# Attributes passed to ``dump_etree_helper`` together with ``ns``: an
# ``xsi:schemaLocation`` value naming the kernel-4 namespace followed by the
# kernel-4.5 XSD location (two adjacent string literals, joined by Python).
root_attribs = {
    "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation": "http://datacite.org/schema/kernel-4 "
    "http://schema.datacite.org/meta/kernel-4.5/metadata.xsd",
}

# Validator created by ``validator_factory`` from the v4.5 JSON schema file
# located inside the ``datacite`` package; ``validate`` calls its ``is_valid``.
validator = validator_factory(
    importlib_resources.files("datacite") / "schemas/datacite-v4.5.json"
)
[docs]
def dump_etree(data):
    """Build the DataCite v4.5 XML tree for a JSON metadata dictionary.

    The work is delegated to ``dump_etree_helper``, which receives ``data``
    together with this module's rule collection, namespace map and root
    attributes. Whatever the helper returns is returned unchanged.
    """
    tree = dump_etree_helper(data, rules, ns, root_attribs)
    return tree
[docs]
def tostring(data, **kwargs):
    """Serialize a JSON metadata dictionary as DataCite v4.5 XML text.

    ``data`` is first converted with ``dump_etree``; the resulting tree and
    any extra keyword arguments are forwarded to ``etree_to_string``.
    """
    tree = dump_etree(data)
    return etree_to_string(tree, **kwargs)
[docs]
def validate(data):
    """Check a JSON dictionary against the DataCite v4.5 JSON schema.

    Returns the result of ``validator.is_valid(data)`` for the module-level
    validator.
    """
    outcome = validator.is_valid(data)
    return outcome
def affiliation(root, values):
    """Append an ``affiliation`` element to ``root`` for each named entry.

    Entries are read from ``values["affiliation"]`` (missing key means no
    entries). An entry without a truthy ``name`` is skipped entirely.
    """
    for entry in values.get("affiliation", []):
        if not entry.get("name"):
            continue
        node = E.affiliation(entry["name"])
        # The identifier and its scheme are serialized as attributes of the
        # element (0-1 cardinality) rather than as a 0-n list of objects.
        for attr_name in ("affiliationIdentifier", "affiliationIdentifierScheme"):
            set_elem_attr(node, attr_name, entry)
        # JSON spells the key "schemeUri"; the XML attribute is "schemeURI".
        if entry.get("schemeUri"):
            node.set("schemeURI", entry["schemeUri"])
        root.append(node)
def familyname(root, value):
    """Append a ``familyName`` element to ``root`` when one is provided.

    Nothing is appended if ``value`` has no truthy ``familyName`` entry.
    """
    family_name = value.get("familyName")
    if not family_name:
        return
    root.append(E.familyName(family_name))
def givenname(root, value):
    """Extract given name.

    Appends a ``givenName`` element to ``root`` when ``value`` holds a truthy
    ``givenName`` entry; otherwise ``root`` is left untouched.

    :param root: Parent element that receives the new child.
    :param value: Creator/contributor dictionary to read ``givenName`` from.
    """
    val = value.get("givenName")
    if val:
        root.append(E.givenName(val))
def person_or_org_name(root, value, xml_tagname, json_tagname):
    """Append the creator/contributor name element and its attributes.

    An element named ``xml_tagname`` is created with ``value[json_tagname]``
    as text. ``nameType`` is handled through ``set_elem_attr`` and the
    language through ``set_non_empty_attr`` before the element is appended
    to ``root``.
    """
    name_text = value[json_tagname]
    name_elem = E(xml_tagname, name_text)
    set_elem_attr(name_elem, "nameType", value)
    language_code = value.get("lang")
    set_non_empty_attr(name_elem, "{xml}lang", language_code)
    root.append(name_elem)
def nameidentifiers(root, values):
    """Append a ``nameIdentifier`` element to ``root`` for each identifier.

    Entries come from ``values["nameIdentifiers"]``; those without a truthy
    ``nameIdentifier`` are skipped. ``nameIdentifierScheme`` is read with a
    plain key lookup, so it must be present on every emitted entry.
    """
    for entry in values.get("nameIdentifiers", []):
        if not entry.get("nameIdentifier"):
            continue
        node = E.nameIdentifier(entry["nameIdentifier"])
        node.set("nameIdentifierScheme", entry["nameIdentifierScheme"])
        # JSON spells the key "schemeUri"; the XML attribute is "schemeURI".
        if entry.get("schemeUri"):
            node.set("schemeURI", entry["schemeUri"])
        root.append(node)
def fetch_creator(root, value):
    """Append the parts shared by creators and contributors to ``root``.

    Runs, in this order, the given-name, family-name, name-identifier and
    affiliation extractors on the same ``root``/``value`` pair.
    """
    extractors = (givenname, familyname, nameidentifiers, affiliation)
    for extract in extractors:
        extract(root, value)
def title(root, values):
    """Append one ``title`` element to ``root`` per entry of ``values``.

    A falsy ``values`` (``None`` or an empty list) appends nothing.
    """
    for entry in values or ():
        node = etree.Element("title", nsmap=ns)
        node.text = entry["title"]
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        # 'type' was a mistake in the 4.0 serializer and is still accepted
        # for backwards compatibility until kernel 5 is released. It is
        # applied first so that 'titleType', when available, supersedes it.
        for json_key in ("type", "titleType"):
            set_non_empty_attr(node, "titleType", entry.get(json_key))
        root.append(node)
def related_object(root, value):
    """Set the attributes shared by relatedIdentifiers and relatedItems.

    Does nothing when ``value`` is falsy. Otherwise the attributes are
    applied to ``root`` in a fixed order: ``relatedMetadataScheme``,
    ``schemeURI``, ``schemeType`` and ``resourceTypeGeneral``.
    """
    if value:
        set_elem_attr(root, "relatedMetadataScheme", value)
        # JSON spells the key "schemeUri"; the XML attribute is "schemeURI".
        if value.get("schemeUri"):
            root.set("schemeURI", value["schemeUri"])
        for attr_name in ("schemeType", "resourceTypeGeneral"):
            set_elem_attr(root, attr_name, value)
@rules.rule("alternateIdentifiers")
def alternate_identifiers(path, values):
    """Build the ``alternateIdentifiers`` element.

    Note that as of schema version 4.5 the identifiers field is deprecated
    in favour of using alternateIdentifiers and the doi field.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.alternateIdentifiers()
    for entry in values:
        node = E.alternateIdentifier(entry["alternateIdentifier"])
        identifier_type = entry.get("alternateIdentifierType")
        set_non_empty_attr(node, "alternateIdentifierType", identifier_type)
        container.append(node)
    return container
@rules.rule("creators")
def creators(path, values):
    """Build the ``creators`` element with one ``creator`` child per entry.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.creators()
    for entry in values:
        node = E.creator()
        # The name element goes first, then the shared person/organisation parts.
        person_or_org_name(node, entry, "creatorName", "name")
        fetch_creator(node, entry)
        container.append(node)
    return container
@rules.rule("titles")
def titles(path, values):
    """Build the ``titles`` element, filled by the ``title`` helper.

    Returns ``None`` when ``values`` is empty.
    """
    if values:
        container = E.titles()
        title(container, values)
        return container
    return None
@rules.rule("publisher")
def publisher(path, value):
    """Build the ``publisher`` element from the publisher dictionary.

    The text is ``value["name"]`` (read with ``get``); the identifier, its
    scheme and the scheme URI are passed through ``set_non_empty_attr``.
    Returns ``None`` when ``value`` is empty.
    """
    if not value:
        return None

    node = E.publisher(value.get("name"))
    # (XML attribute, JSON key) pairs, applied in this order.
    attribute_sources = (
        ("publisherIdentifier", "publisherIdentifier"),
        ("publisherIdentifierScheme", "publisherIdentifierScheme"),
        ("schemeURI", "schemeUri"),
    )
    for xml_attr, json_key in attribute_sources:
        set_non_empty_attr(node, xml_attr, value.get(json_key))
    return node
@rules.rule("publicationYear")
def publication_year(path, value):
    """Build the ``publicationYear`` element, or ``None`` for an empty value."""
    return E.publicationYear(value) if value else None
@rules.rule("subjects")
def subjects(path, values):
    """Build the ``subjects`` element with one ``subject`` child per entry.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.subjects()
    for entry in values:
        node = E.subject(entry["subject"])
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        set_elem_attr(node, "subjectScheme", entry)
        # JSON uses "...Uri" keys while the XML attributes end in "URI".
        for xml_attr, json_key in (("schemeURI", "schemeUri"), ("valueURI", "valueUri")):
            if entry.get(json_key):
                node.set(xml_attr, entry[json_key])
        container.append(node)
    return container
@rules.rule("contributors")
def contributors(path, values):
    """Build the ``contributors`` element, one ``contributor`` per entry.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.contributors()
    for entry in values:
        node = E.contributor()
        # The name element goes first, then the shared person/organisation parts.
        person_or_org_name(node, entry, "contributorName", "name")
        fetch_creator(node, entry)
        set_elem_attr(node, "contributorType", entry)
        container.append(node)
    return container
@rules.rule("dates")
def dates(path, values):
    """Build the ``dates`` element with one ``date`` child per entry.

    Each entry must provide ``date`` and ``dateType`` (plain key lookups).
    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.dates()
    for entry in values:
        node = E.date(entry["date"], dateType=entry["dateType"])
        set_elem_attr(node, "dateInformation", entry)
        container.append(node)
    return container
@rules.rule("language")
def language(path, value):
    """Build the ``language`` element, or ``None`` for an empty value."""
    return E.language(value) if value else None
@rules.rule("types")
def resource_type(path, value):
    """Transform ``types`` into the ``resourceType`` element.

    :param path: Key the rule was registered for (unused).
    :param value: Dictionary holding ``resourceTypeGeneral`` and, optionally,
        ``resourceType``.
    :returns: The ``resourceType`` element, or ``None`` when ``value`` is
        empty, matching how the other rules in this module treat empty input.
    :raises KeyError: If a non-empty ``value`` lacks ``resourceTypeGeneral``.
    """
    # Guard against ``None``/``{}`` like every other rule does, instead of
    # failing with a TypeError/KeyError on the lookup below.
    if not value:
        return None
    elem = E.resourceType()
    elem.set("resourceTypeGeneral", value["resourceTypeGeneral"])
    # The free-text resource type is optional; a missing key leaves the
    # element text as None.
    elem.text = value.get("resourceType")
    return elem
@rules.rule("doi")
def identifier(path, value):
    """Build the ``identifier`` element (type ``DOI``) from the doi value.

    Returns ``None`` when ``value`` is empty.
    """
    if value:
        return E.identifier(value, identifierType="DOI")
    return None
@rules.rule("relatedIdentifiers")
def related_identifiers(path, values):
    """Build ``relatedIdentifiers`` with one ``relatedIdentifier`` per entry.

    Each entry must provide ``relatedIdentifier`` and ``relationType`` (plain
    key lookups). Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.relatedIdentifiers()
    for entry in values:
        node = E.relatedIdentifier()
        node.text = entry["relatedIdentifier"]
        node.set("relationType", entry["relationType"])
        # Shared scheme/type attributes come before the identifier type.
        related_object(node, entry)
        set_elem_attr(node, "relatedIdentifierType", entry)
        container.append(node)
    return container
def free_text_list(plural, singular, values):
    """Build a ``plural`` element holding one ``singular`` child per text.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = etree.Element(plural)
    for text in values:
        child = etree.SubElement(container, singular)
        child.text = text
    return container
@rules.rule("sizes")
def sizes(path, values):
    """Build the ``sizes`` element with one ``size`` child per text value."""
    container = free_text_list("sizes", "size", values)
    return container
@rules.rule("formats")
def formats(path, values):
    """Transform formats.

    :param path: Key the rule was registered for (unused).
    :param values: List of free-text format strings.
    :returns: Whatever ``free_text_list`` returns for a ``formats`` container
        with one ``format`` child per entry.
    """
    return free_text_list("formats", "format", values)
@rules.rule("version")
def version(path, value):
    """Build the ``version`` element, or ``None`` for an empty value."""
    return E.version(value) if value else None
@rules.rule("rightsList")
def rights(path, values):
    """Build the ``rightsList`` element with one ``rights`` child per entry.

    Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.rightsList()
    for entry in values:
        # Handle the odd case where no rights text is present: the element
        # is still emitted, just without text.
        node = E.rights(entry["rights"]) if "rights" in entry else E.rights()
        # JSON uses "...Uri" keys while the XML attributes end in "URI".
        if entry.get("rightsUri"):
            node.set("rightsURI", entry["rightsUri"])
        for attr_name in ("rightsIdentifierScheme", "rightsIdentifier"):
            set_elem_attr(node, attr_name, entry)
        if entry.get("schemeUri"):
            node.set("schemeURI", entry["schemeUri"])
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        container.append(node)
    return container
@rules.rule("descriptions")
def descriptions(path, values):
    """Build ``descriptions`` with one ``description`` child per entry.

    Each entry must provide ``description`` and ``descriptionType`` (plain
    key lookups). Returns ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.descriptions()
    for entry in values:
        node = E.description(
            entry["description"],
            descriptionType=entry["descriptionType"],
        )
        set_non_empty_attr(node, "{xml}lang", entry.get("lang"))
        container.append(node)
    return container
def geopoint(root, value):
    """Append longitude and latitude children for a point to ``root``.

    Used for both geoLocationPoint and polygon points. The coordinates are
    converted with ``str`` and the longitude is emitted before the latitude.
    """
    for coordinate in ("pointLongitude", "pointLatitude"):
        root.append(E(coordinate, str(value[coordinate])))
@rules.rule("geoLocations")
def geolocations(path, values):
    """Build ``geoLocations`` with one ``geoLocation`` child per entry.

    Within each location the place, point, box and polygon are emitted in
    that order, each only when present. Returns ``None`` when ``values`` is
    empty.
    """
    if not values:
        return None

    box_bounds = (
        "westBoundLongitude",
        "eastBoundLongitude",
        "southBoundLatitude",
        "northBoundLatitude",
    )
    container = E.geoLocations()
    for entry in values:
        location = E.geoLocation()

        place_name = entry.get("geoLocationPlace")
        if place_name:
            location.append(E.geoLocationPlace(place_name))

        single_point = entry.get("geoLocationPoint")
        if single_point:
            point_elem = E.geoLocationPoint()
            geopoint(point_elem, single_point)
            location.append(point_elem)

        box = entry.get("geoLocationBox")
        if box:
            box_elem = E.geoLocationBox()
            # Bounds are converted with str() and written west, east, south, north.
            for bound in box_bounds:
                box_elem.append(E(bound, str(box[bound])))
            location.append(box_elem)

        polygon = entry.get("geoLocationPolygon")
        if polygon:
            polygon_elem = E.geoLocationPolygon()
            for vertex in polygon:
                # A polygon entry may hold a boundary point, an in-polygon
                # point, or both; the boundary point is written first.
                for tag in ("polygonPoint", "inPolygonPoint"):
                    coordinates = vertex.get(tag)
                    if coordinates:
                        vertex_elem = E(tag)
                        geopoint(vertex_elem, coordinates)
                        polygon_elem.append(vertex_elem)
            location.append(polygon_elem)

        container.append(location)
    return container
@rules.rule("fundingReferences")
def fundingreferences(path, values):
    """Build ``fundingReferences`` with one ``fundingReference`` per entry.

    Each reference starts with ``funderName`` and is followed, when present,
    by the funder identifier, the award number and the award title. Returns
    ``None`` when ``values`` is empty.
    """
    if not values:
        return None

    container = E.fundingReferences()
    for entry in values:
        reference = E.fundingReference()
        reference.append(E.funderName(entry.get("funderName")))

        funder_id = entry.get("funderIdentifier")
        if funder_id:
            funder_id_elem = E.funderIdentifier(funder_id)
            funder_id_type = entry.get("funderIdentifierType")
            if funder_id_type:
                funder_id_elem.set("funderIdentifierType", funder_id_type)
            reference.append(funder_id_elem)

        award_number = entry.get("awardNumber")
        if award_number:
            award_elem = E.awardNumber(award_number)
            # JSON spells the key "awardUri"; the XML attribute is "awardURI".
            award_uri = entry.get("awardUri")
            if award_uri:
                award_elem.set("awardURI", award_uri)
            reference.append(award_elem)

        award_title = entry.get("awardTitle")
        if award_title:
            reference.append(E.awardTitle(award_title))

        # Only references that ended up with at least one child are kept.
        if len(reference) > 0:
            container.append(reference)
    return container
def _related_item_people(people, container_tag, person_tag, name_tag, typed=False):
    """Build a creators/contributors container for a related item.

    One ``person_tag`` child is created per entry of ``people``. When
    ``typed`` is true, ``contributorType`` is additionally passed through
    ``set_elem_attr`` for each child.
    """
    container = E(container_tag)
    for person in people:
        node = E(person_tag)
        person_or_org_name(node, person, name_tag, "name")
        fetch_creator(node, person)
        if typed:
            set_elem_attr(node, "contributorType", person)
        container.append(node)
    return container


def _append_text_children(parent, source, tags):
    """Append a text-only child per tag in ``tags`` with a truthy value.

    The JSON key and the XML tag name are the same for every tag handled
    here; tags are processed in the order given.
    """
    for tag in tags:
        text = source.get(tag)
        if text:
            parent.append(E(tag, text))


@rules.rule("relatedItems")
def related_items(path, values):
    """Transform related items.

    :param path: Key the rule was registered for (unused).
    :param values: List of related-item dictionaries.
    :returns: The ``relatedItems`` element with one ``relatedItem`` child per
        entry, or ``None`` when ``values`` is empty.

    Children of each ``relatedItem`` are emitted in a fixed order:
    relatedItemIdentifier, creators, titles, publicationYear, volume, issue,
    number, firstPage, lastPage, publisher, edition, contributors.
    """
    if not values:
        return None

    root = E.relatedItems()
    for value in values:
        elem = E.relatedItem()
        set_elem_attr(elem, "relatedItemType", value)
        set_elem_attr(elem, "relationType", value)

        # The identifier is a nested object keyed by the same name as the
        # property that holds it.
        id_label = "relatedItemIdentifier"
        re_id = value.get(id_label)
        if re_id:
            related_item_identifier = E.relatedItemIdentifier()
            related_item_identifier.text = re_id[id_label]
            set_elem_attr(related_item_identifier, "relatedItemIdentifierType", re_id)
            # Shared scheme/type attributes are read from the related item
            # itself, not from the nested identifier object.
            related_object(related_item_identifier, value)
            elem.append(related_item_identifier)

        creator_values = value.get("creators")
        if creator_values:
            elem.append(
                _related_item_people(
                    creator_values, "creators", "creator", "creatorName"
                )
            )

        # The titles container is always emitted, even if no titles are given.
        related_titles = E.titles()
        title(related_titles, value.get("titles"))
        elem.append(related_titles)

        _append_text_children(elem, value, ("publicationYear", "volume", "issue"))

        number = value.get("number")
        if number:
            re_number = E.number(number)
            if value.get("numberType"):
                set_elem_attr(re_number, "numberType", value)
            elem.append(re_number)

        _append_text_children(
            elem, value, ("firstPage", "lastPage", "publisher", "edition")
        )

        contributors_values = value.get("contributors")
        if contributors_values:
            elem.append(
                _related_item_people(
                    contributors_values,
                    "contributors",
                    "contributor",
                    "contributorName",
                    typed=True,
                )
            )

        root.append(elem)
    return root