Quick start

Suppose you have an xml document user.xml:

<?xml version="1.0" encoding="utf-8"?>
<doc:envelope xmlns="http://www.test.org"
              xmlns:doc="http://www.test1.org">
    <doc:user name="Alex" surname="Ivanov" age="26">

        <doc:birthdate year="1992" month="06" day="14"/>

        <doc:contacts>
            <doc:phone>+79204563539</doc:phone>
            <doc:email>alex@gmail.com</doc:email>
            <doc:email>alex@mail.ru</doc:email>
        </doc:contacts>

        <doc:documents>
            <doc:passport series="3127" number="836815"/>
        </doc:documents>

        <data:occupations xmlns:data="http://www.test2.org">
            <data:occupation title="yandex">
                <data:address>Moscow</data:address>
                <data:employees>8854</data:employees>
            </data:occupation>
            <data:occupation title="skbkontur">
                <data:address>Yekaterinburg</data:address>
                <data:employees>7742</data:employees>
            </data:occupation>
        </data:occupations>

    </doc:user>
</doc:envelope>

To deserialize the document you could use an XML library API to parse it and then access and modify the parsed XML DOM manually. Such imperative code involves a lot of boilerplate, which takes time to write and can lead to bugs. Instead, you can use the paxb API to write declarative code: all you need to do is describe the field mappings and types, and paxb will serialize and deserialize the data for you:

import json
import re
from datetime import date

import attr
import paxb as pb


@pb.model(name='occupation', ns='data', ns_map={'data': 'http://www.test2.org'})
class Occupation:
    """Declarative mapping for one ``<data:occupation>`` element.

    The model is bound to the element name ``occupation`` in the ``data``
    namespace (``http://www.test2.org``).
    """

    # In the sample document this is the ``title`` attribute of the element.
    title = pb.attr()
    # In the sample document this is the text of the ``<data:address>`` child.
    address = pb.field()
    # Text of the ``<data:employees>`` child, passed through ``int``.
    employees = pb.field(converter=int)


@pb.model(name='user', ns='doc', ns_map={'doc': 'http://www.test1.org'})
class User:
    """Declarative mapping for the ``<doc:user>`` element of the sample document.

    The model is bound to the element name ``user`` in the ``doc`` namespace
    (``http://www.test1.org``). Each class attribute below describes where a
    value lives in the XML and how it is converted.
    """

    # Attributes of the <doc:user> element itself.
    name = pb.attr()
    surname = pb.attr()
    age = pb.attr(converter=int)

    # The three parts of the date are stored as separate integer attributes
    # (year / month / day) under the ``birthdate`` path.
    birth_year = pb.wrap('birthdate', pb.attr('year', converter=int))
    birth_month = pb.wrap('birthdate', pb.attr('month', converter=int))
    birth_day = pb.wrap('birthdate', pb.attr('day', converter=int))

    @property
    def birthdate(self):
        """Return the birth date assembled from the three stored components."""
        return date(year=self.birth_year, month=self.birth_month, day=self.birth_day)

    @birthdate.setter
    def birthdate(self, value):
        """Split ``value`` (an object with ``year``/``month``/``day``) into the stored components."""
        self.birth_year = value.year
        self.birth_month = value.month
        self.birth_day = value.day

    # Values declared under the ``contacts`` path; ``emails`` is a list of
    # ``email`` fields.
    phone = pb.wrap('contacts', pb.field())
    emails = pb.wrap('contacts', pb.as_list(pb.field(name='email')))

    # Attributes declared under the nested ``documents/passport`` path.
    passport_series = pb.wrap('documents/passport', pb.attr('series'))
    passport_number = pb.wrap('documents/passport', pb.attr('number'))

    # List of nested Occupation models under ``occupations``, which uses the
    # ``data`` namespace rather than the model's own ``doc`` namespace.
    occupations = pb.wrap(
        'occupations', pb.lst(pb.nested(Occupation)), ns='data', ns_map={'data': 'http://www.test2.org'}
    )

    # Field with a default value of 'RU'.
    citizenship = pb.field(default='RU')

    @phone.validator
    def check(self, attribute, value):
        """Reject phone numbers that are not ``+`` followed by 11 to 13 digits.

        Raises:
            ValueError: if ``value`` does not have the expected format.
        """
        # fullmatch (not match) so that both the 13-digit upper bound and the
        # end of the string are enforced, not just the prefix.
        if not re.fullmatch(r'\+\d{11,13}', value):
            raise ValueError("phone number is incorrect")


# Read the whole document into memory. The file declares encoding="utf-8", so
# decode it explicitly instead of relying on the platform default encoding.
with open('user.xml', encoding='utf-8') as file:
    xml = file.read()

Then the deserialized object can be modified and serialized back to an XML document or converted to JSON format:

try:
    # Build a User from the <doc:user> element wrapped in <doc:envelope>.
    user = pb.from_xml(User, xml, envelope='doc:envelope', ns_map={'doc': 'http://www.test1.org'})
    # Goes through the birthdate property setter, which updates birth_year.
    user.birthdate = user.birthdate.replace(year=1993)

    # Open for writing: json.dump needs a writable file object.
    with open('user.json', 'w') as file:
        json.dump(attr.asdict(user), file)

except (pb.exc.DeserializationError, ValueError) as e:
    print(f"deserialization error: {e}")

user.json:

{
    "age": 26,
    "birth_day": 14,
    "birth_month": 6,
    "birth_year": 1993,
    "citizenship": "RU",
    "emails": ["alex@gmail.com", "alex@mail.ru"],
    "name": "Alex",
    "occupations": [
        {
            "address": "Moscow",
            "employees": 8854,
            "title": "yandex"
        },
        {
            "address": "Yekaterinburg",
            "employees": 7742,
            "title": "skbkontur"
        }
    ],
    "passport_number": "836815",
    "passport_series": "3127",
    "phone": "+79204563539",
    "surname": "Ivanov"
}