#!/usr/bin/python
#
# json2xml.py
#
# 2012-12-04: Written by Steven J. DeRose.
#
# To do:
# Fix -pad.
# Option to move atomic named items to parent attributes?
#
from __future__ import print_function
import sys
import os
import re
import argparse
from string import *
from math import *
import json
import pyxser
#from sjdUtils import *
#su = sjdUtils()
#su.setColors(args.color)
version = "2012-12-04"
###############################################################################
# Process options
#
# Command-line definition. All options use a single leading dash; argparse
# derives each attribute name on `args` from the option string with the dash
# removed (so '-nonamespaces' is read back as args.nonamespaces).
parser = argparse.ArgumentParser()
parser.add_argument(
    '-noprop', action='store_true',
    help='Untag the "pyxs:prop" element surrounding data atoms.')
parser.add_argument(
    '-nonamespaces', action='store_true',
    help='Delete the "pyxs:" namespace prefix.')
parser.add_argument(
    '-nosize', action='store_true',
    help='Delete the "size" attribute everywhere.')
parser.add_argument(
    '-notype', action='store_true',
    help='Delete the "type" attribute everywhere.')
parser.add_argument(
    '-pad', type=int,
    help='Left-pad integers to this many columns.')
parser.add_argument(
    "-quiet", action='store_true',
    help='Suppress most messages.')
parser.add_argument(
    "-verbose", action='count', default=0,
    help='Add more messages (repeatable).')
parser.add_argument(
    '-version', action='version', version=version)
# REMAINDER swallows everything from the first positional onward, so options
# must be given before the file path(s).
parser.add_argument(
    'files', nargs=argparse.REMAINDER,
    help='Path(s) to input file(s).')

# Parsed options, read by the rest of the script as the module-level `args`.
args = parser.parse_args()

# Warn when the I/O encoding is not UTF-8. The variable may be unset, may be
# spelled "utf-8"/"UTF8"/"utf_8", and may carry an ":errorhandler" suffix, so
# normalize before comparing. The warning goes to stderr because stdout
# carries the XML output.
ioEncoding = os.environ.get("PYTHONIOENCODING", "")
ioEncoding = ioEncoding.split(":")[0].lower().replace("-", "_")
if (ioEncoding not in ("utf_8", "utf8") and not args.quiet):
    sys.stderr.write("Warning: PYTHONIOENCODING is not utf_8.\n")
###############################################################################
###############################################################################
# From http://stackoverflow.com/questions/1305532/convert-python-dict-to-object
# Promotes a dict to an Object.
#
class Str:
    """Plain attribute holder: each keyword argument becomes an attribute.

    Used to turn a dict (passed as ``Str(**someDict)``) into an object whose
    attributes are the dict's keys.
    """

    def __init__(self, **entries):
        # Copy every key/value pair straight into the instance namespace.
        attributes = self.__dict__
        for key in entries:
            attributes[key] = entries[key]
###############################################################################
###############################################################################
# Main
#
def dropAttribute(theXml, name):
    """Return `theXml` with the attribute `name` removed from every tag.

    The attribute is removed wherever it appears in a tag's attribute list,
    not only when it is the last one. Matching is confined to the text
    between '<' and '>', so element content is never touched.
    """
    expr = r'(<[^<>]*?)\s+' + re.escape(name) + r'="[^"<>]*"'
    return re.sub(expr, r'\1', theXml)


def declutter(theXml, pad=None, noprop=False, nonamespaces=False,
              nosize=False, notype=False):
    """Apply the requested clean-ups to serialized XML and return the result.

    theXml       -- the XML text to clean up.
    pad          -- if truthy, put a tab on each side of any run of digits
                    that directly follows a '>' (real left-padding to `pad`
                    columns is still to be done).
    noprop       -- remove pyxs:prop start and end tags, keeping content.
    nonamespaces -- remove the "pyxs:" prefix from element names.
    nosize       -- remove every size="..." attribute.
    notype       -- remove every type="..." attribute.

    Steps run in the order listed; noprop must precede nonamespaces because
    it looks for the prefixed element name.
    """
    if (pad):  # incomplete
        theXml = re.sub(r'(>)(\d+)', r'\1\t\2\t', theXml)
    if (noprop):
        sys.stderr.write("Dropping prop elements\n")
        theXml = re.sub(r'<pyxs:prop\b[^<>]*>', '', theXml)
        theXml = re.sub(r'</pyxs:prop\s*>', '', theXml)
    if (nonamespaces):
        sys.stderr.write("Dropping namespaces\n")
        theXml = re.sub(r'<(/?)pyxs:', r'<\1', theXml)
    if (nosize):
        sys.stderr.write("Dropping size attributes\n")
        theXml = dropAttribute(theXml, "size")
    if (notype):
        sys.stderr.write("Dropping type attributes\n")
        theXml = dropAttribute(theXml, "type")
    return theXml


def loadJson(paths):
    """Parse and return JSON from the first entry of `paths`, or from stdin.

    Only the first path is used. If it does not name an existing file, a
    message is written to stderr and the script exits with status 1.
    """
    if (not paths):
        return json.load(sys.stdin)
    path = paths[0]
    if (not os.path.isfile(path)):
        sys.stderr.write("Can't find file '" + path + "'.\n")
        sys.exit(1)
    # 'with' guarantees the file is closed even if parsing fails.
    with open(path, "r") as fh:
        return json.load(fh)


def main(args):
    """Convert the JSON input selected by `args` to XML and print it.

    `args` is the parsed option namespace (files, pad, noprop, nonamespaces,
    nosize, notype).
    """
    # Load the JSON and make a Python Object.
    pyObject = loadJson(args.files)
    if (not isinstance(pyObject, dict)):
        # Str(**x) needs a mapping; report that clearly instead of crashing.
        sys.stderr.write("Top-level JSON value must be an object.\n")
        sys.exit(1)
    s = Str(**pyObject)
    theXml = pyxser.serialize(obj=s, enc="utf-8")

    # De-clutter
    theXml = declutter(
        theXml, pad=args.pad, noprop=args.noprop,
        nonamespaces=args.nonamespaces, nosize=args.nosize,
        notype=args.notype)
    print(theXml)


if (__name__ == "__main__"):
    main(args)
    sys.exit(0)
###############################################################################
###############################################################################
###############################################################################
#
perldoc = """
=pod
=head1 Usage
json2xml.py [options]
Simple but thorough conversion using off-the-shelf packages 'json' and 'pyxser'.
=head2 Notes
The C<json> decoder library produces exactly these Python types:
JSON -- Python
--------------------------
object -- dict
array -- list
string -- unicode
number (int) -- int, long
number (real) -- float
true -- True
false -- False
null -- None
Thus, these are the only Python types passed to the XML serializer library.
This makes some of the generality of C<pyxser> unnecessary here.
=head1 Options
=over
=item * B<-noprop>
Untag the "pyxs:prop" elements from the XML output. Where they have names,
the name is lost (should instead move these items onto the container element
as named attributes, or something like that).
=item * B<-nonamespaces>
Delete the "pyxs:" namespace prefixes from all output XML elements.
=item * B<-nosize>
Delete the "size" attributes from the XML output.
=item * B<-notype>
Delete the "type" attributes from the XML output (this would mainly be
useful in environments that don't care, such as many scripting languages,
or JSON data such as this script deals with).
=item * B<-pad> I<n>
Left-pad integers with spaces, to a minimum of I<n> columns.
(incomplete -- presently just puts a tab on each side instead).
=item * B<-quiet>
Suppress most messages.
=item * B<-verbose>
Add more detailed messages (doesn't do much at the moment).
=item * B<-version>
Display version info and exit.
=back
=head1 Related Commands
C<json> -- built-in Python package for JSON support.
C<pyxser> -- Python library (written in C by Daniel Molina Wegener),
to serialize any Python object as XML. L.
=head1 Known bugs and limitations
=head1 Ownership
This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L<http://creativecommons.org/licenses/by-sa/3.0/>.
The author's present email is sderose at acm.org.
For the most recent version, see L.
=cut
"""