#!/usr/bin/python
#
# json2xml.py
#
# 2012-12-04: Written by Steven J. DeRose.
#
# Converts a JSON document to XML by loading it with 'json', promoting the
# top-level dict to an object, and serializing that object with 'pyxser'.
# Optional regex-based passes then strip some of the pyxser markup.
#
# To do:
#     Option to move atomic named items to parent attributes?
#     Restrict -pad to items whose declared type is integral.
#
from __future__ import print_function
import sys
import os
import re
import io
import argparse
from string import *
from math import *
import json

# pyxser is a third-party C extension. Import it defensively so that the
# pure string helpers below stay importable (and testable) without it;
# main() reports the problem if serialization is actually requested.
try:
    import pyxser
except ImportError:
    pyxser = None

version = "2012-12-04"


###############################################################################
# Process options
#
def build_parser():
    """Return the argparse parser describing this script's command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-noprop', action='store_true',
        help='Untag the "pyxs:prop" element surrounding data atoms.')
    parser.add_argument(
        '-nonamespaces', action='store_true',
        help='Delete the "pyxs:" namespace prefix.')
    parser.add_argument(
        '-nosize', action='store_true',
        help='Delete the "size" attribute everywhere.')
    parser.add_argument(
        '-notype', action='store_true',
        help='Delete the "type" attribute everywhere.')
    parser.add_argument(
        '-pad', type=int,
        help='Left-pad integers to this many columns.')
    parser.add_argument(
        "-quiet", action='store_true',
        help='Suppress most messages.')
    parser.add_argument(
        "-verbose", action='count', default=0,
        help='Add more messages (repeatable).')
    parser.add_argument(
        '-version', action='version', version=version)
    parser.add_argument(
        'files', nargs=argparse.REMAINDER,
        help='Path(s) to input file(s).')
    return parser


###############################################################################
# Messages
#
def _error(text):
    """Write an error message to stderr (never suppressed by -quiet)."""
    sys.stderr.write(text + "\n")


def _make_notifier(quiet):
    """Return a one-argument function that reports progress on stderr.

    The returned function does nothing when *quiet* is true.
    """
    def _notify(text):
        if not quiet:
            sys.stderr.write(text + "\n")
    return _notify


def _names_utf8(encoding_spec):
    """Tell whether a PYTHONIOENCODING value selects UTF-8.

    Accepts the usual spellings ("utf-8", "utf_8", "UTF8", ...) and ignores
    an optional ":errorhandler" suffix. None or "" counts as not UTF-8.
    """
    name = (encoding_spec or "").split(":", 1)[0]
    return name.lower().replace("-", "").replace("_", "") == "utf8"


###############################################################################
# From http://stackoverflow.com/questions/1305532/convert-python-dict-to-object
# Promotes a dict to an Object.
#
class Str:
    def __init__(self, **entries):
        # Each key of the dict becomes an attribute of the instance.
        self.__dict__.update(entries)


###############################################################################
# De-cluttering of the serialized XML.
#
# These passes are regular-expression based, not a real XML parse. They
# assume markup shaped like <pyxs:prop type="int" name="x">1</pyxs:prop>
# -- TODO confirm against the installed pyxser version.
#
_PROP_TAG_RE = re.compile(r'</?pyxs:prop\b[^>]*>')
_NS_PREFIX_RE = re.compile(r'<(/?)pyxs:')
# The lookahead keeps the integer part of a float ("3.14") from matching.
_INT_PROP_RE = re.compile(r'(<pyxs:prop\b[^>]*>)(-?\d+)(?=</pyxs:prop>)')


def _drop_attribute(xml, attr_name):
    """Remove the attribute *attr_name* from every start tag in *xml*.

    The attribute may appear anywhere inside the tag, and a "pyxs:" prefix
    on the attribute name is tolerated. Character data and end tags are
    left alone because the pattern cannot cross a '<' or '>'.
    """
    pattern = re.compile(
        r'(<[^<>/!?][^<>]*?)\s+(?:pyxs:)?%s="[^"]*"' % re.escape(attr_name))
    return pattern.sub(r'\1', xml)


def _pad_integers(xml, width):
    """Right-justify integer content of pyxs:prop elements to *width*."""
    def _padded(match):
        return match.group(1) + match.group(2).rjust(width)
    return _INT_PROP_RE.sub(_padded, xml)


def declutter_xml(xml, pad=None, noprop=False, nonamespaces=False,
                  nosize=False, notype=False, notify=None):
    """Return *xml* with the requested pyxser clutter removed.

    Parameters:
        xml           The serialized XML, as text.
        pad           If truthy, left-pad integer items with spaces to at
                      least this many columns.
        noprop        Remove pyxs:prop start and end tags (content is kept).
        nonamespaces  Remove the "pyxs:" prefix from element names.
        nosize        Remove every "size" attribute.
        notype        Remove every "type" attribute.
        notify        Optional one-argument callable that receives a short
                      message for each dropping pass that runs.

    Padding runs first because it relies on the pyxs:prop tags that
    *noprop* removes.
    """
    if notify is None:
        notify = lambda text: None

    if pad:
        xml = _pad_integers(xml, pad)

    if noprop:
        notify("Dropping prop elements")
        xml = _PROP_TAG_RE.sub('', xml)

    if nonamespaces:
        notify("Dropping namespaces")
        xml = _NS_PREFIX_RE.sub(r'<\1', xml)

    if nosize:
        notify("Dropping size attributes")
        xml = _drop_attribute(xml, "size")

    if notype:
        notify("Dropping type attributes")
        xml = _drop_attribute(xml, "type")

    return xml


###############################################################################
# Main
#
def _load_json(path):
    """Parse JSON from the file at *path*, or from stdin if *path* is None.

    Files are read as UTF-8 (a leading byte-order mark is accepted) and are
    closed before returning. Raises IOError or ValueError on failure.
    """
    if path is None:
        return json.load(sys.stdin)
    with io.open(path, "r", encoding="utf-8-sig") as handle:
        return json.load(handle)


def main(argv=None):
    """Run the conversion; return the process exit status.

    *argv* is the list of command-line arguments (default: sys.argv[1:]).
    Returns 0 on success and 1 if the input cannot be found, cannot be
    parsed, is not a JSON object, or pyxser is unavailable.
    """
    args = build_parser().parse_args(argv)
    notify = _make_notifier(args.quiet)

    # Reported on stderr so the warning can never end up inside the XML.
    if not _names_utf8(os.environ.get("PYTHONIOENCODING")):
        notify("Warning: PYTHONIOENCODING is not utf_8.")

    if pyxser is None:
        _error("The 'pyxser' package is required but could not be imported.")
        return 1

    path = None
    if args.files:
        path = args.files[0]
        if not os.path.isfile(path):
            _error("Can't find file '" + path + "'.")
            return 1
        if len(args.files) > 1:
            notify("Warning: only the first file is converted; "
                   "ignoring the rest.")

    # Load the JSON and make a Python Object.
    try:
        data = _load_json(path)
    except (IOError, ValueError) as err:
        _error("Can't load JSON from %s: %s"
               % ("stdin" if path is None else "'" + path + "'", err))
        return 1

    # Str(**data) needs a mapping; other top-level JSON values (arrays,
    # numbers, ...) cannot be promoted to an object this way.
    if not isinstance(data, dict):
        _error("The top-level JSON value must be an object.")
        return 1

    theXml = pyxser.serialize(obj=Str(**data), enc="utf-8")
    # Under Python 3 a bytes result must be decoded before the text regexes
    # run; under Python 2 bytes is str, so this is a no-op there.
    if isinstance(theXml, bytes) and not isinstance(theXml, str):
        theXml = theXml.decode("utf-8")

    theXml = declutter_xml(
        theXml,
        pad=args.pad,
        noprop=args.noprop,
        nonamespaces=args.nonamespaces,
        nosize=args.nosize,
        notype=args.notype,
        notify=notify)

    print(theXml)
    return 0


###############################################################################
###############################################################################
###############################################################################
#
perldoc = """

=pod

=head1 Usage

json2xml.py [options] [file]

Simple but thorough conversion using off-the-shelf packages
'json' and 'pyxser'. Reads standard input if no file is given.

=head2 Notes

The C<json> decoder library produces exactly these Python types:

    JSON           -- Python
    --------------------------
    object         -- dict
    array          -- list
    string         -- unicode
    number (int)   -- int, long
    number (real)  -- float
    true           -- True
    false          -- False
    null           -- None

Thus, these are the only Python types passed to the XML serialized library.
This makes some of the generality of C<pyxser> unnecessary here.

=head1 Options

=over

=item * B<-noprop>

Untag the "pyxs:prop" elements from the XML output.
Where they have names, the name is lost (should instead move these items
onto the container element as named attributes, or something like that).

=item * B<-nonamespaces>

Delete the "pyxs:" namespace prefixes from all output XML elements.

=item * B<-nosize>

Delete the "size" attributes from the XML output.

=item * B<-notype>

Delete the "type" attributes from the XML output (this would mainly be
useful in environments that don't care, such as many scripting languages,
or JSON data such as this script deals with).

=item * B<-pad> I<n>

Left-pad integers with spaces, to a minimum of I<n> columns.

=item * B<-quiet>

Suppress most messages.

=item * B<-verbose>

Add more detailed messages (doesn't do much at the moment).

=item * B<-version>

Display version info and exit.

=back

=head1 Related Commands

C<json> -- built-in Python package for JSON support.

C<pyxser> -- Python library (written in C by Daniel Molina Wegener),
to serialize any Python object as XML.

=head1 Known bugs and limitations

The de-cluttering options work by regular expressions on the serialized
text, not by parsing the XML.

Only the first input file named on the command line is converted.

The top-level JSON value must be an object.

B<-pad> pads any all-digit item, whatever its declared type.

=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L<http://creativecommons.org/licenses/by-sa/3.0/>.

The author's present email is sderose at acm.org.

=cut
"""


if __name__ == "__main__":
    sys.exit(main())