#!/usr/bin/python
#
# runCLD: Compare language detectors.
#
# 2012-04-26: Written by Steven J. DeRose.
#
# To do:
#
from __future__ import print_function
import sys
import os
import re
import argparse
from string import *
from math import *

import cld
#import subprocess
#import codecs, locale

from sjdUtils import *

version = "2012-11-30"


###############################################################################
# Process options
#
parser = argparse.ArgumentParser()
parser.add_argument(
    "--color", action='store_true',
    help='Colorize the output.')
parser.add_argument(
    "-nocolor", action='store_false', dest="color",
    help='Turn off colorizing.')
parser.add_argument(
    "-i", "-ignoreCase", action='store_true', dest="ignoreCase",
    help='Disregard case distinctions.')
parser.add_argument(
    "-q", action='store_true', dest='quiet',
    help='Suppress most messages.')
parser.add_argument(
    "-tickInterval", type=int, default=10000,
    help='Report progress every n records.')
parser.add_argument(
    "--verbose", action='count', default=0,
    help='Add more messages (repeatable).')
parser.add_argument(
    'files', nargs=argparse.REMAINDER,
    help='Path(s) to input file(s).')

# Module-level names; doOneFile() reads `args` as a global.
args = parser.parse_args()

# Use .get() so that an unset variable yields the warning, not a KeyError.
if os.environ.get("PYTHONIOENCODING") != "utf_8":
    print("Warning: PYTHONIOENCODING is not utf_8.")

su = sjdUtils()
su.setColors(args.color)


###############################################################################
###############################################################################
#
def doOneFile(fh):
    """Run the language detector over every line of an open file.

    For each line read from `fh`, prints the detected language name, a
    colon, and the line itself. Every `args.tickInterval` records a
    progress message giving the current record number is printed as well.

    Parameters:
        fh: an open, readable file-like object providing readline().

    Returns:
        The number of records (lines) read from `fh`.
    """
    recnum = 0
    while True:
        rec = fh.readline()
        if not rec:
            break  # EOF
        recnum += 1
        # A non-positive interval disables progress messages instead of
        # dividing by zero.
        if args.tickInterval > 0 and recnum % args.tickInterval == 0:
            print("Processing record %d." % recnum)
        # NOTE(review): this unpacks element [0] of the detect() result as a
        # 5-tuple; confirm that matches the return shape of the installed
        # cld binding.
        nm, code, rely, nbytes, det = cld.detect(rec)[0]
        # `rec` keeps its line terminator, so output is followed by a blank
        # line (unchanged from the original behavior).
        print(nm + ": " + rec)
    return recnum


###############################################################################
###############################################################################
# Main
#
totalRecords = 0
totalFiles = 0

if not args.files:
    # No paths given: read records from standard input. It is not counted
    # in totalFiles.
    totalRecords += doOneFile(sys.stdin)

for f in args.files:
    totalFiles += 1
    if os.path.isfile(f):
        # `with` guarantees the handle is closed even if detection raises.
        with open(f, "r") as fh:
            totalRecords += doOneFile(fh)
    else:
        su.vMsg(0, "Can't find file '" + f + "'.")

if not args.quiet:
    su.vMsg(0, "Done, %d files, %d records." % (totalFiles, totalRecords))

sys.exit(0)


###############################################################################
###############################################################################
# POD documentation. Never assigned at run time, because the script exits
# above; kept in the file for perldoc-style tools.
#
perldoc = """

=pod

=head1 Usage

runCLD [options]

Try out Language Detectors.

L. L L L 99%, 53 lgs, Java

=head1 Options

=over

=item * B<-color>

Colorize the output. Defaults to on if the environment variable C is set
and output is going to a terminal.

=item * B<-ignoreCase> or B<-i>

Ignore case distinctions.

=item * B<-q>

Suppress most messages.

=item * B<-tickInterval> I

Report progress every I records.

=item * B<--verbose>

Add more detailed messages (doesn't do much at the moment).

=item * B<-version>

Display version info and exit.

=back

=head1 Related Commands

=head1 Known bugs and limitations

=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L.

The author's present email is sderose at acm.org.

For the most recent version, see L.

=cut
"""