#!/usr/bin/python
#
# runCLD: Compare language detectors.
#
# 2012-04-26: Written by Steven J. DeRose.
#
# To do:
#
from __future__ import print_function
import sys
import os
import re
import argparse
from string import *
from math import *
import cld
#import subprocess
#import codecs, locale
from sjdUtils import *
version = "2012-11-30"
###############################################################################
# Process options
#
# Build the command-line interface. Option spellings (including the
# single-dash long forms) are kept exactly as callers already use them.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--color", action='store_true',
    help='Colorize the output.')
parser.add_argument(
    "-nocolor", action='store_false', dest="color",
    help='Turn off colorizing.')
parser.add_argument(
    "-i", "-ignoreCase", action='store_true', dest="ignoreCase",
    help='Disregard case distinctions.')
parser.add_argument(
    "-q", action='store_true', dest='quiet',
    help='Suppress most messages.')
parser.add_argument(
    "-tickInterval", type=int, default=10000,
    help='Report progress every n records.')
parser.add_argument(
    "--verbose", action='count', default=0,
    help='Add more messages (repeatable).')
# The usage documentation at the end of this file promises -version; it is
# spelled with a single dash so that abbreviations of --verbose stay unique.
parser.add_argument(
    "-version", action='version', version=version,
    help='Display version info and exit.')
parser.add_argument(
    'files', nargs=argparse.REMAINDER,
    help='Path(s) to input file(s).')

args = parser.parse_args()

# PYTHONIOENCODING may be unset (so use .get() rather than indexing, which
# would raise KeyError), may carry an ":errorhandler" suffix, and may spell
# the codec as utf-8 / UTF-8 / utf8. Only warn when it is really not UTF-8.
ioEncoding = os.environ.get("PYTHONIOENCODING", "")
codecName = ioEncoding.split(":")[0].lower().replace("-", "_")
if (codecName not in ("utf_8", "utf8")):
    print("Warning: PYTHONIOENCODING is not utf_8.")

# Shared utility object; used below for (optionally colorized) messages.
su = sjdUtils()
su.setColors(args.color)
###############################################################################
###############################################################################
#
def doOneFile(fh):
    """Run the language detector over every line of an open file.

    Each line read from fh is passed to cld.detect(); the detected language
    name is printed, followed by ": " and the line itself (including its
    line terminator). A progress message is printed every
    args.tickInterval records.

    fh: an open, readable file-like object (anything with readline()).

    Returns the number of records (lines) read.
    """
    recnum = 0
    # readline() is used instead of iterating fh so that input arriving on
    # a pipe or terminal is handled one line at a time.
    rec = fh.readline()
    while rec:
        recnum += 1
        # Guard against -tickInterval 0 (division by zero), and report the
        # current record number rather than the interval itself.
        if args.tickInterval > 0 and recnum % args.tickInterval == 0:
            print("Processing record " + str(recnum) + ".")
        # NOTE(review): this unpacks element [0] of cld.detect()'s result
        # into five values; confirm against the installed cld binding that
        # detect() really returns a sequence whose first item is that tuple.
        nm, _code, _reliable, _byteCount, _details = cld.detect(rec)[0]
        print(nm + ": " + rec)
        rec = fh.readline()
    return recnum
###############################################################################
###############################################################################
# Main
#
# Process each input in turn, accumulating totals for the final summary.
totalRecords = 0
totalFiles = 0
if (len(args.files) == 0):
    # No paths were given: read standard input. (Previously stdin was
    # selected here but never actually processed.)
    totalFiles += 1
    totalRecords += doOneFile(sys.stdin)
for f in args.files:
    # Every named path is counted, whether or not it turns out to exist.
    totalFiles += 1
    if (os.path.isfile(f)):
        # 'with' guarantees the handle is closed even if detection fails.
        with open(f, "r") as fh:
            totalRecords += doOneFile(fh)
    else:
        su.vMsg(0, "Can't find file '" + f + "'.")
if (not args.quiet):
    su.vMsg(0, "Done, " + str(totalFiles) + " files, " +
        str(totalRecords) + " records.")
sys.exit(0)
###############################################################################
###############################################################################
#
perldoc = """
=pod
=head1 Usage
runCLD [options]
Try out Language Detectors.
L.
L
L
L 99%, 53 lgs, Java
=head1 Options
=over
=item * B<--color> / B<-nocolor>
Colorize the output, or turn colorizing off.
Defaults to on if the environment variable C is set
and output is going to a terminal.
=item * B<-ignoreCase> or B<-i>
Ignore case distinctions.
=item * B<-q>
Suppress most messages.
=item * B<-tickInterval> I<n>
Report progress every I<n> records.
=item * B<--verbose>
Add more detailed messages (doesn't do much at the moment).
=item * B<-version>
Display version info and exit.
=back
=head1 Related Commands
=head1 Known bugs and limitations
=head1 Ownership
This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L<http://creativecommons.org/licenses/by-sa/3.0/>.
The author's present email is sderose at acm.org.
For the most recent version, see L.
=cut
"""