Quick utilities to help with data analysis from the shell:
Print the numbered column names of a CSV or TSV file. You can specify a file; otherwise it reads from stdin. It guesses the separator (whichever of tab or comma is more common in the header line), or you may specify one with --separator. This is particularly useful if you want to use awk to select columns.
#!/usr/bin/python
# if you use macports, you probably want the first line to be exactly #!/opt/local/bin/python
# Copyright 2015 Earl Hathaway rblog Ray at Bans earlh dot com (take my sunglasses off to email me)
# License: The author or authors of this code dedicate any and all copyright interest in this code to the public domain.
#
# print numbered column names or headers from a file or stdin if present with an optional field separator
# tested to work with python from 2.7 to 3.4
from __future__ import print_function

import argparse
import math  # no longer used below; kept in case other code in this file relies on it
import os.path
import sys


def build_parser():
    """Return the argparse parser describing this script's command line."""
    parser = argparse.ArgumentParser(description='print numbered column headers')
    parser.add_argument('file', nargs='?',
                        help='filename(default: stdin if not a tty)')
    parser.add_argument('--separator', dest='separator', nargs=1,
                        help='specify the field separator (default: whichever '
                             'of comma or tab is more common)')
    parser.add_argument('--python_dict', dest='pydict', action="store_true",
                        help='emit a python dict?')
    return parser


def guess_separator(header):
    """Return tab or comma, whichever occurs more often in *header*.

    A tie (including a header that contains neither) resolves to tab.
    """
    return "\t" if header.count('\t') >= header.count(',') else ","


def numbered_lines(fields):
    """Return one ' <n> <name>' string per field, numbered from 1.

    The numbers are right-aligned to the digit count of the largest index so
    the names line up in a single column.  (The previous ceil(log10(n)) width
    was one short when the field count was an exact power of ten.)
    """
    width = len(str(len(fields)))
    template = ' %%%dd %%s' % width
    return [template % (number, name.strip())
            for number, name in enumerate(fields, 1)]


def python_dict_literal(fields):
    """Return a python dict literal mapping each field name to its 0-based index.

    Backslashes and single quotes in names are escaped so the emitted text is
    still valid python when pasted into code.
    """
    entries = []
    for idx, name in enumerate(fields):
        escaped = name.strip().replace('\\', '\\\\').replace('\'', '\\\'')
        entries.append('\'%s\': %d' % (escaped, idx))
    return '{' + ', '.join(entries) + '}'


def main(argv=None):
    """Run the command line tool and return the process exit status.

    argv is the argument list without the program name; None means
    sys.argv[1:].  Returns 0 on success and 1 when there is no usable input.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # An explicitly named file wins over stdin: stdin is also "not a tty" under
    # cron or inside a pipeline, where it would otherwise shadow the file.
    if args.file is not None:
        if not os.path.isfile(args.file):
            print('File "%s" does not exist' % args.file, file=sys.stderr)
            return 1
        with open(args.file, 'r') as f:
            first = f.readline()
    elif not sys.stdin.isatty():
        first = sys.stdin.readline()
    else:
        print('no file specified and nothing on stdin', file=sys.stderr)
        parser.print_help(sys.stderr)
        return 1

    if not first:
        # readline() returns '' only at end of input, so there is no header
        print('input is empty', file=sys.stderr)
        return 1

    if args.separator is None:
        sep = guess_separator(first)
    else:
        sep = args.separator[0]  # nargs=1 wraps the value in a list
        if not sep:
            # str.split('') raises ValueError; report it as a usage error
            parser.error('--separator must not be empty')
    fields = first.split(sep)

    # emit a python dict to copy into code; should be zero based
    if args.pydict:
        print(python_dict_literal(fields))
        return 0

    for line in numbered_lines(fields):
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())