The impetus for this was that someone gave me an XYZ file the had generated by copying columns from Excel. On Mac. mac2unix took care of the line endings, but there were tabs (\t) all over the place.
Usage:
polish_xyz ugly.xyz pretty.xyz
Script:
#!/usr/bin/python import sys def getrawdata(infile): f=open(infile,'r') n=0 preamble=[] struct=[] for line in f: if n<2: data-blogger-escaped-if="" data-blogger-escaped-line.rstrip="" data-blogger-escaped-n="" data-blogger-escaped-preamble="">3: line=line.rstrip() struct+=[line] n+=1 xyz=[struct] return xyz, preamble def genxyzstring(coords,elementnumber): x_str='%10.5f'% coords[0] y_str='%10.5f'% coords[1] z_str='%10.5f'% coords[2] element=elementnumber xyz_string=element+(3-len(element))*' '+10*' '+\ (8-len(x_str))*' '+x_str+10*' '+(8-len(y_str))*' '+y_str+10*' '+(8-len(z_str))*' '+z_str+'\n' return xyz_string def getstructures(rawdata,preamble,outfile): n=0 for structure in rawdata: n=n+1 num="%03d" % (n,) g=open(outfile,'w') itson=False cartesian=[] for item in structure: coordx=filter(None,item.split(' ')) coordy=filter(None,item.split('\t')) if len(coordx)>len(coordy): coords=coordx else: coords=coordy coordinates=[float(coords[1]),float(coords[2]),float(coords[3])] element=coords[0] cartesian+=[genxyzstring(coordinates,element)] g.write(str(preamble[0])+'\n') g.write(str(preamble[1])+'\n') for line in cartesian: g.write(line) g.close() cartesian=[] return 0 if __name__ == "__main__": infile=sys.argv[1] outfile=sys.argv[2] xyz,preamble=getrawdata(infile) structures=getstructures(xyz,preamble,outfile)
No comments:
Post a Comment