用 Python 读取 ARC 文件的小脚本

import gzip,re

# Path of the gzip-compressed ARC archive that main() opens.
# NOTE(review): the name `file` shadows the Python 2 builtin of the same name.
file = "/arcs/some.arc.gz"

# Header line that precedes every record: five whitespace-separated fields,
# the last of which is the payload length in bytes.  Presumably (per the ARC
# v1 layout -- confirm against the archives you read) the fields are URL,
# IP address, archive date, content type and length.
# Compiled as a bytes pattern because the archive is read in binary mode; on
# Python 2 bytes and str are the same type, so behaviour there is unchanged.
ARC_PATTERN = re.compile(br"(^.*:[^ ]*)\s([0-9.]+)\s(\d+)\s(\S+)\s(\d+)$")


def readchunk(fp):
    """Read the next record (header line plus payload) from an ARC stream.

    Args:
        fp: A file-like object opened in binary mode and positioned at the
            start of a record header line.

    Returns:
        A ``(meta, page)`` tuple.  ``meta`` is the 5-tuple of header fields
        captured by ``ARC_PATTERN``; ``page`` is the payload followed by the
        one extra byte that is read after it.

    Raises:
        EOFError: If the stream is exhausted (no header line left).
        ValueError: If the header line does not match ``ARC_PATTERN``.
    """
    line = fp.readline()
    if not line:
        # readline() returns an empty string only at end of file.
        raise EOFError("no more ARC records")

    header = ARC_PATTERN.match(line)
    if header is None:
        raise ValueError("malformed ARC header line: %r" % (line,))

    meta = header.groups()
    # The +1 also consumes the byte that follows the payload (presumably the
    # newline separating records), so the next readline() starts on a header.
    page = fp.read(int(meta[4]) + 1)
    return (meta, page)

def main():
    """Open the configured archive and print its first two records.

    The archive path comes from the module-level ``file`` constant.  The
    stream is opened in binary mode and is closed when the block exits,
    even if reading a record fails.
    """
    with gzip.open(file, 'rb') as f:
        # print(...) with a single argument behaves the same on Python 2
        # and Python 3.
        print(readchunk(f))
        print(readchunk(f))

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

在 Windows 下打开文件时注意加上 'rb'(二进制模式),不然会踩坑。

Leave a Comment


NOTE - You can use these HTML tags and attributes:
<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>