Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
#! /usr/bin/env python3
"""
zipmerge [-sret] [-n size ] [--help] [-o name] file1 
  file2 [filen...]
  
Options:  
  -s      Sort file entries by file name in merged files.
  -r      If -s is specified, append to the filename the names of the first and
          last files.
  -e      Erase source files. If the process is interrupted in the middle it
          could render the files useless. Always make backup copies of your
          files before using this option.
  -t      Test print out the inner structure but don't take any action.
  -n size Specify the maximum file size in Kb. If the merge exceeds the maximum
          size, additional files will be created, each also not exceeding it.
  -o name Name to be used for resulting merged file(s).
  file 1  list of files to be merged, the program accepts a minimum of 2  files, 
  file 2  wild cards are accepted.
  filen 
 
"""


import sys
import os.path
import getopt as gop
import zipfile as zp

# Option flags; the command-line handling in the __main__ section overrides
# these defaults.
SORTED = False                 # -s: order members by archived file name
RANGE = False                  # set by the -r handler
KEEP = True                    # cleared by the -e handler
TEST = False
MFILENAME = "merged.zip"       # -o: name template for the output archive(s)
CHUNKSIZE = 2 * 1024 * 1024    # -n: per-archive limit, compared against sizes in Kb

# Shared state populated while the merge runs.
ZFHANDLES = []                 # ZipFile handles of the input archives
OUTFILES = []                  # ZipFile handles of the output archives
MEMBERS = []                   # [input ZipFile handle, ZipInfo] pairs
#FILETREE={}
TOTALSIZE = 0.0                # accumulated compressed size of all members, in Kb
ALL = False                    # True once "all" was answered at the overwrite prompt
def sortMembers():
    """Sort the global MEMBERS list in place by archived file name."""
    def archived_name(entry):
        # Each entry is a [zip handle, ZipInfo] pair; order by the ZipInfo name.
        return entry[1].filename

    # Slice assignment keeps the same list object, so other references to
    # MEMBERS observe the new order.
    MEMBERS[:] = sorted(MEMBERS, key=archived_name)
def openOutputZipFile(filename):
    """Create an empty output archive and return it opened for appending.

    If *filename* already exists the user is asked whether it may be
    overwritten.  Answering "a"/"all" (or just pressing Enter) sets the
    global ALL flag, so later calls overwrite existing files without asking.
    The returned ZipFile is also appended to the global OUTFILES list.

    Exits the process with status 3 when the user declines to overwrite, or
    (after writing a message to stderr) when the file cannot be created.
    """
    global ALL, OUTFILES
    yes_answers = ("y", "Y")
    no_answers = ("n", "N")
    all_answers = ("a", "A", "all", "All", "")
    try:
        if os.path.exists(filename):
            key = "x"
            # Keep asking until a recognised answer is given; skip the prompt
            # entirely once "all" was chosen on an earlier call.
            while key not in yes_answers + no_answers + all_answers and not ALL:
                print("\nOutput file %s already exist and will be overwriten!!!" % filename)
                key = input("Do you want to proceed? (y,n,[all])")
                ALL = key in all_answers
            if not (ALL or key in yes_answers):
                # Declined: leave silently with the same status as a failure.
                sys.exit(3)
            os.remove(filename)
        # Write an empty archive first, then reopen it in append mode.
        zp.ZipFile(filename, "w").close()
        zpout = zp.ZipFile(filename, "a")
    except (OSError, IOError):
        # Only real I/O failures are reported; SystemExit is not intercepted.
        sys.stderr.write("Could not create output file\n")
        sys.exit(3)
    OUTFILES.append(zpout)
    return zpout
def cleanup():
    """Close every archive recorded in the global ZFHANDLES and OUTFILES lists.

    Closing is best effort: an ordinary error raised by one handle is ignored
    so that the remaining handles still get closed.  Input handles are closed
    before output handles.
    """
    for handle in ZFHANDLES + OUTFILES:
        try:
            handle.close()
        except Exception:
            # Deliberately ignored (best effort).  Only ordinary errors are
            # suppressed; KeyboardInterrupt and SystemExit still propagate.
            pass

if __name__ == "__main__":
    # Command-line entry point: parse the options, open every input archive,
    # then copy the archived members into numbered output archives whose
    # compressed payload stays below CHUNKSIZE kilobytes.
    try:
        # "e" and "t" appear in the usage text, so they have to be listed here
        # or getopt rejects them as unknown options.
        (args, filenames) = gop.getopt(sys.argv[1:], "sretn:o:", ["help"])
    except gop.GetoptError:
        sys.stderr.write("Sintax Error unrecognised option")
        sys.stderr.write(__doc__)
        sys.exit(2)

    for option, value in args:
        if option == "-s":
            SORTED = True
        elif option == "-r":
            # NOTE(review): RANGE is recorded but nothing in this script reads it.
            RANGE = True
        elif option == "-e":
            # NOTE(review): KEEP is recorded but erasing the source files is
            # not implemented in this script.
            KEEP = False
        elif option == "-t":
            # Dry run: report the planned layout without creating any file.
            TEST = True
        elif option == "-n":
            try:
                CHUNKSIZE = int(value)
            except ValueError:
                CHUNKSIZE = 0
            # A zero or negative limit would divide by zero below and could
            # never hold any member.
            if CHUNKSIZE <= 0:
                sys.stderr.write("Invalid size for -n: %s\n" % value)
                sys.exit(2)
        elif option == "-o":
            MFILENAME = value
        elif option == "--help":
            sys.stdout.write(__doc__)
            sys.exit(1)

    if len(filenames) < 2:
        sys.stdout.write(__doc__)
        sys.exit(2)

    # splitext also copes with output names that carry no extension or whose
    # directory part contains a dot; "extension" keeps its leading dot.
    basename, extension = os.path.splitext(MFILENAME)

    try:
        for filename in filenames:
            print("Opening %s" % filename)
            try:
                ZFHANDLES.append(zp.ZipFile(filename, "r"))
            except (OSError, IOError, zp.BadZipfile):
                sys.stderr.write("Could not open input file %s\n" % filename)
                sys.exit(3)

        print("gathering archived files statistics")
        for handle in ZFHANDLES:
            for info in handle.infolist():
                MEMBERS.append([handle, info])
                #FILETREE.setdefault(handle,[]).append(info)
                TOTALSIZE += info.compress_size / 1024
        print("found %d archived files in %d archives" % (len(MEMBERS), len(ZFHANDLES)))

        # Ceiling of TOTALSIZE / CHUNKSIZE: a lower bound on the output count.
        numfiles = int(TOTALSIZE // CHUNKSIZE) + (1 if TOTALSIZE % CHUNKSIZE else 0)
        print("The Number of expected requiered files is: %d (could be more)" % numfiles)

        print("Starting Merging process...")

        print("Creating file structure")
        print("\n".join([key[1].filename for key in MEMBERS[:10]]))
        if SORTED:
            sortMembers()
        print("Sorted")
        print("\n".join([key[1].filename for key in MEMBERS[:10]]))

        i = 1
        while MEMBERS:
            filesize = 0   # compressed bytes assigned to the current output file
            placed = 0     # members assigned to the current output file
            m = 0
            filename = "%s_%03d%s" % (basename, i, extension)
            print("opening file %s" % filename)
            outfile = None if TEST else openOutputZipFile(filename)
            while m < len(MEMBERS):
                handle, info = MEMBERS[m]
                fits = (info.compress_size + filesize) / 1024 < CHUNKSIZE
                # The first member of a file is always accepted; otherwise a
                # member larger than CHUNKSIZE would never fit anywhere and
                # the outer loop would create empty archives forever.
                if placed == 0 or fits:
                    del MEMBERS[m]
                    print(info.filename)
                    if outfile is not None:
                        try:
                            # Read by ZipInfo so the exact entry is copied even
                            # when an archive holds duplicate names.
                            outfile.writestr(info, handle.read(info))
                        except zp.BadZipfile:
                            print(info.filename)
                            print("Problem with file member: %s skiping" % info.filename)
                            continue
                    filesize += info.compress_size
                    placed += 1
                elif SORTED:
                    # Preserve the sorted order: start a new file instead of
                    # looking further down the list for a smaller member.
                    break
                else:
                    m += 1
            i += 1
            print("Closing file %s" % filename)
            if outfile is not None:
                outfile.close()
    finally:
        # Release every input and output handle, also on errors or early exit.
        cleanup()