Iterate over a directory and archive matching files using Python

I need to iterate through a folder, find every group of files whose names are identical except for the extension, and then archive each group (preferably using tarfile) into one file.

So, I have 5 files with the name "example1", each with a different file extension. I need to bundle them together and output them as "example1.tar" or something similar.

This would be simple enough with a simple loop, for example:

# Bundle every file whose name starts with "example1" into example1.tar.
tar = tarfile.open('example1.tar', "w")

for outfile in glob('example1*'):
    tar.add(outfile)

tar.close()

    

300 "" , 5 , . . .

+3
6

, , MapReduce. MapReduce , :

def map_reduce(data, mapper, reducer):
    """Group mapped values by key, then reduce each group to one result.

    ``mapper(elem)`` must return a ``(key, value)`` pair for every element
    of ``data``.  Values sharing a key are collected, in input order, into
    a list.  ``reducer(key, values)`` is then called once per key and its
    return value becomes the entry for that key in the returned dict.
    """
    grouped = {}
    for item in data:
        k, v = mapper(item)
        if k not in grouped:
            grouped[k] = []
        grouped[k].append(v)
    # Keys keep their first-seen order; each list is replaced by its reduction.
    return {k: reducer(k, values) for k, values in grouped.items()}

Here, the grouping key is os.path.splitext(fname)[0]. To build a tarball from each group, use tarfile. Putting it together:

import os
import tarfile

def make_tar(basename, files):
    """Write ``files`` into an uncompressed archive named ``basename + '.tar'``.

    basename -- path of the archive without the ``.tar`` suffix.
    files    -- iterable of paths to add, in order.

    Returns None.  The archive is opened in a ``with`` block so the file
    handle is released even if adding one of the members raises.
    """
    with tarfile.open(basename + '.tar', 'w') as tar:
        for f in files:
            tar.add(f)

def _by_stem(entry):
    # Key each directory entry by its name with the last extension removed.
    return os.path.splitext(entry)[0], entry


# Group the current directory's entries by stem and archive each group.
map_reduce(os.listdir('.'), _by_stem, make_tar)

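Two notes. First, this relies on the map_reduce helper. Second, os.path.splitext(x)[0] strips only the last extension; if you want to strip every extension, group by strip_all_ext(x) instead: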

def strip_all_ext(path):
    """Return ``path`` with every extension removed from its last component.

    ``'dir/archive.tar.gz'`` becomes ``'dir/archive'``.  Leading dots of the
    file name are kept as part of the name (as ``os.path.splitext`` does), so
    a dotfile such as ``'.bashrc'`` is returned unchanged instead of
    collapsing to an empty base name.
    """
    head, tail = os.path.split(path)
    # Set the leading dots aside so they are not mistaken for separators.
    stem = tail.lstrip(os.extsep)
    leading = tail[:len(tail) - len(stem)]
    basename = leading + stem.split(os.extsep)[0]
    return os.path.join(head, basename)
+2

:

  • , , -
  • tar

- :

import os
import tarfile
from collections import defaultdict

myfiles = os.listdir(".")   # every entry in the current directory
totar = defaultdict(list)

# Fill the defaultdict: base name as key, list of its extensions as value.
for name in myfiles:
    base, ext = os.path.splitext(name)
    totar[base].append(ext)

# Write one "<base>.tar" per base name.
for base, exts in totar.items():
    files = [base + ext for ext in exts]
    # The with-block closes the archive even if adding a member fails.
    with tarfile.open(base + ".tar", "w") as tar:
        for item in files:
            tar.add(item)
+2

. .

  • . collections.defaultdict

  • tar . .

. 1.

Use glob to list the files, os.path.basename to drop the directory part, and os.path.splitext to separate the extension.

.

, ?


Part 2 puts files in tar archives. For this, you have most of the code you need.

+1
source

Try using the glob module: http://docs.python.org/library/glob.html

0
source
#! /usr/bin/env python

import os
import tarfile

# Map each base name to the files in 'files/' that share it.
tarfiles = {}
for f in os.listdir('files'):
    # splitext leaves extensionless names intact; slicing at rfind('.')
    # would cut off their last character because rfind returns -1.
    prefix = os.path.splitext(f)[0]
    tarfiles.setdefault(prefix, []).append(f)

# Write one gzip-compressed archive per base name into the current directory.
for k, v in tarfiles.items():
    with tarfile.open('%s.tar.gz' % k, 'w:gz') as tf:
        for f in v:
            # add() records the real size and copies the file's data; a bare
            # TarInfo(name) has size 0 and would store an empty member.
            tf.add(os.path.join('files', f), arcname=f)
0
source
import os
import tarfile

# Map each base name to the directory entries that share it.
allfiles = {}

for filename in os.listdir("."):
    # splitext keeps extensionless names whole instead of reducing them to ''.
    basename = os.path.splitext(filename)[0]
    allfiles.setdefault(basename, []).append(filename)

for basename, filenames in allfiles.items():
    # A single file has nothing to be bundled with, so skip it.
    if len(filenames) < 2:
        continue
    # The with-block closes the archive even if adding a member fails.
    with tarfile.open(basename + ".tar", "w") as tardata:
        for filename in filenames:
            tardata.add(filename)
-1
source

All Articles