Here is a scenario:
I write an iPhone app using NSLocalizedString in case I decide to release it in different countries.
I decide to release the app in France.
The translator takes my Localizable.strings and does a great job translating.
I update the app and need some more translating.
I'm using genstrings, and it overwrites the good work the translator did. Is there an easy way for me to manage my translations across app versions?
Check out this project on GitHub, which provides a Python script that makes genstrings a little bit smarter.
Since I don't like link-only answers (links may die), I'll also drop the Python script here (all credit goes to the author of the linked project):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Localize.py - Incremental localization on XCode projects
# João Moreno 2009
# http://joaomoreno.com/
# Modified by Steve Streeting 2010 http://www.stevestreeting.com
# Changes
# - Use .strings files encoded as UTF-8
# This is useful because Mercurial and Git treat UTF-16 as binary and can't
# diff/merge them. For use on iPhone you can run an iconv script during build to
# convert back to UTF-16 (Mac OS X will happily use UTF-8 .strings files).
# - Clean up .old and .new files once we're done
# Modified by Pierre Dulac 2012 http://friendcashapp.com
# Changes
# - use logging instead of print
# Adds
# - MIT Licence
# - the first parameter in the command line to specify the path of *.lproj directories
# - an optional parameter to control the debug level (set to info by default)
# Fixes
# - do not convert a file if it is already in utf-8
# - allow multiline translations generated by genstrings by modifying the re_translation regex
# MIT Licence
#
# Copyright (C) 2012 Pierre Dulac
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from sys import argv
from codecs import open
from re import compile
from copy import copy
import os
import shutil
import optparse
import logging
logging.getLogger().level = logging.INFO
__version__ = "0.1"
__license__ = "MIT"
USAGE = "%prog [options]"
VERSION = "%prog v" + __version__
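# Matches one translation line of the form: "key" = "value"; (escaped quotes allowed on both sides)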
re_translation = compile(r'^"((?:[^"]|\\")+)" = "((?:[^"]|\\")+)";(?:\n)?$')
re_comment_single = compile(r'^/\*.*\*/$')
re_comment_start = compile(r'^/\*.*$')
re_comment_end = compile(r'^.*\*/$')
class LocalizedString():
def __init__(self, comments, translation):
self.comments, self.translation = comments, translation
self.key, self.value = re_translation.match(self.translation).groups()
def __unicode__(self):
return u'%s%s\n' % (u''.join(self.comments), self.translation)
class LocalizedFile():
def __init__(self, fname=None, auto_read=False):
self.fname = fname
self.reset()
if auto_read:
self.read_from_file(fname)
def reset(self):
self.strings = []
self.strings_d = {}
def read_from_file(self, fname=None):
self.reset()
fname = self.fname if fname == None else fname
try:
f = open(fname, encoding='utf_8', mode='r')
except:
print 'File %s does not exist.' % fname
exit(-1)
try:
line = f.readline()
logging.debug(line)
except:
logging.error("Can't read line for file: %s" % fname)
raise
i = 1
while line:
comments = [line]
if not re_comment_single.match(line):
while line and not re_comment_end.match(line):
line = f.readline()
comments.append(line)
line = f.readline()
i += 1
# handle multi lines
while len(line) > 1 and line[-2] != u';':
line += f.readline()
i += 1
logging.debug("%d %s" % (i, line.rstrip('\n')))
if line and re_translation.match(line):
translation = line
else:
logging.error("Line %d of file '%s' raising the exception: %s" % (i, self.fname, line))
raise Exception('invalid file')
line = f.readline()
i += 1
while line and line == u'\n':
line = f.readline()
i += 1
string = LocalizedString(comments, translation)
self.strings.append(string)
self.strings_d[string.key] = string
f.close()
def save_to_file(self, fname=None):
fname = self.fname if fname == None else fname
try:
f = open(fname, encoding='utf_8', mode='w')
except:
print 'Couldn\'t open file %s.' % fname
exit(-1)
# sort by key
self.strings.sort(key=lambda item: item.key)
for string in self.strings:
f.write(string.__unicode__())
f.close()
def merge_with(self, new):
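        # Keep the existing translation for keys that still exist; take comments
        # and brand-new keys from the freshly generated file. Keys that are no
        # longer present in 'new' are dropped.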
merged = LocalizedFile()
for string in new.strings:
if self.strings_d.has_key(string.key):
new_string = copy(self.strings_d[string.key])
new_string.comments = string.comments
string = new_string
merged.strings.append(string)
merged.strings_d[string.key] = string
return merged
def update_with(self, new):
for string in new.strings:
if not self.strings_d.has_key(string.key):
self.strings.append(string)
self.strings_d[string.key] = string
def merge(merged_fname, old_fname, new_fname):
try:
old = LocalizedFile(old_fname, auto_read=True)
new = LocalizedFile(new_fname, auto_read=True)
merged = old.merge_with(new)
merged.save_to_file(merged_fname)
except Exception, inst:
logging.error('Error: input files have invalid format.')
raise
STRINGS_FILE = 'Localizable.strings'
def localize(path, excluded_paths):
languages = [os.path.join(path,name) for name in os.listdir(path) if name.endswith('.lproj') and os.path.isdir(os.path.join(path,name))]
print "languages found", languages
for language in languages:
original = merged = language + os.path.sep + STRINGS_FILE
old = original + '.old'
new = original + '.new'
if os.path.isfile(original):
try:
open(original, encoding='utf_8', mode='r').read()
os.rename(original, old)
except:
os.system('iconv -f UTF-16 -t UTF-8 "%s" > "%s"' % (original, old))
# gen
os.system('find %s -name \*.m -not -path "%s" | xargs genstrings -q -o "%s"' % (path, excluded_paths, language))
try:
open(original, encoding='utf_8', mode='r').read()
shutil.copy(original, new)
except:
os.system('iconv -f UTF-16 -t UTF-8 "%s" > "%s"' % (original, new))
# merge
merge(merged, old, new)
logging.info("Job done for language: %s" % language)
else:
os.system('genstrings -q -o "%s" `find %s -name "*.m" -not -path "%s"`' % (language, path, excluded_paths))
os.rename(original, old)
try:
open(old, encoding='utf_8', mode='r').read()
except:
os.system('iconv -f UTF-16 -t UTF-8 "%s" > "%s"' % (old, original))
if os.path.isfile(old):
os.remove(old)
if os.path.isfile(new):
os.remove(new)
def parse_options():
"""parse_options() -> opts, args
Parse any command-line options given returning both
the parsed options and arguments.
"""
parser = optparse.OptionParser(usage=USAGE, version=VERSION)
parser.add_option("-d", "--debug",
action="store_true", default=False, dest="debug",
help="Set to DEBUG the logging level (default to INFO)")
parser.add_option("-p", "--path",
action="store", type="str", default=os.getcwd(), dest="path",
help="Path (relative or absolute) to use for searching for *.lproj directories")
parser.add_option("-e", "--exclude",
action="store", type="str", default=None, dest="excluded_paths",
help="Regex for paths to exclude ex. ``./Folder1/*``")
opts, args = parser.parse_args()
return opts, args
if __name__ == '__main__':
opts, args = parse_options()
if opts.debug:
logging.getLogger().level = logging.DEBUG
if opts.path:
opts.path = os.path.realpath(opts.path)
if opts.excluded_paths:
opts.excluded_paths = os.path.realpath(opts.excluded_paths)
logging.info("Running the script on path %s" % opts.path)
localize(opts.path, opts.excluded_paths)
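For reference, here's how I invoke it (the path and exclude pattern are made-up examples; the -d, -p and -e flags come straight from parse_options above):
python Localize.py -p ./MyApp/Resources -e "./MyApp/Resources/Vendor/*" -d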
I use http://www.loc-suite.com to translate only the new parts.
I was having a similar issue. I changed a lot of keys for my NSLocalizedString macros and was worried that I'd ship the app with missing translations (and I didn't want to run through the whole app manually to check that everything was there either...).
I tried out the GitHub project that Gabriella Petronella posted, but I wasn't really happy with it, so I wrote my own Python module to accomplish what I wanted to do.
(I'm not going to post the code here, since it's a whole module and not just one script. :D)
Here are a couple of options you can choose from:
You can use a hand-written solution like the script mentioned above, which does not completely rewrite the old files when adding recently translated strings to them.
You can also create an additional strings.h file that contains all the strings you have, so you only ever need to rewrite them in one place; genstrings is then no longer necessary. There is a downside, though: the strings.h file will be unstructured, which is probably inconvenient for big projects.
Thanks to Best practice using NSLocalizedString
// In strings.h
#define YOUR_STRING_KEY NSLocalizedString(@"Cancel", nil)
// Somewhere else in your code
NSLog(@"%@", YOUR_STRING_KEY);
I actually started using a tool called PhraseApp https://phraseapp.com/projects
It's worth looking into if you have to localise an app!
from PIL import Image
img = Image.new("RGB", (100, 100))
img.show()
The error message:
FSPathMakeRef(/Applications/Preview.app) failed with error -43.
Following on from Sean True's answer, an even quicker but temporary fix is to simply make a symbolic link to Preview.app in the old location. In the terminal, run:
ln -s /System/Applications/Preview.app /Applications/Preview.app
This fixed the problem for me.
There's an official fix on GitHub for Pillow 7, but I'm still on 6.
This appears to be a PIL ImageShow issue, with the PIL MacViewer using /Applications/Preview.app as an absolute path to the OSX Preview app.
It's not there in Catalina. I did a quick hack to ImageShow.py changing /Applications/Preview.app to just Preview.app and the issue went away. That might or might not still work on pre-Catalina OSX, but I don't have an easy way to test.
It has apparently moved to /System/Applications/Preview.app so a quick check at run time would probably cover both cases.
elif sys.platform == "darwin":
class MacViewer(Viewer):
format = "PNG"
options = {'compress_level': 1}
preview_locations = ["/System/Applications/Preview.app","/Applications/Preview.app"]
preview_location = None
def get_preview_application(self):
if self.preview_location is None:
for pl in self.preview_locations:
if os.path.exists(pl):
self.preview_location = pl
break
if self.preview_location is None:
raise RuntimeError("Can't find Preview.app in %s" % self.preview_locations)
return self.preview_location
def get_command(self, file, **options):
# on darwin open returns immediately resulting in the temp
# file removal while app is opening
pa = self.get_preview_application()
command = "open -a %s" % pa
command = "(%s %s; sleep 20; rm -f %s)&" % (command, quote(file),
quote(file))
return command
def show_file(self, file, **options):
"""Display given file"""
pa = self.get_preview_application()
fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'w') as f:
f.write(file)
with open(path, "r") as f:
subprocess.Popen([
'im=$(cat);'
'open %s $im;'
'sleep 20;'
'rm -f $im' % pa
], shell=True, stdin=f)
os.remove(path)
return 1
I'm a student trying to compare two .txt files of string "answers" from a multiple-choice test (a, c, d, b, etc.). I've found some information on different parts of the problem I'm having, and a possible way to get the comparisons I want, but the guide was meant for in-script strings rather than a list pulled from a file.
For the import of the two files and comparing them, I'm basing my code on my textbook and this video here: Video example
I've got the code up and running, but for some reason I'm only getting a 0.0% match when I expect a 100.0% match, at least for the two text files I'm using with identical answer lists.
import difflib
answer_sheet = "TestAnswerList.txt"
student_sheet = "StudentAnswerList.txt"
ans_list = open(answer_sheet).readlines()
stu_list = open(student_sheet).readlines()
sequence = difflib.SequenceMatcher(isjunk=None, a=ans_list, b=stu_list)
check_list = sequence.ratio()*100
check_list = round(check_list,1)
print(str(check_list) + "% match")
if check_list == 100:
print('This grade is Plus Ultra!')
elif check_list >= 75:
print('Good job, you pass!')
else:
print('Please study harder for your next test.')
# not the crux of my issue, but will accept advice all the same
answer_sheet.close
student_sheet.close
If I add in the close statements at the end for both of the text files, then I receive this error:
Traceback (most recent call last):
  File "c:/Users/jaret/Documents/Ashford U/CPT 200/Python Code/Wk 5 Int Assg - Tester code.py", line 18, in <module>
    answer_sheet.close
AttributeError: 'str' object has no attribute 'close'
I had to re-look at how my files were being opened and realized I was calling close on the filename strings, not on the file objects returned by open(). I chose to go with a basic open and a later close on the file objects themselves to reduce any potential errors on my novice part.
import difflib
f1 = open('TestAnswerList.txt')
tst_ans = f1.readlines()
f2 = open('StudentAnswerList.txt')
stu_ans = f2.readlines()
sequence = difflib.SequenceMatcher(isjunk=None, a=stu_ans, b=tst_ans)
check_list = sequence.ratio()*100
check_list = round(check_list,1)
print(str(check_list) + "% match") # Percentage correct
if check_list == 100:
print('This grade is Plus Ultra!')
elif check_list >= 75:
print('Good job, you pass!')
else:
print('Please study harder for your next test.')
# Visual Answer match-up
print('Test Answers: ', tst_ans)
print('Student Answers:', stu_ans)
f1.close()
f2.close()
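A side note on the original 0.0% result: SequenceMatcher compares the two lists element by element, so any invisible difference (stray trailing spaces, for example) makes every line unequal. A small illustration with made-up answer lists:
import difflib

clean = ["a\n", "c\n", "d\n", "b\n"]
padded = ["a \n", "c \n", "d \n", "b \n"]  # same answers, stray trailing spaces

print(difflib.SequenceMatcher(None, clean, padded).ratio())  # 0.0
# Stripping each line before comparing restores the expected match:
print(difflib.SequenceMatcher(None,
                              [s.strip() for s in clean],
                              [s.strip() for s in padded]).ratio())  # 1.0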
I'm new to Python and want to write a script to recursively delete files in a directory that are older than 2 years and have a specific extension, like .zip or .txt.
I know this isn't GitHub, but I spent quite some time trying to figure this out, and I have to admit the answer isn't that obvious, but I found it eventually. I have no idea why I spent half an hour on this random program, but I did.
It's lucky I'm using Python 3.7 as well, because I didn't see your tag at the bottom of the post.
Features
- Deletes all matching files from the directory and its subdirectories
- Lets you change the extension to whatever you want, e.g. txt, bat, png, jpg
- Lets you change the folder you want erased, e.g. from your C drive to Pictures
The Program
import glob, os, datetime

os.chdir("C:\\Users\\")  # ------> PLEASE CHANGE THIS TO PREVENT YOUR C DRIVE GETTING DESTROYED, THIS IS JUST AN EXAMPLE
src = os.getcwd()   # scans src, which is set to the current working directory
cn = 0              # how many files matched the criteria
filedate = '2019'   # creation year to match
clrd = 0            # total size in bytes of the matched files
matched = []        # the matched files themselves

def find(path, *exts):
    dirs = [a[0] for a in os.walk(path)]
    f_filter = [d + e for d in dirs for e in exts]
    return [f for files in [glob.iglob(files) for files in f_filter] for f in files]

print(src)
my_files = find(src, '\*py', '\*txt')  # you can also add parameters like '\*txt', '\*jpg' etc.
for f in my_files:
    created = datetime.datetime.fromtimestamp(os.path.getctime(f)).strftime('%Y/%m/%d|%H:%M:%S')
    if filedate in created:
        cn += 1
        print(' | CREATED:', created, '|', 'Folder:', '[', os.path.basename(os.path.dirname(f)), ']',
              'File:', os.path.split(os.path.abspath(f))[1], ' Bytes:', os.stat(f).st_size)
        clrd += os.stat(f).st_size
        matched.append(f)

def delete():
    x = input("Delete {} file(s)? >>> ".format(cn))
    if x.lower() == 'yes':
        for f in matched:  # remove every matched file, not just the last one visited
            os.remove(f)
        print("You have cleared {} bytes of data".format(clrd))
    elif x.lower() == 'no':
        print('Aborting...')
    else:
        print("type yes or no")
        delete()

if cn == 0:
    print("No files match the given criteria!")
else:
    delete()
On its own
This is for applying it to your own code
import glob, os, datetime

os.chdir('C:\\Users')
src = os.getcwd()
filedate = '2019'  # creation year to match

def find(path, *exts):
    dirs = [a[0] for a in os.walk(path)]
    f_filter = [d + e for d in dirs for e in exts]
    return [f for files in [glob.iglob(files) for files in f_filter] for f in files]

my_files = find(src, '\*py', '\*txt')  # to add extensions do \*extension
for f in my_files:
    if filedate in datetime.datetime.fromtimestamp(os.path.getctime(f)).strftime('%Y/%m/%d|%H:%M:%S'):
        os.remove(f)
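Back to what the question literally asks (files older than 2 years with specific extensions), here is a minimal sketch that keys off modification time instead of a hard-coded year string; the root directory and extension list are made-up examples:
import os
import time

ROOT = 'C:\\Users\\me\\old_stuff'   # made-up example, change to your directory
EXTENSIONS = ('.zip', '.txt')       # extensions to delete
cutoff = time.time() - 2 * 365 * 24 * 60 * 60   # roughly two years ago

for dirpath, dirnames, filenames in os.walk(ROOT):
    for name in filenames:
        full = os.path.join(dirpath, name)
        if name.lower().endswith(EXTENSIONS) and os.path.getmtime(full) < cutoff:
            print('Deleting', full)
            os.remove(full)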
I have a set of .msg files stored on the E:/ drive that I have to read and extract some information from. For that I am using the below code in Python 3.6:
from email.parser import Parser
p = Parser()
headers = p.parse(open('E:/Ratan/msg_files/Test1.msg', encoding='Latin-1'))
print('To: %s' % headers['To'])
print('From: %s' % headers['From'])
print('Subject: %s' % headers['subject'])
The output I am getting is below:
To: None
From: None
Subject: None
I am not getting the actual values in the To, From, and Subject fields.
Any thoughts on why it is not printing the actual values?
Please download my sample msg file from this link:
drive.google.com/file/d/1pwWWG3BgsMKwRr0WmP8GqzG3WX4GmEy6/view
Here is a demonstration of how to use some of python's standard email libraries.
You didn't show us your input file in the question, and the g-drive URL is a dead link.
The code below looks just like yours and works fine, so I don't know what is odd about your environment, modulo some Windows 'rb' binary open nonsense, CRLFs, or the Latin1 encoding.
I threw in .upper() but it does nothing beyond showing that the API is case insensitive.
#! /usr/bin/env python3
from email.parser import Parser
from pathlib import Path
import mailbox
def extract_messages(maildir, mbox_file, k=2, verbose=False):
for n, message in enumerate(mailbox.mbox(mbox_file)):
with open(maildir / f'{n}.txt', 'w') as fout:
fout.write(str(message))
hdrs = 'From Date Subject In-Reply-To References Message-ID'.split()
p = Parser()
for i in range(min(k, n)):
with open(maildir / f'{i}.txt') as fin:
msg = p.parse(fin)
print([len(msg[hdr.upper()] or '')
for hdr in hdrs])
for k, v in msg.items():
print(k, v)
print('')
if verbose:
print(msg.get_payload())
if __name__ == '__main__':
# from https://mail.python.org/pipermail/python-dev/
maildir = Path('/tmp/py-dev/')
extract_messages(maildir, maildir / '2018-January.txt')
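A hedged guess, since the linked file can't be checked: if Test1.msg was saved from Outlook, it is an OLE compound document rather than RFC 822 text, and email.parser will find no headers in it at all, which would explain the None values. A quick sanity check on the first bytes (using the question's own path):
# OLE compound files start with the magic bytes d0 cf 11 e0 a1 b1 1a e1;
# plain RFC 822 mail is ordinary text and will not.
with open('E:/Ratan/msg_files/Test1.msg', 'rb') as f:
    print(f.read(8) == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1')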
I'm pretty happy with s3cmd, but there is one issue: How to copy all files from one S3 bucket to another? Is it even possible?
EDIT: I've found a way to copy files between buckets using Python with boto:
from boto.s3.connection import S3Connection
import time
def copyBucket(srcBucketName, dstBucketName, maxKeys = 100):
conn = S3Connection(awsAccessKey, awsSecretKey)
srcBucket = conn.get_bucket(srcBucketName);
dstBucket = conn.get_bucket(dstBucketName);
resultMarker = ''
while True:
keys = srcBucket.get_all_keys(max_keys = maxKeys, marker = resultMarker)
for k in keys:
print 'Copying ' + k.key + ' from ' + srcBucketName + ' to ' + dstBucketName
t0 = time.clock()
dstBucket.copy_key(k.key, srcBucketName, k.key)
print time.clock() - t0, ' seconds'
if len(keys) < maxKeys:
print 'Done'
break
resultMarker = keys[maxKeys - 1].key
Syncing is almost as straightforward as copying. There are fields for ETag, size, and last-modified available for keys.
Maybe this helps others as well.
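To sketch that syncing idea with the same boto API (a minimal outline under the same assumptions as copyBucket above, i.e. an open S3Connection and bucket objects from get_bucket):
def syncKey(srcBucket, dstBucket, key):
    # Copy only when the destination is missing the key or holds different bytes.
    existing = dstBucket.get_key(key.key)
    if existing is None or existing.etag != key.etag or existing.size != key.size:
        dstBucket.copy_key(key.key, srcBucket.name, key.key)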
s3cmd sync s3://from/this/bucket/ s3://to/this/bucket/
For available options, please use:
s3cmd --help
AWS CLI seems to do the job perfectly, and has the bonus of being an officially supported tool.
aws s3 sync s3://mybucket s3://backup-mybucket
http://docs.aws.amazon.com/cli/latest/reference/s3/sync.html
The answer with the most upvotes as I write this is this one:
s3cmd sync s3://from/this/bucket s3://to/this/bucket
It's a useful answer. But sometimes sync is not what you need (it deletes files, etc.). It took me a long time to figure out this non-scripting alternative to simply copy multiple files between buckets. (OK, in the case shown below it's not between buckets. It's between not-really-folders, but it works between buckets equally well.)
# Slightly verbose, slightly unintuitive, very useful:
s3cmd cp --recursive --exclude=* --include=file_prefix* s3://semarchy-inc/source1/ s3://semarchy-inc/target/
Explanation of the above command:
- --recursive: In my mind, my requirement is not recursive. I simply want multiple files. But recursive in this context just tells s3cmd cp to handle multiple files. Great.
- --exclude: It's an odd way to think of the problem. Begin by recursively selecting all files. Next, exclude all files. Wait, what?
- --include: Now we're talking. Indicate the file prefix (or suffix or whatever pattern) that you want to include.
- s3://sourceBucket/ s3://targetBucket/: This part is intuitive enough. Though technically it seems to violate the documented example from s3cmd help, which indicates that a source object must be specified:
s3cmd cp s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]
You can also use the web interface to do so:
Go to the source bucket in the web interface.
Mark the files you want to copy (use shift and mouse clicks to mark several).
Press Actions->Copy.
Go to the destination bucket.
Press Actions->Paste.
That's it.
I needed to copy a very large bucket, so I adapted the code in the question into a multi-threaded version and put it up on GitHub.
https://github.com/paultuckey/s3-bucket-to-bucket-copy-py
It's actually possible. This worked for me:
import boto.s3.bucket
import boto.s3.connection

AWS_ACCESS_KEY = 'Your access key'
AWS_SECRET_KEY = 'Your secret key'
SRC_BUCKET_NAME = 'your-source-bucket'        # placeholder
DEST_BUCKET_NAME = 'your-destination-bucket'  # placeholder
conn = boto.s3.connection.S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
bucket = boto.s3.bucket.Bucket(conn, SRC_BUCKET_NAME)
for item in bucket:
# Note: here you can put also a path inside the DEST_BUCKET_NAME,
# if you want your item to be stored inside a folder, like this:
# bucket.copy(DEST_BUCKET_NAME, '%s/%s' % (folder_name, item.key))
bucket.copy(DEST_BUCKET_NAME, item.key)
Thanks - I use a slightly modified version, where I only copy files that don't exist or are a different size, and check on the destination if the key exists in the source. I found this a bit quicker for readying the test environment:
def botoSyncPath(path):
"""
Sync keys in specified path from source bucket to target bucket.
"""
try:
conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
srcBucket = conn.get_bucket(AWS_SRC_BUCKET)
destBucket = conn.get_bucket(AWS_DEST_BUCKET)
for key in srcBucket.list(path):
destKey = destBucket.get_key(key.name)
if not destKey or destKey.size != key.size:
key.copy(AWS_DEST_BUCKET, key.name)
for key in destBucket.list(path):
srcKey = srcBucket.get_key(key.name)
if not srcKey:
key.delete()
except:
return False
return True
I wrote a script that backs up an S3 bucket: https://github.com/roseperrone/aws-backup-rake-task
#!/usr/bin/env python
from boto.s3.connection import S3Connection
import re
import datetime
import sys
import time
def main():
s3_ID = sys.argv[1]
s3_key = sys.argv[2]
src_bucket_name = sys.argv[3]
num_backup_buckets = sys.argv[4]
connection = S3Connection(s3_ID, s3_key)
delete_oldest_backup_buckets(connection, num_backup_buckets)
backup(connection, src_bucket_name)
def delete_oldest_backup_buckets(connection, num_backup_buckets):
"""Deletes the oldest backup buckets such that only the newest NUM_BACKUP_BUCKETS - 1 buckets remain."""
buckets = connection.get_all_buckets() # returns a list of bucket objects
num_buckets = len(buckets)
backup_bucket_names = []
for bucket in buckets:
if (re.search('backup-' + r'\d{4}-\d{2}-\d{2}' , bucket.name)):
backup_bucket_names.append(bucket.name)
backup_bucket_names.sort(key=lambda x: datetime.datetime.strptime(x[len('backup-'):17], '%Y-%m-%d').date())
    # The bucket names sort earliest to latest, so delete from the front, keeping the newest NUM_BACKUP_BUCKETS - 1
delete = len(backup_bucket_names) - (int(num_backup_buckets) - 1)
if delete <= 0:
return
for i in range(0, delete):
print 'Deleting the backup bucket, ' + backup_bucket_names[i]
connection.delete_bucket(backup_bucket_names[i])
def backup(connection, src_bucket_name):
now = datetime.datetime.now()
# the month and day must be zero-filled
    new_backup_bucket_name = 'backup-' + str('%02d' % now.year) + '-' + str('%02d' % now.month) + '-' + str('%02d' % now.day)
print "Creating new bucket " + new_backup_bucket_name
new_backup_bucket = connection.create_bucket(new_backup_bucket_name)
copy_bucket(src_bucket_name, new_backup_bucket_name, connection)
def copy_bucket(src_bucket_name, dst_bucket_name, connection, maximum_keys = 100):
src_bucket = connection.get_bucket(src_bucket_name);
dst_bucket = connection.get_bucket(dst_bucket_name);
result_marker = ''
while True:
keys = src_bucket.get_all_keys(max_keys = maximum_keys, marker = result_marker)
for k in keys:
print 'Copying ' + k.key + ' from ' + src_bucket_name + ' to ' + dst_bucket_name
t0 = time.clock()
dst_bucket.copy_key(k.key, src_bucket_name, k.key)
print time.clock() - t0, ' seconds'
if len(keys) < maximum_keys:
print 'Done backing up.'
break
result_marker = keys[maximum_keys - 1].key
if __name__ =='__main__':main()
I use this in a rake task (for a Rails app):
desc "Back up a file onto S3"
task :backup do
S3ID = "*****"
S3KEY = "*****"
SRCBUCKET = "primary-mzgd"
NUM_BACKUP_BUCKETS = 2
Dir.chdir("#{Rails.root}/lib/tasks")
system "./do_backup.py #{S3ID} #{S3KEY} #{SRCBUCKET} #{NUM_BACKUP_BUCKETS}"
end
mdahlman's answer didn't work for me, but this command copies all the files in bucket1 to a new folder (the command also creates this new folder) in bucket2:
s3cmd cp --recursive --include=file_prefix* s3://bucket1/ s3://bucket2/new_folder_name/
s3cmd won't cp using only prefixes or wildcards, but you can script the behavior with 's3cmd ls sourceBucket' plus awk to extract the object names. Then use 's3cmd cp sourceBucket/name destBucket' to copy each object name in the list.
I use these batch files in a DOS box on Windows:
s3list.bat
s3cmd ls %1 | gawk "/s3/{ print \"\\"\"\"substr($0,index($0,\"s3://\"))\"\\"\"\"; }"
s3copy.bat
@for /F "delims=" %%s in ('s3list %1') do @s3cmd cp %%s %2
You can also use s3funnel, which uses multi-threading:
https://github.com/neelakanta/s3funnel
example (without the access key or secret key parameters shown):
s3funnel source-bucket-name list | s3funnel dest-bucket-name copy --source-bucket source-bucket-name --threads=10