Below is part of a Sybase database script that matches security reference
data from various files and then performs DML operations.
My prior experience is with Oracle, so I would like to know what the [STRONG] and [VERY STRONG] keywords mean in the Sybase script below.
#---------------------------------------------#
# PDP INSERTION #
#---------------------------------------------#
# NOTE(review): the meaning of the [STRONG] / [VERY STRONG] tags used in the
# MATCH blocks below is not visible in this excerpt -- presumably a
# match-strength weighting defined by the loading tool; confirm against its
# documentation.
ON NEW pdp SECURITY
DECLARE MATCH SECURITY: SECURITY_MATCH
DECLARE CHAR: LOCAL_EXC_1
DECLARE CHAR: LOCAL_TICKER_1
DECLARE CHAR: LOCAL_EXC_2
DECLARE CHAR: LOCAL_COUNTRY_1
DECLARE CHAR: LOCAL_SEDOL_1
DECLARE MATCH SECURITY ALLOW_VENDOR_COLLISIONS: PKEY_MATCH
# Derive the local ticker by dropping a prefix from $LOCAL_ID: four characters
# for HONG_KONG ids starting with "HE!0", three characters otherwise.
# NOTE(review): LOCAL_TICKER_1 is not read again in this excerpt.
IF $EXC = "HONG_KONG" AND LEFT($LOCAL_ID,4) = "HE!0" THEN
LOCAL_TICKER_1 = RIGHT($LOCAL_ID, STRLEN($LOCAL_ID) - 4)
ELSE
LOCAL_TICKER_1 = RIGHT($LOCAL_ID, STRLEN($LOCAL_ID) - 3)
ENDIF
#-----------------------------#
# PRIMARY KEY CHECK - CHECKS #
# FOR A DUPLICATE PRIMARY KEY.#
#-----------------------------#
PKEY_MATCH = MATCH SECURITY
$VALOREN = pdp.VALOREN
END MATCH
#--------------#
# SEDOL MATCH #
#--------------#
# NOTE(review): SECURITY_MATCH.NUMBER is read here before any MATCH result is
# assigned to SECURITY_MATCH in this excerpt, and LOCAL_SEDOL_1, LOCAL_EXC_1
# and LOCAL_COUNTRY_1 are declared but never assigned in the visible part --
# presumably set in code that was cut from the excerpt; verify.
IF SECURITY_MATCH.NUMBER = 0 AND $SEDOL != NULL AND $EXC != NULL THEN
SECURITY_MATCH = MATCH SECURITY
MUST LOCAL_SEDOL_1 = SEDOL [VERY STRONG]
MUST LOCAL_EXC_1 = EXC OR
LOCAL_COUNTRY_1 = COUNTRY_TRADE [STRONG]
MUST ACTIVE = 1 [STRONG]
END MATCH
ENDIF
#-------------#
# ISIN MATCH #
# INDEXES #
#-------------#
# Only attempted for $TYPE = 34 with a non-NULL $ISIN, and only when the
# previous match left SECURITY_MATCH.NUMBER at 0.
IF $ISIN != NULL AND $TYPE = 34 AND SECURITY_MATCH.NUMBER = 0 THEN
SECURITY_MATCH = MATCH SECURITY
MUST $ISIN = ISIN [VERY STRONG]
MUST EXC = "ZZZ" [STRONG]
MUST ACTIVE = 1 [STRONG]
END MATCH
ENDIF
# Diagnostic output of the values used by the checks above.
PRINT( "PKEY_MATCH.NUMBER IS: ", PKEY_MATCH.NUMBER)
PRINT ("ISIN IS: ", $ISIN)
PRINT ("EXC IS: ", $EXC)
PRINT ("LOCAL_EXC_1 IS:", LOCAL_EXC_1)
PRINT ("$LOCAL_ID IS: ", $LOCAL_ID)
# Add the vendor row only when the primary-key check found no existing match.
IF PKEY_MATCH.NUMBER = 0 THEN
ADD_VENDOR_ROW(SECURITY_MATCH)
ENDIF
Related
I'm trying to get this code to run, but I get the error shown below. Can someone please help? I've tried reading about this in other posts, but I don't really know how to apply it here. I'm trying to iterate over the rows of this database and select 1400 random ones. It stops with the error below.
def paragraph_generator(test=True, itersize=5000, year=None, state=None):
    """Yield one cleaned token list per randomly sampled newspaper page.

    A single query samples pages dated 1860-1920 in random order. The rows are
    streamed through the server-side cursor and each page's text is tokenised
    and filtered. A page with no tokens left, or whose share of tokens missing
    from ``wn_lemmas`` is 75% or more, yields an empty list.

    The query is executed exactly once: a named (server-side) cursor rejects a
    second ``execute()`` call, which is what the former ``while True`` loop
    ran into.

    Args:
        test: When true, the number of rows is additionally capped at 10000.
        itersize: Value assigned to ``cur.itersize`` before the query runs.
        year: Unused; kept so existing callers keep working.
        state: Unused; kept so existing callers keep working.

    Yields:
        list: The filtered tokens of one page, or ``[]`` for a dropped page.
    """
    sample_size = 1400
    test_row_cap = 10000
    # The query can carry only one LIMIT clause, so the test cap is folded
    # into it instead of being appended as a second "limit ...".
    row_limit = min(sample_size, test_row_cap) if test else sample_size
    sql = f"""
        SELECT
            text_id,
            lccn_sn,
            date,
            ed,
            seq,
            chroniclingamerica_meta.statefp,
            chroniclingamerica_meta.countyfp,
            text_ocr
        FROM
            chroniclingamerica natural join chroniclingamerica_meta
        WHERE date_part('year',date) BETWEEN 1860 AND 1920
        ORDER BY RANDOM()
        LIMIT {row_limit}
    """
    # Build the punctuation-stripping table once rather than once per row.
    strip_punct = str.maketrans("", "", punct)

    con, cur = database_connection.connect(cursor_type="server")
    cur.itersize = itersize
    try:
        print(sql)
        cur.execute(sql)
        # Iterating the cursor streams the rows instead of loading them all.
        for row in cur:
            # NOTE: the earlier stem_text() call was dropped; its result was
            # overwritten by the line below before it was ever used.
            tokens = (
                row[-1]
                .translate(strip_punct)
                .replace("\n", " ")
                .lower()
                .split(" ")
            )
            # For 3-letter words, only keep WordNet recognized tokens
            tokens_3 = [a for a in tokens if len(a) == 3 and a in wn_lemmas]
            # Remove 1-, 2-, and 3-letter words
            tokens = gensim.parsing.preprocessing.remove_short_tokens(
                tokens, minsize=4
            )
            # Add back in 3-letter WordNet-recognized tokens
            tokens = tokens + tokens_3
            # Remove stopwords in the stopword list
            tokens = gensim.parsing.preprocessing.remove_stopword_tokens(
                tokens, stopwords=stop_words
            )
            print("THIS IS THE LENGTH OF TOKENS")
            print(len(tokens))
            if tokens:
                # Proportion of tokens that look like OCR errors on this page
                recognised = sum(1 for a in tokens if a in wn_lemmas)
                ocr = 1 - (recognised / len(tokens))
            else:
                ocr = float("nan")
            print("THIS IS OCR")
            print(ocr)
            # NaN compares false, so a page without tokens is dropped too.
            yield tokens if ocr < 0.75 else []
    finally:
        con.close()
Error:
cur.execute(sql)
psycopg2.ProgrammingError: can't call .execute() on named cursors more than once
When using the rapid annotator tool brat, it appears that the created annotations file will present the annotation in the order that the annotations were performed by the user. If you start at the beginning of a document and go the end performing annotation, then the annotations will naturally be in the correct offset order. However, if you need to go earlier in the document and add another annotation, the offset order of the annotations in the output .ann file will be out of order.
How then can you rearrange the .ann file such that the annotations are in offset order when you are done? Is there some option within brat that allows you to do this or is it something that one has to write their own script to perform?
Hearing nothing, I did write a python script to accomplish what I had set out to do. First, I reorder all annotations by begin index. Secondly, I resequence the label numbers so that they are once again in ascending order.
import optparse, sys
# Field separators of a brat .ann line: a tab between the three main fields,
# a single space inside the middle "type begin end" field.
splitchar1 = '\t'
splitchar2 = ' '


# For brat, overlapping annotations are not permitted (or at least a warning
# is generated), so sorting on the begin offset alone is sufficient. It is
# probably a good idea anyway.
class AnnotationRecord:
    """One annotation line: label, type, begin offset, end offset and text."""

    label = 'T0'
    type = ''
    begin = -1
    end = -1
    text = ''

    def __repr__(self):
        """Return the record in .ann line layout, without adding a newline."""
        # The parentheses keep the continuation lines inside one expression;
        # without them the lines starting with '+' are not part of the return.
        return (self.label + splitchar1
                + self.type + splitchar2
                + str(self.begin) + splitchar2
                + str(self.end) + splitchar1 + self.text)
def create_record(parts):
    """Build an AnnotationRecord from the tab-separated fields of one line.

    ``parts[0]`` is the label, ``parts[1]`` holds "type begin end" separated
    by ``splitchar2`` and ``parts[2]`` is the annotated text. Offsets are
    stored as the strings found in the line.
    """
    rec = AnnotationRecord()
    span_fields = parts[1].split(splitchar2)
    rec.label, rec.text = parts[0], parts[2]
    rec.type = span_fields[0]
    rec.begin = span_fields[1]
    rec.end = span_fields[2]
    return rec
def main(filename, out_filename):
    """Sort the annotations of an .ann file by begin offset and renumber them.

    Every non-blank line of ``filename`` is parsed with ``create_record``,
    the records are ordered by ``int(begin)`` and relabelled ``T1``, ``T2``,
    ... in that order, then written to ``out_filename`` one per line.

    NOTE(review): every record is relabelled ``T<n>``, so lines that refer to
    the old labels are not updated -- confirm the input only contains
    text-bound annotations.

    Args:
        filename: Path of the .ann file to read.
        out_filename: Path of the sorted file to write.
    """
    with open(filename, 'r') as fo:
        lines = fo.readlines()

    annotation_records = []
    for line in lines:
        # Strip the terminator here and add it back on output, so a final
        # line without a newline cannot be glued to the record after it.
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank lines carry no annotation and would break the parser.
            continue
        annotation_records.append(create_record(line.split(splitchar1)))

    # sort based upon begin
    sorted_records = sorted(annotation_records, key=lambda a: int(a.begin))

    # now relabel based upon the sorted order
    for label_value, sorted_record in enumerate(sorted_records, 1):
        sorted_record.label = 'T' + str(label_value)

    # now write the resulting file to disk
    with open(out_filename, 'w') as fo:
        for sorted_record in sorted_records:
            fo.write(repr(sorted_record) + '\n')
# An .ann line looks like: T#<tab>Type Start End<tab>Text
# Command-line arguments: input file, output file
if __name__ == '__main__':
    parser = optparse.OptionParser(
        formatter=optparse.TitledHelpFormatter(),
        usage=globals()['__doc__'],
        version='$Id$',
    )
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='verbose output',
    )
    options, args = parser.parse_args()
    # Both the input and the output path are required.
    if len(args) < 2:
        parser.error('missing argument')
    main(*args[:2])
    sys.exit(0)
I would like to shorten "NSLocalizedString" to "_" so I'm using macro
_(x) NSLocalizedString(#x, #__FILE__)
.
But now, when I want to generate strings for localization with
find . -name \*.m | xargs genstrings
it generates nothing.
Any help?
You can tell genstrings to look for a different function by using the '-s' argument:
genstrings -s MyFunctionName ....
However, MyFunctionName must follow the same naming and argument conventions as one of the built-in NSLocalizedString macros.
In your case, you can not just specify the string key, you must also specify the documentation string. In fact, you should never generate a strings file without both the string and documentation. There are many languages where the actual phrase or word will depend on context. German is a great example where a car is "das auto" and more than one is "die autos". There are many more examples that include changes for gender, number, time, question versus statement, and yes versus no. The documentation string helps your translator figure out what translation to use.
In addition, the best practice is to use a key that is different from the native language word. That says use NSLocalizedStringWithDefaultValue(key, table, bundle, val, comment).
You can specify nil for the table and [NSBundle mainBundle] for the bundle argument.
You can wrap this in a shorthand, but you still have to follow the StringWithDefaultValue name and the arguments for genstrings to work.
I strongly recommend you look at the WWDC 2012 session on Localization Tips and Tricks.
Maurice
You can use the -s option of genstrings. From the man page :
-s routine
Substitutes routine for NSLocalizedString. For example, -s MyLocalString will catch calls to MyLocalString and MyLocalStringFromTable.
So I think you could try :
genstrings -s _
I had the same problem when my NSLocalizedString macro took 1 argument instead of the 2 that genstrings expects, so I wrote a Python script that does the job.
the first argument for the script is the macro name and the second is the path to your project.
import fnmatch
import os
import sys
from xml.dom import minidom

# Usage: <script> <macro name> <project root>
function = sys.argv[1]
rootdir = sys.argv[2]

# Generate strings from .m files
files = []
for root, dirnames, filenames in os.walk(rootdir):
    for filename in fnmatch.filter(filenames, '*.m'):
        files.append(os.path.join(root, filename))

# Collect each distinct quoted literal that follows the macro name on a line.
strings = []
for file in files:
    with open(file) as source:
        for lineNumber, line in enumerate(source, 1):
            index = line.find(function)
            if index == -1:
                continue
            callStr = line[index:]
            # NOTE(review): '#' is taken as the marker in front of the quoted
            # literal -- confirm it matches how the macro is called.
            index = callStr.find('#')
            if index == -1:
                print('call with a variable/macro. file: ' + file + ' line: %d' % lineNumber)
                continue
            callStr = callStr[index+1:]
            index = callStr.find('")')
            if index == -1:
                # The literal is not closed on this line; skipping it avoids
                # writing an empty key to the strings file.
                continue
            callStr = callStr[:index+1]
            if callStr not in strings:
                strings.append(callStr)

# Write strings to file
with open('Localizable.strings', 'w') as f:
    for string in strings:
        f.write(string + ' = ' + string + ';\n\n')
I have improved Or Arbel's script to include the cases where there's multiple macro-calls on a single line:
import fnmatch
import os
from xml.dom import minidom
import sys

# Usage: <script> <macro name> <project root>
function = sys.argv[1]
rootdir = sys.argv[2]

# Generate strings from .m files
files = []
for root, dirnames, filenames in os.walk(rootdir):
    for filename in fnmatch.filter(filenames, '*.m'):
        files.append(os.path.join(root, filename))

# Collect each distinct quoted literal, visiting every occurrence of the
# macro name on a line rather than only the first one.
strings = []
for file in files:
    with open(file) as source:
        for lineNumber, line in enumerate(source, 1):
            index = line.find(function)
            while index != -1:
                # Resume the next search just past this occurrence.
                startIndex = index + 1
                callStr = line[index:]
                # NOTE(review): '#' is taken as the marker in front of the
                # quoted literal -- confirm it matches how the macro is called.
                marker = callStr.find('#')
                if marker == -1:
                    print('call with a variable/macro. file: ' + file + ' line: %d' % lineNumber)
                else:
                    callStr = callStr[marker+1:]
                    closing = callStr.find('")')
                    # An unterminated literal is skipped, so no empty key
                    # ends up in the strings file.
                    if closing != -1:
                        callStr = callStr[:closing+1]
                        if callStr not in strings:
                            strings.append(callStr)
                index = line.find(function, startIndex)

# Write strings to file
with open('Localizable.strings', 'w') as f:
    for string in strings:
        f.write(string + ' = ' + string + ';\n\n')
Can anyone give me a clue how to create or call a regular-expression function in DB2 for iSeries?
Example:
DECLARE VAL VARCHAR (16) DEFAULT 'abcde1235876e' ;
DECLARE RET INT DEFAULT 0;
I'm just checking VARIABLE VAL must only contain numeric value and return true/false
SET VAL = I_NEED_FUNCTION_REGEX(VAL);
IF (VAL = true) THEN
SET RET = 1;
ELSE
SET RET = 0;
END IF;
as simple as that, but i've been searching in IBM as follows:
http://www.ibm.com/developerworks/data/library/techarticle/0301stolze/0301stolze.html
but i don't quite understand.
Can u help me ?
UPDATE
I'm back to the old way and simple for now.
-- Returns 1 when VAL is a single decimal digit ('0'..'9'), otherwise 0.
-- A NULL or empty VAL returns 0.
CREATE FUNCTION TEST.VALIDATE_NUMERIC (VAL CHARACTER VARYING(1))
RETURNS INTEGER
LANGUAGE SQL
SPECIFIC TEST.VALIDATE_NUMERIC
-- The body reads and writes no table data, so CONTAINS SQL is sufficient.
CONTAINS SQL
CALLED ON NULL INPUT
FENCED
DISALLOW PARALLEL
NO EXTERNAL ACTION
BEGIN ATOMIC
-- VAL is compared with character literals, so no character-to-number
-- conversion takes place and no condition handler is needed. The previous
-- handler declaration had no action and could not compile.
RETURN CASE
WHEN VAL IN ( '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' ) THEN 1
ELSE 0
END ;
END
GO
Thanks
MRizq
Out of the box, DB2 does not come with the capability to handle regex. There are some functions that handle some pattern matching, but they are severely restricted.
The article you linked is how to set up a UDF (user-defined function) to call out to an external (C) library to provide this functionality. While the steps are shown for LUW, the iSeries version should be roughly equivalent; you're going to have to talk your DBAs into implementing the call out to relevant libraries.
You can use LOCATE(VAL, '0123456789') to return a 0 if not numeric and the digit + 1 if found:
-- Searched CASE: a simple CASE cannot take a comparison such as "WHEN > 0".
CASE WHEN LOCATE(VAL, '0123456789') > 0 THEN 1 ELSE 0 END
For a multi-character string you can use the following:
-- Maps every digit of the trimmed value to '0' and compares the result with a
-- string of '0' of the same length; any other character breaks the equality.
-- NOTE(review): an empty trimmed value also compares equal here -- confirm
-- whether that should count as numeric.
CASE WHEN TRANSLATE(TRIM(VAL), '0', '0123456789', '0')
= REPEAT('0', LENGTH(TRIM(VAL)))
THEN 1 ELSE 0 END
I have recovered some Word documents from a corrupted hard drive using a piece of software called photorec. The problem is that the documents' names can't be recovered; they are all renamed by a sequence of numbers. There are over 2000 documents to sort through and I was wondering if I could rename them using some automated process.
Is there a script I could use to find the first 10 letters in the document and rename it with that? It would have to be able to cope with multiple documents having the same first 10 letters and so not write over documents with the same name. Also, it would have to avoid renaming the document with illegal characters (such as '?', '*', '/', etc.)
I only have a little bit of experience with Python, C, and even less with bash programming in Linux, so bear with me if I don't know exactly what I'm doing if I have to write a new script.
How about VBScript? Here is a sketch:
'' Copies every Word document in FolderName to a new file in the same folder,
'' named after the first ten characters of its first non-empty paragraph
'' (cleaned by CleanString). A counter is appended when the name is taken.
FolderName = "C:\Docs\"
Set fs = CreateObject("Scripting.FileSystemObject")
Set fldr = fs.GetFolder(FolderName)
Set ws = CreateObject("Word.Application")
For Each f In fldr.Files
    '' Skip the "~$" lock files Word leaves next to open documents
    If Left(f.Name, 2) <> "~$" Then
        If InStr(f.Type, "Microsoft Word") Then
            MsgBox f.Name
            Set doc = ws.Documents.Open(FolderName & f.Name)
            s = vbNullString
            i = 1
            '' Take the first paragraph that still has text after cleaning
            Do While Trim(s) = vbNullString And i <= doc.Paragraphs.Count
                s = doc.Paragraphs(i).Range.Text
                s = CleanString(Left(s, 10))
                i = i + 1
            Loop
            doc.Close False
            If s = "" Then s = "NoParas"
            '' Extension with its leading dot. Right(name, InStrRev(name, "."))
            '' returned the wrong number of characters for most names.
            ext = fs.GetExtensionName(f.Name)
            If ext <> "" Then ext = "." & ext
            s1 = s
            i = 1
            '' Test the full target path, folder and extension included,
            '' so an existing copy is really detected
            Do While fs.FileExists(FolderName & s1 & ext)
                s1 = s & i
                i = i + 1
            Loop
            MsgBox "Name " & FolderName & f.Name & " As " & FolderName & s1 & ext
            '' This uses copy, because it seems safer
            f.Copy FolderName & s1 & ext, False
            '' MoveFile would rename the file instead of copying it:
            '' fs.MoveFile FolderName & f.Name, FolderName & s1 & ext
        End If
    End If
Next
MsgBox "Done"
ws.Quit
Set ws = Nothing
Set fs = Nothing
Function CleanString(StringToClean)
    '' Returns StringToClean with every character other than a letter or a
    '' digit removed (the match ignores case).
    ''http://msdn.microsoft.com/en-us/library/ms974570.aspx
    Dim rx
    Set rx = CreateObject("VBScript.RegExp")
    With rx
        .Global = True
        .IgnoreCase = True
        ''Find anything not a-z, 0-9
        .Pattern = "[^a-z0-9]"
        CleanString = .Replace(StringToClean, "")
    End With
End Function
Word documents are stored in a custom format which places a load of binary cruft on the beginning of the file.
The simplest thing would be to knock something up in Python that searched for the first line beginning with ASCII chars. Here you go:
#!/usr/bin/python
# Renames each *.doc file in the current directory after the first long run
# of ASCII bytes found inside it.
import glob
import os
import string

# Characters copied into a file name unchanged; anything else becomes "_".
SAFE_CHARS = frozenset(string.ascii_letters + string.digits)


def ascii_run_name(data, run_length=100, name_length=20):
    """Return a file-name stem taken from the first long ASCII run in *data*.

    Scans *data* (bytes) for the first run of ``run_length`` consecutive bytes
    whose values lie strictly between 0 and 128. The first ``name_length``
    characters of that run form the stem, with every character that is not an
    ASCII letter or digit replaced by "_". Returns None when there is no run
    of that length, so a short run at the end of a file never yields a name.
    """
    run = []
    for byte in bytearray(data):  # yields ints on both Python 2 and 3
        if 0 < byte < 128:
            char = chr(byte)
            run.append(char if char in SAFE_CHARS else "_")
            if len(run) == run_length:
                return "".join(run[:name_length])
        else:
            # A NUL or non-ASCII byte ends the current run; start over.
            run = []
    return None


def unique_name(stem, extension=".doc"):
    """Return ``stem + extension``, adding ``_1``, ``_2``, ... while taken."""
    candidate = stem + extension
    counter = 1
    while os.path.exists(candidate):
        candidate = "%s_%d%s" % (stem, counter, extension)
        counter += 1
    return candidate


if __name__ == "__main__":
    for file in glob.glob("*.doc"):
        # Read and close the file before renaming it.
        with open(file, "rb") as f:
            stem = ascii_run_name(f.read())
        if stem is None or stem + ".doc" == file:
            # Nothing usable was found, or the file already has that name.
            continue
        # Never rename onto an existing file; os.rename can overwrite it.
        new_name = unique_name(stem)
        print("renaming " + file + " to " + new_name)
        os.rename(file, new_name)
NOTE: if you want to glob multiple directories you'll need to change the glob line accordingly. Also this takes no account of whether the file you're trying to rename to already exists, so if you have multiple docs with the same first few chars then you'll need to handle that.
I found the first chunk of 100 ASCII chars in a row (if you look for less than that you end up picking up doc keywords and such) and then used the first 20 of these to make the new name, replacing anything that's not a-z A-Z or 0-9 with underscores to avoid file name issues.