' SEXT - String EXTractor.
'
' Scans a file byte by byte and writes every run of printable ASCII
' characters (codes 32..126) to an output file, one run per line.
'   * Runs shorter than MINWORD characters (default 3) are discarded.
'   * A single zero byte is skipped without ending the run; two zero bytes
'     in a row, or any other non-printable byte, end the run.
'   * Runs of 78 characters or more are wrapped into chunks of roughly 76
'     characters, breaking on a space or a period where one can be found.
'   * An optional byte limit (5th command-line word) stops the scan early;
'     it is only checked after a run has been flushed.
'
' Usage:  SEXT input_file output_file min_word_size byte_limit
' With no arguments the help text is shown and the values are prompted for.
'
' History (original author's notes):
'   5/23/2010:  finally got around to fixing that "round-off"
'               (ie: finish-handling that was bad: XX NOT THERE, due to
'               error and ON ERROR)
'   12/23/2004: changed the two-zero string-break to one zero (see comment
'               in the read loop), added a byte-count limiter for files
'               where you only want the header-area to be scanned. (might
'               put an optional byte-sequence trigger into the next
'               version). Added an "Append" option for the output file.
'               Added command-line-driver for power-users! Output lines
'               longer than 80 are broken down to screen-size by breaking
'               on spaces where possible for readability.
'   Original:   extract strings from binary files for analysis (filters out
'               all the smiley-faces & non-ASCII garbage)

CONST MAXWORDS = 20             ' highest usable index of WORD$()
DIM WORD$(MAXWORDS)

' BL$ (byte limit) is only ever filled from the command line; the
' interactive prompt for it is disabled below.
BL$ = ""
C$ = ENVIRON$("CMDLINE"): T = INSTR(C$, " "): W = 1: OK$ = ""
MINWORD = 3                     ' minimum length to qualify as a string

' When started from inside the QBASIC environment the command line belongs
' to QBASIC itself, so go straight to the interactive prompts.
IF LEFT$(LTRIM$(UCASE$(C$)), 6) = "QBASIC" THEN GOTO INPUTS

IF T > 0 THEN ' command-line mode: at least one space was found
    ' Split the command line on spaces. WORD$(1) is the program name,
    ' WORD$(2..5) are the arguments. The loop stops at MAXWORDS so an
    ' over-long command line cannot run off the end of the array.
    DO UNTIL T = 0 OR W >= MAXWORDS
        WORD$(W) = LEFT$(C$, T - 1)
        W = W + 1
        ' No length argument: keep the whole remainder, however long.
        C$ = LTRIM$(MID$(C$, T))
        T = INSTR(C$, " ")
        IF T = 0 THEN WORD$(W) = C$
    LOOP
    IFILE$ = WORD$(2): OFILE$ = WORD$(3): WL$ = WORD$(4): BL$ = WORD$(5)
    IF VAL(WL$) > 0 THEN MINWORD = VAL(WL$)
    OK$ = "A"                   ' command-line runs always append
    IF INSTR(IFILE$, "?") > 0 OR IFILE$ = "/H" OR IFILE$ = "/h" THEN GOTO HELP
    PRINT "INPUTTING FROM " + IFILE$: PRINT "OUTPUTTING TO " + OFILE$
    'PRINT "OVERRIDE OPTION IS " + OK$
    'PRINT "BYTE LIMIT: " + BL$
    PRINT "WORD LIMIT (WORD SIZE TO QUALIFY): ", WL$
    'FOR I = 1 TO W: PRINT WORD$(I): NEXT I
ELSE
HELP:
    PRINT "SEXT: String EXTractor filters printable strings out of a non-text,"
    PRINT "or binary-type file (the 'input file' you wish to scan), and ignores"
    PRINT "the non-printable stuff."
    PRINT "The output file will be a text file consisting of the resulting set of"
    PRINT "strings extracted, one 'string' per line."
    PRINT "You will be warned politely if the output-file already exists,"
    PRINT "and given the option to overwrite or append it."
    PRINT "You may also submit the specs via command-line as follows:"
    PRINT ""
    PRINT "SEXT input_path+filename output_path+filename word-size-limit byte-limit_spec"
    PRINT ""
    PRINT "where 'overwrite' is O,o,A,a and only relevant if out-file already exists."
    PRINT "'byte-limit_spec' is OPTIONAL output byte-limit, in-file-eof if not spec"
INPUTS:
    INPUT "INPUT FILE: ", IFILE$
    IF LEN(IFILE$) = 0 THEN SYSTEM
    INPUT "OUTPUT: ", OFILE$
    'INPUT "BYTE-LIMIT (OPTIONAL): ", BL$
    OK$ = "A"                   ' interactive runs always append as well
    'newest addition:
    INPUT "MIN. LENGTH TO QUALIFY AS STRING (DEFAULT 3): ", MN$
    IF VAL(MN$) > 0 THEN MINWORD = VAL(MN$)
END IF

BL = VAL(BL$)                   ' 0 means "no byte limit"

' Any run-time error from here up to the ON ERROR GOTO 0 below is reported
' as a missing input file by GETOUT.
ON ERROR GOTO GETOUT
PRINT "INPUTTING FROM: "; IFILE$
PRINT "APPENDING OUTPUT TO: "; OFILE$
PRINT "MIN WORD SIZE:"; STR$(MINWORD)
'PRINT "BYTE-LIMIT: "; BL$
OPEN IFILE$ FOR BINARY ACCESS READ AS #1

' NOTE(review): "CONSOLE" is used as a plain file name here; if screen
' output was intended this may need a device name instead - confirm.
IF LEN(OFILE$) = 0 THEN OFILE$ = "CONSOLE": OK$ = "O"

' OK$ is always "A" or "O" at this point, so the existing-file probe below
' (and the NOTOK prompt it leads to) is currently never executed.
IF OK$ = "A" OR OK$ = "O" THEN GOTO OK
ON ERROR GOTO OK
OPEN "I", #2, OFILE$
GOTO NOTOK

OK:
CLOSE #2
IF BL$ <> "" THEN NOTE$ = " FIRST " + BL$ + " BYTES ONLY" ELSE NOTE$ = ""
'PRINT OK$; OFILE$
'INPUT NUL$
ON ERROR GOTO 0
' OK$ doubles as the old-style OPEN mode string: "A" = append, "O" = output.
OPEN OK$, #2, OFILE$: PRINT #2, "STRINGS EXTRACTED FROM " + IFILE$ + NOTE$
T$ = "": BC = 0                 ' T$ = run being collected, BC = bytes read

READLOOP:
'IF EOF(1) THEN CLOSE : SYSTEM: END
' At end of file, flush whatever is pending; DONET then closes and ends.
IF EOF(1) THEN GOTO PRTOUT
PREV.A = a
X$ = INPUT$(1, #1): BC = BC + 1
' Original note: "for some reason this next line is necessary on a binary
' read." An empty read is skipped and EOF is tested again at READLOOP.
IF LEN(X$) = 0 THEN GOTO READLOOP
a = ASC(X$)
' Original note here: "disabled foll. for newest version".
' NOTE(review): the history says the two-zero break was changed to one
' zero, but the statement below carries no comment marker and is therefore
' kept as live code - confirm which behaviour is wanted.
IF a = 0 AND PREV.A = 0 THEN GOTO PRTOUT
'IF a = 0 THEN GOTO PRTOUT
IF a = 0 THEN GOTO READLOOP                 ' lone zero byte: skip it
IF a < 32 OR a > 126 THEN GOTO PRTOUT       ' non-printable ends the run
T$ = T$ + X$                                ' printable: extend the run
GOTO READLOOP

PRTOUT:
' Flush the collected run if it is long enough.
IF LEN(T$) < MINWORD THEN GOTO DONET
'PRINT T$
LT = LEN(T$)
IF LT < 2 THEN GOTO DONET
IF LT < 78 THEN PRINT #2, T$: GOTO DONET

' Long run: emit it in chunks. PP is the first character of the current
' chunk, P the last character to include in it.
PP = 1: LT = LEN(T$)
LWID = 75: IF LWID > LT THEN LWID = LT - 1
P = PP + LWID
NEXLINE:
IF P > LT THEN P = LT
' Search backwards from P for a break point inside the current chunk.
FOR J = P TO PP STEP -1
    IF MID$(T$, J, 1) = " " THEN P = J: GOTO SKIPJ
    IF MID$(T$, J, 1) = "." THEN P = J + 1: GOTO SKIPJ
NEXT J
P = PP + LWID                   ' no break point found: hard break
SKIPJ:
PRINT #2, MID$(T$, PP, P - PP + 1): PP = P + 1: P = PP + LWID
IF P <= LT THEN GOTO NEXLINE
' Print the tail only if something is left. When the last chunk ended on
' the final character, PP is already past LT and the length would be zero
' (a stray blank line) or negative (a run-time error in MID$).
IF PP <= LT THEN PRINT #2, MID$(T$, PP, LT - PP + 1)
'FOR J = 1 TO LEN(T$) STEP 75: PRINT #2, MID$(T$, J, 75): NEXT J

DONET:
T$ = ""
IF BL <> 0 AND BC > BL THEN CLOSE : SYSTEM  ' byte limit reached
IF EOF(1) THEN CLOSE : END
GOTO READLOOP

NOTOK:
' Overwrite/append prompt for an existing output file (see the note at the
' probe above: not reached while OK$ is preset to "A" or "O").
PRINT "FILE " + OFILE$ + " ALREADY EXISTS, HIT 'O' TO OVERWRITE"
INPUT "AND THUS DESTROY THE EXISTING FILE, 'A' TO APPEND TO IT: ", OK$
OK$ = UCASE$(OK$)
IF OK$ <> "O" AND OK$ <> "A" THEN CLOSE : SYSTEM
RESUME OK

GETOUT:
' Error handler installed before the input file is opened.
PRINT "INPUT FILE " + IFILE$ + " NOT THERE"
SYSTEM
RESUME