/*--------------------------------------------------------------------------------
Demo of a USER_LEXER for Oracle Text
Should be used in conjunction with user_lexer.sql
see comments in that file.
Tested on Windows, should work on Unix as well
To build using Microsoft Visual C++ V6.0:
File -> New -> Projects
Win32 Dynamic-Link Library (supply a project name, and folder if reqd)
What kind of DLL? -> An empty DLL project, Finish
Project -> Add to Project -> Files (navigate to and add this file)
Project -> Add to Project -> Files
"Files of type:" Library Files, add %ORACLE_HOME%\oci\lib\msvc\oci.lib
(specifying the full directory name)
To make sure the include file oci.h can be found:
Tools -> Options -> Directories -> Show Directories for include files
Press the "new" icon, and add the full directory for
%ORACLE_HOME%\oci\include
Then build the DLL with Build -> Build <projectname>.DLL
The CREATE OR REPLACE LIBRARY statement in user_lexer.sql must point to
the DLL created by MSVC++. If required, you can copy it to the target
location after compiling.
Unless your target location is in %ORACLE_HOME%\bin, you will have to
modify the EXTPROC entry in your listener.ora to something like this:
(SID_DESC =
(SID_NAME = PLSExtProc)
(ORACLE_HOME = G:\oracle\ora92)
(ENVS=EXTPROC_DLLS=c:\myprojects\user_lexer\debug\user_lexer.dll)
(PROGRAM = extproc)
)
Note the ENVS=EXTPROC_DLLS entry which points to the DLL you have built.
Restart the listener, using LSNRCTL from the command line, or through
Control Panel -> Services.
You are then ready to build the SQL*Plus part of the demo.
Edit user_lexer.sql, changing the library location to point to the
DLL created by MSVC++ (note that by default it will be in a \debug
directory under the specified project directory). Change the CTXSYS
password if necessary, and add a connect string to each CONNECT statement
if needed on your system.
Finally, go into SQL*Plus as a DBA user (eg SYSTEM) and run
SQL> grant create any directory to scott;
You are then ready to run the demo, with the command:
SQL> @user_lexer
Roger Ford, Oracle Text Group roger.ford@oracle.com
Version Date By Comments
Version 0.9 30 May 2003 roger.ford Initial Version, not fully tested
Version 1.0 2 Jun 2003 roger.ford Fully tested - more documentation
----------------------------------------------------------------------------- */
/* TODO: UTF-8 - amount and buflen will vary */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <time.h>
#ifndef OCI_ORACLE
# include <oci.h>
#endif
#define MAX_TOKEN_SIZE 63
#define MAX_WILDCARDS 63
/* Bundle of OCI handles handed to the lexer helper routines in this file. */
typedef struct ocictx
{
    /* Environment, service context and error handles */
    OCIEnv    *envhp;
    OCISvcCtx *svchp;
    OCIError  *errhp;

    /* Statement handles */
    OCIStmt   *stmtp;
    OCIStmt   *stm1p;

    /* Bind handles */
    OCIBind   *bnd1p;
    OCIBind   *bnd2p;
    OCIBind   *bnd3p;

    /* Define handles */
    OCIDefine *dfn1p;
    OCIDefine *dfn2p;
    OCIDefine *dfn3p;
} ocictx;
/* Forward declarations for routines defined further down in this file */

/* Tokenizes a text buffer and writes the resulting token list to tokenLob */
int ParseText(ocictx *context,
              OCILobLocator **tokenLob,
              ub4 loc_needed,
              char *buffer);

/* Appends a string to the token LOB */
int WriteTokens(ocictx *context,
                OCILobLocator **tokenLob,
                char *tokens);

/* Prints the outcome of an OCI call, named by str, to stdout */
void check_err(char *str,
               int err,
               char *errbuf,
               ocictx *oci_ctxp);
/*
 * c_index_proc - external procedure entry point used when indexing.
 *
 * Reads the whole input LOB into a heap buffer, trims the output LOB to
 * length zero, then calls ParseText to tokenize the text and append the
 * token list to the output LOB.
 *
 * with_context  context passed in by the caller; used with
 *               OCIExtProcGetEnv to obtain the OCI handles
 * text_in       locator of the LOB holding the text to tokenize
 * tokenLob      locator of the LOB that receives the token list
 * loc_needed    passed through to ParseText
 *
 * Returns nothing. Every OCI call is reported on stdout through check_err;
 * on the first failure the procedure stops and releases its buffer.
 *
 * TODO: UTF-8 - amount and buffer length will differ for multi-byte data.
 */
#ifdef WIN32
__declspec(dllexport)
#endif
void c_index_proc (
    OCIExtProcContext *with_context, /* With Context ptr */
    OCILobLocator     *text_in,
    OCILobLocator    **tokenLob,
    sb4                loc_needed
    )
{
    ocictx  oci_ctx;
    ocictx *oci_ctxp = &oci_ctx;
    int     err;
    ub4     in_amount;
    ub4     in_buf_len;
    ub4     text_size = 0;
    char   *in_buf = NULL;  /* allocated buffer for the input text */
    char    errbuf[512];

    /* Start from null handles so that check_err never reads an
       indeterminate error handle if OCIExtProcGetEnv fails. */
    memset(&oci_ctx, 0, sizeof oci_ctx);

    /* Obtain the OCI handles from the context passed in */
    err = OCIExtProcGetEnv(with_context,
                           &oci_ctxp->envhp,
                           &oci_ctxp->svchp,
                           &oci_ctxp->errhp);
    check_err("OCIExtProcGetEnv", err, errbuf, oci_ctxp);
    if (err) {
        return;
    }

    /* Get the size of the input lob */
    err = OCILobGetLength(oci_ctxp->svchp,
                          oci_ctxp->errhp,
                          text_in,
                          &text_size);
    check_err("OCILobGetLength", err, errbuf, oci_ctxp);
    if (err) {
        return;
    }

    /* Single buffer large enough for the whole LOB plus a terminator.
       A wrapped length (text_size + 1 == 0) is treated as unallocatable. */
    in_buf_len = text_size + 1;
    if (in_buf_len != 0) {
        in_buf = malloc(in_buf_len);
    }
    if (in_buf == NULL) {
        fprintf(stdout, "Out of memory in user lexer\n");
        return;
    }

    /* Read the LOB. An empty LOB is not read at all: there is nothing to
       fetch and the buffer simply becomes an empty string. */
    in_amount = 0;
    if (text_size > 0) {
        in_amount = text_size;
        err = OCILobRead(oci_ctxp->svchp,
                         oci_ctxp->errhp,
                         text_in,
                         &in_amount,
                         (ub4) 1,               /* offsets start at 1 */
                         (dvoid *) in_buf,
                         in_buf_len,
                         (dvoid *) 0,
                         (sb4 (*)(dvoid *, CONST dvoid *, ub4, ub1 )) 0,
                         (ub2) 0,
                         (ub1) SQLCS_IMPLICIT);
        check_err("OCILobRead", err, errbuf, oci_ctxp);
        if (err) {
            goto cleanup;
        }
        /* Never let the terminator index exceed the allocation */
        if (in_amount > text_size) {
            in_amount = text_size;
        }
    }

    /* null terminate the string in the buffer */
    in_buf[in_amount] = '\0';

    /* Trim the output LOB to length 0 */
    err = (int) OCILobTrim(oci_ctxp->svchp,
                           oci_ctxp->errhp,
                           *tokenLob,
                           (ub4) 0);
    check_err("OCILobTrim", err, errbuf, oci_ctxp);
    if (err) {
        goto cleanup;
    }

    /* Parse the incoming text; ParseText calls the writer function */
    err = ParseText(oci_ctxp, tokenLob, (ub4) loc_needed, in_buf);
    if (err) {
        fprintf(stdout, "User lexer parser error\n");
    }

cleanup:
    /* free temporary buffer on every path that allocated it */
    free(in_buf);
}
/*
 * WriteTokens - append a NUL-terminated string to the token LOB.
 *
 * context     supplies the OCI service and error handles
 * tokenLob    points to the locator of the LOB to append to
 * token_list  text to append; an empty string is a no-op
 *
 * Returns 0 on success and -1 if OCILobWriteAppend reports a non-zero
 * status, so that callers testing the result actually see failures.
 * The status is also printed on stdout by check_err.
 *
 * TODO: We're currently calling this function for each token.
 * Using OCILobWriteAppend in this manner is probably inefficient - we'd be
 * better off buffering it up and writing only when the buffer fills.
 */
int WriteTokens (ocictx *context, OCILobLocator **tokenLob, char *token_list)
{
    int   err;          /* signed: OCI status codes can be negative */
    ub4   out_len;
    ub4   out_amount;
    char  errbuf[512];
    FILE *fp;

    out_len = (ub4) strlen(token_list);
    if (out_len == 0) {
        return 0;       /* nothing to append */
    }
    out_amount = out_len;

    err = OCILobWriteAppend(context->svchp,
                            context->errhp,
                            *tokenLob,
                            &out_amount,
                            (dvoid *) token_list,
                            out_len,
                            OCI_ONE_PIECE,
                            (dvoid *) 0,
                            (sb4 (*)(dvoid *, dvoid *, ub4 *, ub1 * )) 0,
                            (ub2) 0, (ub1) SQLCS_IMPLICIT);
    check_err("OCILobWriteAppend", err, errbuf, context);

    /* debug output - skipped quietly when the file cannot be opened */
    fp = fopen("C:\\debug.txt", "a");
    if (fp != NULL) {
        fprintf(fp, "end of WriteTokens\n");
        fclose(fp);
    }

    return err ? -1 : 0;
}
/* Returns 1 when c lies in 'A'..'Z' or 'a'..'z', otherwise 0. */
int isAlpha(char c) {
    int in_upper = (c >= 'A' && c <= 'Z');
    int in_lower = (c >= 'a' && c <= 'z');

    return (in_upper || in_lower) ? 1 : 0;
}
/*
 * ParseText - split a text buffer into words and write them as a token list.
 *
 * The output, written through WriteTokens, is "<tokens>" followed by one
 * "<word>...</word>" line per word and a closing "</tokens>" line.
 * A word is a maximal run of characters accepted by isAlpha; it is
 * upper-cased on output. Words longer than MAX_TOKEN_SIZE are skipped.
 *
 * context     passed through to WriteTokens
 * tokenLob    passed through to WriteTokens
 * loc_needed  when non-zero, each <word> tag carries off and len attributes
 *             (offset of the word from the start of text_buff, counted from
 *             zero, and its length)
 * text_buff   NUL-terminated text to tokenize
 *
 * Returns 0 on success, -1 as soon as WriteTokens returns non-zero.
 *
 * NOTE(review): the statement that builds the opening <word> tag was damaged
 * in the source text this was restored from (a strcpy call with four
 * arguments and unbalanced braces). The off/len form below is reconstructed
 * from the surviving arguments and the surviving else branch - confirm
 * against the original file and the Oracle Text USER_LEXER documentation.
 */
int ParseText (ocictx *context, OCILobLocator **tokenLob, ub4 loc_needed, char *text_buff)
{
    int   err;
    ub4   token_length;
    ub4   i;
    /* Worst case is well under 256 bytes: tag and attributes (< 45 bytes),
       at most MAX_TOKEN_SIZE characters, the closing tag and a NUL. */
    char  token_buff[256];
    char *startWord;
    char *p;
    char *o;

    err = WriteTokens(context, tokenLob, "<tokens>");
    if (err) {
        fprintf(stdout, "Error is user_lexer: WriteTokens\n");
        return -1;
    }

    p = text_buff;
    while (*p != '\0') {
        /* step over separators; the NUL test above bounds the scan so the
           byte after the terminator is never read */
        if (!isAlpha(*p)) {
            p++;
            continue;
        }

        /* p is on the first letter of a word: find its end
           (isAlpha rejects the terminating NUL, so this stops there) */
        startWord = p;
        while (isAlpha(*p)) {
            p++;
        }
        token_length = (ub4)(p - startWord);

        /* only process tokens of at most MAX_TOKEN_SIZE characters */
        if (token_length > MAX_TOKEN_SIZE) {
            continue;
        }

        strcpy(token_buff, "<word");
        if (loc_needed) {
            sprintf(token_buff + strlen(token_buff),
                    " off=\"%lu\" len=\"%lu\">",
                    (unsigned long)(startWord - text_buff),
                    (unsigned long)token_length);
        }
        else {
            strcat(token_buff, ">");
        }

        /* copy the token, upper-casing it as we go */
        o = token_buff + strlen(token_buff);
        for (i = 0; i < token_length; i++) {
            *o++ = (char)toupper((unsigned char)startWord[i]);
        }
        *o = '\0';
        strcat(token_buff, "</word>\n");

        err = WriteTokens(context, tokenLob, token_buff);
        if (err) {
            fprintf(stdout, "Error is user_lexer: WriteTokens\n");
            return -1;
        }
    }

    err = WriteTokens(context, tokenLob, "</tokens>\n");
    if (err) {
        fprintf(stdout, "Error is user_lexer: WriteTokens\n");
        return -1;
    }
    return 0;
}
/*
 * check_err - print the outcome of an OCI call to stdout.
 *
 * str       name of the call being reported (used in the message)
 * err       status returned by that call; zero is reported as success
 * errbuf    caller-supplied scratch buffer for the error text; every caller
 *           in this file passes a char[512]
 * oci_ctxp  context whose error handle is queried for the error details
 *
 * For a non-zero status the error code and text are fetched with
 * OCIErrorGet and printed; if that lookup stores nothing, the original
 * status and an empty error string are printed.
 */
void check_err(char *str, int err, char *errbuf, ocictx *oci_ctxp)
{
    /* errbuf is a pointer here, so sizeof(errbuf) would be the size of a
       pointer rather than of the caller's array; use the callers' size. */
    enum { ERRBUF_SIZE = 512 };
    sb4 errcode;

    if (!err) {
        fprintf(stdout, "%s returned: 0\n", str);
        return;
    }

    /* Defaults in case OCIErrorGet fails and leaves its outputs untouched */
    errcode = (sb4) err;
    errbuf[0] = '\0';

    OCIErrorGet((dvoid *) oci_ctxp->errhp, (ub4) 1, (text *) 0, &errcode,
                (text *) errbuf, (ub4) ERRBUF_SIZE, (ub4) OCI_HTYPE_ERROR);
    fprintf(stdout, "%s returned errno: %d\nerrstr: %s\n",
            str, (int) errcode, errbuf);
}
/* Function to get comma separated list of wildcard positions
and write them into a boolean flag array.
wildcards  - flag array to be filled in
wc_offsets - string holding the wildcard positions
NOTE(review): the text from the for statement below to the end of this
section is damaged - everything between a less-than sign and a later
greater-than sign appears to have been stripped. The rest of this function
and the header and declarations of the function that follows are missing:
the remaining lines use oci_ctxp, err, errbuf, fp, j, tokens, word and
tok_ind, none of which is declared in the visible text. This section cannot
compile as it stands; restore it from the original source before building.
*/
get_wildcards (boolean wildcards[], char *wc_offsets)
{
char tbuf[4000];
short i;
char *p;
short wc_pos;
/* clear the wildcard flag array */
for (i=0; i<max_token_size>envhp,
&oci_ctxp->svchp,
&oci_ctxp->errhp);
check_err("OCIExtProcGetEnv", err, errbuf, oci_ctxp);
/* Get the wildcard offsets into boolean array */
get_wildcards (wildcards, wc_offsets);
/* Debug */
fp = fopen("C:\\debug.txt", "w");
for (j=0; j<max_token_size><tokens><word>");
o = tokens+strlen(tokens);
for (j = 0; j < strlen(word); j++) {
*o++ = (char)toupper((int)*(word+j));
}
*o = '\0';
strcat(tokens, "</word></tokens>");
/* set the indicator to indicate not null */
*tok_ind = OCI_IND_NOTNULL;
/*
*length = strlen(tokens);
*maxlen = *length; */
}