/*--------------------------------------------------------------------------------
  
   Demo of a USER_LEXER for Oracle Text 
  
   Should be used in conjunction with user_lexer.sql
   see comments in that file.
  
   Tested on Windows, should work on Unix as well
   To build using Microsoft Visual C++ V6.0:
     File -> New -> Projects
       Win32 Dynamic-Link Library  (supply a project name, and folder if reqd)
       What kind of DLL? -> An empty DLL project, Finish
     Project -> Add to Project -> Files (navigate to and add this file)
     Project -> Add to Project -> Files 
       "Files of type:" Library Files, add %ORACLE_HOME%\oci\lib\msvc\oci.lib
       (specifying the full directory name)
     To make sure the include file oci.h can be found:
     Tools -> Options -> Directories -> Show Directories for include files
       Press the "new" icon, and add the full directory for 
       %ORACLE_HOME%\oci\include
       
     Then build the DLL with Build -> Build <projectname>.DLL

   The CREATE OR REPLACE LIBRARY statement in user_lexer.sql must point to
   the DLL created by MSVC++. If required, you can copy it to the target
   location after compiling.
  
   Unless your target location is in %ORACLE_HOME%\bin, you will have to 
   modify the EXTPROC entry in your listener.ora to something like this:

    (SID_DESC =
      (SID_NAME = PLSExtProc)
      (ORACLE_HOME = G:\oracle\ora92)
      (ENVS=EXTPROC_DLLS=c:\myprojects\user_lexer\debug\user_lexer.dll)
      (PROGRAM = extproc)
    )

   Note the ENVS=EXTPROC_DLLS entry which points to the DLL you have built.

   Restart the listener, using LSNRCTL from the command line, or through
   Control Panel -> Services.

   You are then ready to build the SQL*Plus part of the demo.

   Edit user_lexer.sql, changing the library location to point to the
   DLL created in MSVC++ (note that by default it will be in a "debug"
   directory under the specified project directory).  Change the CTXSYS
   password if necessary, and add a connect string to each CONNECT
   statement if needed on your system.

   Finally, go into SQL*Plus as a DBA user (eg SYSTEM) and run

   SQL> grant create any directory to scott;
  
   You are then ready to run the demo, with the command:
  
   SQL> @user_lexer
  
Roger Ford, Oracle Text Group  roger.ford@oracle.com

Version      Date         By          Comments
Version 0.9  30 May 2003  roger.ford  Initial Version, not fully tested
Version 1.0   2 Jun 2003  roger.ford  Fully tested - more documentation 

----------------------------------------------------------------------------- */

/* TODO: UTF-8 - amount and buflen will vary */

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <time.h>

#ifndef OCI_ORACLE
# include <oci.h>
#endif

/* Longest word, in characters, that ParseText emits as a token; longer
   words are skipped. */
#define MAX_TOKEN_SIZE 63
/* Not referenced by the code visible in this file; presumably the size of
   the wildcard flag array filled by get_wildcards - TODO confirm. */
#define MAX_WILDCARDS 63

/* Bundle of OCI handles passed between the lexer routines.
   envhp, svchp and errhp are filled in by OCIExtProcGetEnv; the statement,
   bind and define handles are not used by the code visible in this file. */
typedef struct ocictx
{
  /* environment, service context and error handles */
  OCIEnv     *envhp;
  OCISvcCtx  *svchp;
  OCIError   *errhp;

  /* statement handles */
  OCIStmt    *stmtp;
  OCIStmt    *stm1p;

  /* bind handles */
  OCIBind    *bnd1p;
  OCIBind    *bnd2p;
  OCIBind    *bnd3p;

  /* define handles */
  OCIDefine  *dfn1p;
  OCIDefine  *dfn2p;
  OCIDefine  *dfn3p;
} ocictx;

/* function prototypes */

/* Splits buffer into words and appends them, wrapped in <tokens>/<word>
   markup, to *tokenLob through WriteTokens.  Returns 0, or -1 when
   WriteTokens reports a failure. */
int ParseText (ocictx *context, OCILobLocator **tokenLob, ub4 loc_needed, char *buffer);

/* Appends the NUL-terminated string tokens to *tokenLob. */
int WriteTokens (ocictx *context,  OCILobLocator **tokenLob, char *tokens);

/* Prints the outcome of the OCI call named by str to stdout; for a non-zero
   err the OCI error text is fetched into errbuf first. */
void check_err(char *str, int err, char *errbuf, ocictx *oci_ctxp );

/* c_index_proc - indexing entry point of the user lexer (external procedure).

   Reads the whole of text_in into a heap buffer, trims *tokenLob to length
   zero and then calls ParseText, which appends the token markup to *tokenLob.

   with_context  external-procedure context used to obtain the OCI handles
   text_in       LOB locator of the text to be tokenized
   tokenLob      address of the LOB locator that receives the token list
   loc_needed    non-zero when ParseText should emit token locations

   Nothing is returned.  The outcome of every OCI call is printed through
   check_err; when a call fails the procedure stops at that point, after
   releasing its buffer, instead of carrying on with invalid data.
*/
#ifdef WIN32
__declspec(dllexport)
#endif

void c_index_proc (
                   OCIExtProcContext  *with_context,      /* With Context ptr */
                   OCILobLocator      *text_in,
                   OCILobLocator      **tokenLob,
                   sb4                loc_needed
                   )
{
  ocictx         oci_ctx;
  ocictx         *oci_ctxp = &oci_ctx;
  int            err;

  ub4            in_amount;
  ub4            in_offset;
  char           *in_buf = (char *)NULL;  /* allocated buffer for input text */
  ub4            in_buf_len;

  char           errbuf[512] = "";

  ub4            text_size = 0;

  /* Start from null handles so that a failed OCIExtProcGetEnv cannot leave
     stack garbage in the context handed to check_err. */
  memset(&oci_ctx, 0, sizeof(oci_ctx));

  /* Obtain OCI handles using the context passed. */
  err = OCIExtProcGetEnv(with_context,                       /* With context */
                         &oci_ctxp->envhp,
                         &oci_ctxp->svchp,
                         &oci_ctxp->errhp);
  check_err("OCIExtProcGetEnv", err, errbuf, oci_ctxp);
  if (err != OCI_SUCCESS) {
    return;                       /* no usable handles - nothing can be done */
  }

  /* Get the size of the input lob */
  err = OCILobGetLength(oci_ctxp->svchp,
                        oci_ctxp->errhp,
                        text_in,
                        &text_size);
  check_err("OCILobGetLength", err, errbuf, oci_ctxp);
  if (err != OCI_SUCCESS && err != OCI_SUCCESS_WITH_INFO) {
    return;                       /* text_size is not trustworthy */
  }

  /* Allocate single buffer large enough for whole input LOB plus the
     terminating null */
  in_buf = (char *)malloc((size_t)text_size + 1);
  if (in_buf == (char *)NULL) {
    fprintf(stdout, "Out of memory in user lexer\n");
    return;
  }

  /* Now read the LOB (an empty LOB needs no read at all) */
  in_amount = text_size;
  in_offset = 1;
  in_buf_len = text_size+1;

  if (text_size > 0) {
    err = OCILobRead(oci_ctxp->svchp,
                     oci_ctxp->errhp,
                     text_in,
                     &in_amount,
                     in_offset,
                     (dvoid *) in_buf,
                     in_buf_len,
                     (dvoid *) 0,
                     (sb4 (*)(dvoid *, CONST dvoid *, ub4, ub1 )) 0,
                     (ub2) 0,
                     (ub1) SQLCS_IMPLICIT);
    check_err("OCILobRead", err, errbuf, oci_ctxp);
    if (err != OCI_SUCCESS && err != OCI_SUCCESS_WITH_INFO) {
      goto cleanup;
    }
  }

  /* null terminate the string in the buffer; the clamp keeps the write
     inside the allocation whatever amount was reported back */
  if (in_amount > text_size) {
    in_amount = text_size;
  }
  in_buf[in_amount] = '\0';

  /* Trim lob_out to have length = 0 */
  err = (int) OCILobTrim(oci_ctxp->svchp,
                         oci_ctxp->errhp,
                         *tokenLob,
                         (ub4)0);
  check_err("OCILobTrim", err, errbuf, oci_ctxp);
  if (err != OCI_SUCCESS && err != OCI_SUCCESS_WITH_INFO) {
    goto cleanup;
  }

  /* Parse the incoming text
     The ParseText function will call back to the writer function
  */
  err = ParseText(oci_ctxp, tokenLob, (ub4) loc_needed, in_buf);
  if (err) {
    fprintf(stdout, "User lexer parser error\n");
  }

cleanup:
  /* free temporary buffer on every path, including the error paths */
  free (in_buf);
}

/* WriteTokens - append a chunk of token markup to the return CLOB.

   context     OCI handles (svchp and errhp are used)
   tokenLob    address of the LOB locator to append to
   token_list  NUL-terminated text to append

   Returns 0 when the append succeeded and -1 when OCILobWriteAppend
   reported an error, so that callers testing the result can stop.

   TODO: We're currently calling this function for each token.
   Using OCILobWriteAppend in this manner is probably inefficient - we'd be
   better off buffering it up and writing only when the buffer fills.
*/
int WriteTokens (ocictx *context, OCILobLocator **tokenLob, char *token_list)
{
  ub4   out_len;
  ub4   out_amount;
  int   err;                 /* OCI status codes are signed (OCI_ERROR < 0) */
  char  errbuf[512] = "";
  FILE  *fp;

  out_len = (ub4) strlen(token_list);
  out_amount = out_len;

  /* An append always goes to the end of the LOB, so no offset is passed. */
  err = OCILobWriteAppend(context->svchp,
                    context->errhp,
                    *tokenLob,
                    &out_amount,
                    (dvoid *) token_list,
                    (ub4) out_len,
                    OCI_ONE_PIECE,
                    (dvoid *)0,
                    (sb4 (*)(dvoid *, dvoid *, ub4 *, ub1 * )) 0,
                    (ub2)0, (ub1)SQLCS_IMPLICIT);
  check_err("OCILobWriteAppend", err, errbuf, context);

  /* debug output - skipped quietly when the trace file cannot be opened
     (for example on a system without a C: drive) */
  fp = fopen("C:\\debug.txt", "a");
  if (fp != (FILE *)NULL) {
    fprintf(fp, "end of WriteTokens\n");
    fclose (fp);
  }

  if (err != OCI_SUCCESS && err != OCI_SUCCESS_WITH_INFO) {
    return -1;
  }
  return 0;
}

/* Returns 1 when c is one of the letters A-Z or a-z, otherwise 0. */
int isAlpha(char c) {
  int is_upper = (c >= 'A' && c <= 'Z');
  int is_lower = (c >= 'a' && c <= 'z');

  return (is_upper || is_lower) ? 1 : 0;
}

/* ParseText - split text_buff into words and write them as token markup.

   A word is a maximal run of characters accepted by isAlpha.  Each word of
   at most MAX_TOKEN_SIZE characters is upper-cased and appended to
   *tokenLob through WriteTokens; longer words are skipped.  The output is
   framed by "<tokens>" and "</tokens>\n".

   context     OCI handles, passed through to WriteTokens
   tokenLob    address of the LOB locator receiving the markup
   loc_needed  non-zero: emit <word off="OFFSET" len="LENGTH">, where OFFSET
               is the 0-based position of the word in text_buff;
               zero: emit a plain <word>
   text_buff   NUL-terminated text to tokenize

   Returns 0 on success, -1 as soon as WriteTokens reports a failure.

   NOTE(review): the location branch was damaged in the copy this was taken
   from (a four-argument strcpy and unbalanced braces); the off/len form is
   a reconstruction and should be confirmed against the USER_LEXER output
   format.
*/
int ParseText (ocictx *context, OCILobLocator **tokenLob, ub4 loc_needed, char *text_buff)
{
  int  err;

  ub4  token_length;
  /* Worst case is about 115 bytes: open tag with two decimal numbers,
     MAX_TOKEN_SIZE letters, close tag, newline and terminator. */
  char token_buff[256];

  char *startWord;
  char *p, *o;
  ub4  i;
  char *endstr;

  err = WriteTokens(context, tokenLob,
           "<tokens>");
  if (err) {
    fprintf(stdout, "Error is user_lexer: WriteTokens\n"); return -1;
  }

  p = text_buff;
  endstr = p+strlen(p);          /* points at the terminating null */

  /* now loop through reading words */
  while (p < endstr) {

    /* loop past any non-alpha chars; the bound is tested before *p is
       read so the scan never looks beyond the terminating null */
    while (p < endstr && !isAlpha(*p)) {
      p++;
    }
    if (p == endstr) {
      break;
    }

    /* collect the word; the terminating null is not alpha, so this stops
       at endstr at the latest */
    startWord = p;
    while (isAlpha(*p)) {
      p++;
    }
    token_length = (ub4)(p-startWord);

    if (token_length > MAX_TOKEN_SIZE) {    /* only process tokens < 64 chars */
      continue;
    }

    if (loc_needed) {
      sprintf (token_buff, "<word off=\"%lu\" len=\"%lu\">",
                 (unsigned long)(startWord-text_buff),
                 (unsigned long)token_length);
    }
    else {
      strcpy (token_buff, "<word>");
    }

    /* copy the token, upper-casing it as we go */
    o = token_buff+strlen(token_buff);
    for (i = 0; i < token_length; i++) {
      *o++ = (char)toupper((int)*(startWord+i));
    }
    *o = '\0';
    strcat (token_buff, "</word>\n");

    err = WriteTokens(context, tokenLob, token_buff);
    if (err) {
      fprintf(stdout, "Error is user_lexer: WriteTokens\n"); return -1;
    }
  }

  err = WriteTokens(context, tokenLob, "</tokens>\n");
  if (err) {
    fprintf(stdout, "Error is user_lexer: WriteTokens\n"); return -1;
  }
  return 0;
}

/* Size of the error-text buffer every caller of check_err must supply.
   sizeof cannot be used inside the function: errbuf is a pointer there. */
#define CHECK_ERR_BUFLEN 512

/* check_err - report the outcome of an OCI call on stdout.

   str       name of the call, used as the label in the message
   err       status returned by the call; zero means success
   errbuf    caller-supplied buffer of at least CHECK_ERR_BUFLEN bytes that
             receives the OCI error text
   oci_ctxp  context whose errhp is queried for the error details

   For a non-zero err the first error record is fetched with OCIErrorGet and
   printed together with its error code; otherwise a success line is printed.
*/
void check_err(char *str, int err, char *errbuf, ocictx *oci_ctxp)
{
    sb4 errcode = (sb4) err;     /* kept as-is if OCIErrorGet supplies none */

    if ( err )
    {
      /* make the buffer printable even when OCIErrorGet writes nothing */
      errbuf[0] = '\0';
      OCIErrorGet( (dvoid *)oci_ctxp->errhp, (ub4) 1, (text *)0, &errcode,
         (text *)errbuf, (ub4)CHECK_ERR_BUFLEN, (ub4)OCI_HTYPE_ERROR );
      fprintf(stdout,"%s returned errno: %d\nerrstr: %s\n", str, (int)errcode, errbuf);
      return;
    }

    fprintf(stdout,"%s returned: 0\n", str);
}

/* Function to get comma separated list of wildcard positions 
   and write them into a boolean flag array 

   NOTE(review): this region is not valid C as it stands.  The first for-loop
   header runs straight into the tail of an OCIExtProcGetEnv() argument
   list, and the second one runs into a string literal.  The code after that
   uses names that are not declared anywhere in the visible text (oci_ctxp,
   err, errbuf, fp, j, tokens, word, tok_ind) and calls get_wildcards from
   what would be its own body.  Text therefore appears to be missing after
   "i<" and after "j<" - presumably the rest of get_wildcards and the start
   of a second routine.  Restore from an intact copy before building -
   TODO confirm.
*/
get_wildcards (boolean wildcards[], char *wc_offsets)
{
  char           tbuf[4000];
  short          i;
  char           *p;
  short          wc_pos;
  
  /* clear the wildcard flag array */
  /* NOTE(review): loop header is truncated here - see the note above */
  for (i=0; i<max_token_size>envhp,
                         &oci_ctxp->svchp,
                         &oci_ctxp->errhp);
  check_err("OCIExtProcGetEnv", err, errbuf, oci_ctxp);

  /* Get the wildcard offsets into boolean array */
  get_wildcards (wildcards, wc_offsets);

  /* Debug */
  fp = fopen("C:\\debug.txt", "w");
  /* NOTE(review): loop header is truncated here as well */
  for (j=0; j<max_token_size><tokens><word>");
  /* append an upper-cased copy of word to tokens, then close the markup */
  o = tokens+strlen(tokens);
  for (j = 0; j < strlen(word); j++) {
    *o++ = (char)toupper((int)*(word+j));
  } 
  *o = '\0';
  strcat(tokens, "</word></tokens>");

  /* set the indicator to indicate not null */
 *tok_ind = OCI_IND_NOTNULL;

 /*
 *length = strlen(tokens);
 *maxlen = *length;  */
}
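/* Trailing non-code text found at the end of this file (apparently page
   banner captions); kept inside a comment so the file remains valid C:

   Oracle Open World 2014 Banner

   In-Memory Replay Banner
*/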