Reading clob line by line with pl\sql

79,424

Solution 1

Here is a piece of code that works. I suggest that you use explicit cursors instead of implicit ones (FOR i IN (select...)), for performance purpose.

First here is the script to create testcase.

create table test (c clob);

insert into test (c) values (
'azertyuiop
qsdfghjklm
wxcvbn
');

Then here is the script to read line by line Clob :

/* Formatted on 28/08/2012 14:16:52 (QP5 v5.115.810.9015) */
declare
    nStartIndex number := 1;
    nEndIndex number := 1;
    nLineIndex number := 0;
    vLine varchar2(2000);

    cursor c_clob is
    select c from test;

    c clob;
    -------------------------------
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      i pls_integer := 1;
    begin
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      amount := instr(p_clob, chr(10), offset);
      while ( offset < len )
      loop
        dbms_lob.read(p_clob, amount, offset, lc_buffer);
        dbms_output.put_line('Line #'||i||':'||lc_buffer);
       offset := offset + amount;
       i := i + 1;
      end loop; 
          if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
    ---------------------------
begin
    dbms_output.put_line('-----------');
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;

'amount' variable is used to detect end of line position. Be carfull, in some case the end of line is CHR(10)||CHR(13) (CR + LF), and in some other cases it is only CHR(10).

Solution 2

While the SQL regexp / connect by level approach is probably the most elegant, it is quite bad performancewise (for my testcase on 11.2.0.3.0). Much faster is a simple parse like this.

procedure parse_clob(p_clob in clob) is
l_offset pls_integer:=1;
l_line varchar2(32767);
l_total_length pls_integer:=length(p_clob);
l_line_length pls_integer;
begin
  while l_offset<=l_total_length loop
    l_line_length:=instr(p_clob,chr(10),l_offset)-l_offset;
    if l_line_length<0 then
      l_line_length:=l_total_length+1-l_offset;
    end if;
    l_line:=substr(p_clob,l_offset,l_line_length);
    dbms_output.put_line(l_line); --do line processing
    l_offset:=l_offset+l_line_length+1;
  end loop;
end parse_clob;

Solution 3

In case... - you have APEX installed - and the clob is less than 32K you may also want to look into the following code:

declare
  l_text varchar2(32767) := '...';
  l_rows wwv_flow_global.vc_arr2;
begin
  l_rows := apex_util.string_to_table(l_text, chr(10));
  for i in 1 .. l_rows.count loop
    dbms_output.put_line(l_rows(i));
  end loop;
end;
/

Solution 4

A pipelined function with some additional options to drive the behavior. Tested/works on Windows, Oracle 11g (I have some suspicions it may fail in *nix environments because of how lines terminate).

CREATE OR REPLACE FUNCTION ETL_HELPER_PARSE
   (P_CLOB NCLOB, P_LINES_TO_SKIP INT DEFAULT 0, P_PUT_EMPTY_LINES CHAR DEFAULT 'N') RETURN SYS.ODCIVarchar2List PIPELINED
AS
  c_top_lines_to_skip  CONSTANT NUMBER  NOT NULL := P_LINES_TO_SKIP;
  c_output_empty_lines CONSTANT CHAR(1) NOT NULL := P_PUT_EMPTY_LINES; 
  --
  l_len     INT := DBMS_LOB.GETLENGTH(P_CLOB);
  l_hit     INT := 0;
  l_offset  INT := 1;
  l_amount  INT;  
  l_buffer  VARCHAR2(32767);
  l_cnt     INT := 1;  
BEGIN  
  WHILE ( l_offset < l_len )
  LOOP
    l_hit := DBMS_LOB.INSTR (
     lob_loc    => P_CLOB           -- IN   CLOB      CHARACTER SET ANY_CS
    ,pattern    => CHR(13)||CHR(10) -- IN   VARCHAR2  CHARACTER SET lob_loc%CHARSET
    ,offset     => l_offset         -- IN   INTEGER := 1
    ,nth        => 1                -- IN   INTEGER := 1
    );
    l_amount := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit - l_offset ELSE l_len - l_offset + 1 END;
    -- `l_amount=0` means a new empty line has been encountered
    IF l_cnt > c_top_lines_to_skip       
    THEN
      IF l_amount > 0
      THEN
        DBMS_LOB.READ(P_CLOB, l_amount, l_offset, l_buffer);
        PIPE ROW (l_buffer);
      ELSIF UPPER(c_output_empty_lines) = 'Y'
      THEN
        PIPE ROW ('');
      END IF;
    END IF;

    l_offset := CASE WHEN COALESCE(l_hit, 0) > 0  THEN l_hit + 2 ELSE l_len END;    
    l_cnt := l_cnt + 1;
  end loop;
EXCEPTION
  WHEN OTHERS THEN
     DBMS_OUTPUT.PUT_LINE('Error : '||SQLERRM);
END ETL_HELPER_PARSE;

Solution 5


sample for dynamicly length of rows

and alternative for UNIX and WIN files

and CR/LF on end of file or without it


Create table for TEST

drop table pbrev.test_SVT_tmp;
create table pbrev.test_SVT_tmp (xc clob);
insert into pbrev.test_SVT_tmp (xc) values (
--'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' || chr(13) || chr(10) );
'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' );
'azerty jhjh  huiop;11
qsdfgkj  hjklhhhhhhhhhhhm;7878
wxcvbn;0
dkjsk kjdsk5456 4654 5646 54645
FINISH'
);
delete from pbrev.test_SVT_tmp ;
select xc from pbrev.test_SVT_tmp;
--SET SERVEROUTPUT ON;
--SET SERVEROUTPUT OFF;
declare
    nStartIndex number := 1;
    nEndIndex number := 1;
    nLineIndex number := 0;
    vLine varchar2(2000);
    cursor c_clob is
    select xc from pbrev.test_SVT_tmp;
    c clob;
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      amount_last number := 0;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      line_seq pls_integer := 1;
      -- For UNIX type file - replace CHR(13) to NULL
      CR char := chr(13);
      --CR char := NULL;
      LF char := chr(10);      
      nCRLF number;
      sCRLF varchar2(2);
      b_finish boolean := true;
begin
      sCRLF := CR || LF;
      nCRLF := Length(sCRLF);
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      amount := instr(p_clob, sCRLF, offset);
      while ( offset < len )
      loop
        -- For without CR/LF on end file
        If amount < 0 then
          amount := len - offset + 1;
          b_finish := false;
        End If;
        dbms_lob.read(p_clob, amount, offset, lc_buffer);
        If b_finish then
          lc_buffer := SUBSTR(lc_buffer,1,Length(lc_buffer)-1);  
        End If;
        if (line_seq-1) > 0 then
          amount_last := amount_last + amount;
          offset := offset + amount; 
        else
          amount_last := amount;
          offset := amount + nCRLF;
        end if;
        amount := instr(p_clob, sCRLF, offset);
        amount := amount - amount_last;
        dbms_output.put_line('Line #'||line_seq||': '||lc_buffer);
        line_seq := line_seq + 1;
      end loop; 
      if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
begin
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;
Share:
79,424
ilya.stmn
Author by

ilya.stmn

Java developer

Updated on November 09, 2020

Comments

  • ilya.stmn
    ilya.stmn over 3 years

    In my project i use oracle as primary database and i've faced a problem with parsing clob. So suppose we have a clob with value

       aaaaaa
       cccccc
       bbbbbb
    

    And it's stored in table test ...

    I need to write plsql procedure to get this clob and split it so that i will have array with three items [aaaaaa,cccccccc,bbbbbbb].

    Is there any possible solutions?