Reading clob line by line with pl\sql

Solution 1:

Here is a piece of code that works. I suggest that you use explicit cursors instead of implicit ones (FOR i IN (select...)), for performance purpose.

First here is the script to create testcase.

create table test (c clob);

insert into test (c) values (
'azertyuiop
qsdfghjklm
wxcvbn
');

Then here is the script to read line by line Clob :

/* Formatted on 28/08/2012 14:16:52 (QP5 v5.115.810.9015) */
declare
    nStartIndex number := 1;
    nEndIndex number := 1;
    nLineIndex number := 0;
    vLine varchar2(2000);

    cursor c_clob is
    select c from test;

    c clob;
    -------------------------------
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      i pls_integer := 1;
    begin
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      amount := instr(p_clob, chr(10), offset);
      while ( offset < len )
      loop
        dbms_lob.read(p_clob, amount, offset, lc_buffer);
        dbms_output.put_line('Line #'||i||':'||lc_buffer);
       offset := offset + amount;
       i := i + 1;
      end loop; 
          if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
    ---------------------------
begin
    dbms_output.put_line('-----------');
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;

'amount' variable is used to detect end of line position. Be carfull, in some case the end of line is CHR(10)||CHR(13) (CR + LF), and in some other cases it is only CHR(10).

Solution 2:

While the SQL regexp / connect by level approach is probably the most elegant, it is quite bad performancewise (for my testcase on 11.2.0.3.0). Much faster is a simple parse like this.

procedure parse_clob(p_clob in clob) is
l_offset pls_integer:=1;
l_line varchar2(32767);
l_total_length pls_integer:=length(p_clob);
l_line_length pls_integer;
begin
  while l_offset<=l_total_length loop
    l_line_length:=instr(p_clob,chr(10),l_offset)-l_offset;
    if l_line_length<0 then
      l_line_length:=l_total_length+1-l_offset;
    end if;
    l_line:=substr(p_clob,l_offset,l_line_length);
    dbms_output.put_line(l_line); --do line processing
    l_offset:=l_offset+l_line_length+1;
  end loop;
end parse_clob;

Solution 3:

In case... - you have APEX installed - and the clob is less than 32K you may also want to look into the following code:

declare
  l_text varchar2(32767) := '...';
  l_rows wwv_flow_global.vc_arr2;
begin
  l_rows := apex_util.string_to_table(l_text, chr(10));
  for i in 1 .. l_rows.count loop
    dbms_output.put_line(l_rows(i));
  end loop;
end;
/

Solution 4:

A pipelined function with some additional options to drive the behavior. Tested/works on Windows, Oracle 11g (I have some suspicions it may fail in *nix environments because of how lines terminate).

CREATE OR REPLACE FUNCTION ETL_HELPER_PARSE
   (P_CLOB NCLOB, P_LINES_TO_SKIP INT DEFAULT 0, P_PUT_EMPTY_LINES CHAR DEFAULT 'N') RETURN SYS.ODCIVarchar2List PIPELINED
AS
  c_top_lines_to_skip  CONSTANT NUMBER  NOT NULL := P_LINES_TO_SKIP;
  c_output_empty_lines CONSTANT CHAR(1) NOT NULL := P_PUT_EMPTY_LINES; 
  --
  l_len     INT := DBMS_LOB.GETLENGTH(P_CLOB);
  l_hit     INT := 0;
  l_offset  INT := 1;
  l_amount  INT;  
  l_buffer  VARCHAR2(32767);
  l_cnt     INT := 1;  
BEGIN  
  WHILE ( l_offset < l_len )
  LOOP
    l_hit := DBMS_LOB.INSTR (
     lob_loc    => P_CLOB           -- IN   CLOB      CHARACTER SET ANY_CS
    ,pattern    => CHR(13)||CHR(10) -- IN   VARCHAR2  CHARACTER SET lob_loc%CHARSET
    ,offset     => l_offset         -- IN   INTEGER := 1
    ,nth        => 1                -- IN   INTEGER := 1
    );
    l_amount := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit - l_offset ELSE l_len - l_offset + 1 END;
    -- `l_amount=0` means a new empty line has been encountered
    IF l_cnt > c_top_lines_to_skip       
    THEN
      IF l_amount > 0
      THEN
        DBMS_LOB.READ(P_CLOB, l_amount, l_offset, l_buffer);
        PIPE ROW (l_buffer);
      ELSIF UPPER(c_output_empty_lines) = 'Y'
      THEN
        PIPE ROW ('');
      END IF;
    END IF;

    l_offset := CASE WHEN COALESCE(l_hit, 0) > 0  THEN l_hit + 2 ELSE l_len END;    
    l_cnt := l_cnt + 1;
  end loop;
EXCEPTION
  WHEN OTHERS THEN
     DBMS_OUTPUT.PUT_LINE('Error : '||SQLERRM);
END ETL_HELPER_PARSE;