施用PLSQL 订阅 itpub rss源

使用PLSQL 订阅 itpub rss源
实现功能:

订阅Oracle开发 板块的rss ,根据Title 排重 入库。

1. 创建rss源表以及序列。

create table rss_itpub (id number , 
                           title varchar2(2000) , 
                           link varchar2(2000) ,
                           description varchar2(2000) ,
                           category varchar2(2000) ,
                           author varchar2(2000) , 
                           pubDate date ,
                           enclosure varchar2(4000) ,
                           other varchar2(4000) 
                           ) ;
create sequence seq_rss_itpub ;


2. 创建包

create or replace package xml_type_util is
  /*
    用于刷新rss_dataguru表中的数据(更新rss源)
    exec xml_type_util.refresh_rss_table ;
  */
  procedure refresh_rss_table ;
end ;
/

create or replace package body xml_type_util is
  --rss表rss_dataguru
  type t_rss_tb_list is table of rss_itpub%rowtype ;
  --private function 
  function convert_to_DOMDocument (xmlclob  clob)  return xmldom.DOMDocument 
  is
    parser                    xmlparser.PARSER;
    result_document           xmldom.DOMDocument;
  begin
--    dbms_output.put_line(dbms_lob.Getlength(xmlclob)) ;
    parser := xmlparser.newParser;
    xmlparser.parseClob(parser, xmlclob);
    result_document := xmlparser.getDocument(parser);
    --set free
    xmlparser.freeParser(parser);
    return result_document ;
  end ;
  --private function 2
  function parse_xml_to_record (doc xmldom.DOMDocument) return t_rss_tb_list is
    lenUnit       integer;
    lenItem       integer;
    unitNodes     xmldom.DOMNodeList;
    itemNodes     xmldom.DOMNodeList;
    tempNode_unit xmldom.DOMNode;
    tempNode      xmldom.DOMNode;
    name          varchar2(1000);
    value         varchar2(1000);
    rss_tb_list   t_rss_tb_list := t_rss_tb_list() ;
  begin
    unitNodes := xmldom.getElementsByTagName(doc, 'item');
    lenUnit   := xmldom.getLength(unitNodes);
    rss_tb_list.extend(lenUnit) ;
    dbms_output.put_line(lenUnit) ;
    FOR i in 0 .. lenUnit - 1 LOOP
     tempNode_unit := xmldom.item( unitNodes, i );    
     itemNodes:=xmldom.getChildNodes(tempNode_unit); 
     lenItem := xmldom.getLength( itemNodes );
     rss_tb_list(i+1).id := seq_rss_itpub.nextval ;
     --遍历子元素,暂时没有找到更好的方法,使用case when 还可以获取其他异常情况,放到other字段中
     FOR j in 0..lenItem-1 LOOP
         tempNode := xmldom.item( itemNodes, j );         
         name := xmldom.getNodeName(tempNode);
         value := xmldom.getNodeValue(xmldom.getFirstChild(tempNode));
         case name 
         when 'title' then
           rss_tb_list(i+1).title := value ;
         when 'link' then
           rss_tb_list(i+1).link  := value ;
         when 'description' then
           rss_tb_list(i+1).description := value ;
         when 'category' then
           rss_tb_list(i+1).category := value ;
         when 'author' then
           rss_tb_list(i+1).author := value ;
         when 'pubDate' then
           --value : Mon, 24 Dec 2012 10:58:00 +0000 regexp_replace 截取字符串 因为默认显示的是0时区时间,北京为东八区,需要加8个小时
           rss_tb_list(i+1).pubDate := TO_DATE(regexp_replace(value,'.*, |\+.+',''),'dd MON YYYY hh24:mi:ss')+8/24 ;
         --附件信息
         when 'enclosure' then 
           value := xmldom.getAttribute(xmldom.makeElement(tempNode),'url') ;
           rss_tb_list(i+1).enclosure:=value ;
         --其他信息,未处理的元素信息
         else 
           rss_tb_list(i+1).other := rss_tb_list(i+1).other || ','||name||'@'||value ;
         end case ;
         --DBMS_output.PUT_LINE(i||j||name||value);
       end loop;
    end loop;
    --free
    xmldom.freeDocument(doc);
    return rss_tb_list ;
  end ;
  --主函数
  procedure refresh_rss_table is
    --Oracle 开发 rss 地址
    v_blog_url varchar2(4000) := 'http://www.itpub.net/forum.php?mod=rss'
                                 ||chr(38)||'fid=3'||chr(38)||
                                 'auth=004clZR9P033xjFZsM%2FJe2jTbI5m3ObYbosefQZAy11tziIeN1967GnsmVY1c34d' ;
    
    clobs clob ;
    doc xmldom.DOMDocument  ;
    rss_s t_rss_tb_list ;
  begin
--    DBMS_LOB.CREATETEMPORARY(clobs, true);
    clobs := httpuritype(v_blog_url).getClob; --获取目标url的xml内容
    If Dbms_Lob.Getlength(clobs) Is Null or Dbms_Lob.Getlength(clobs) = 0 Then
      --判断url是否存在
      raise_application_error(-20999, 'No xml.');
    End If;
    doc := convert_to_DOMDocument(clobs) ;
    rss_s := parse_xml_to_record(doc) ;
    --如果存在相同的title,不做操作,如果表中不存在,则执行插入操作
    forall i in 1 .. rss_s.last  
      execute immediate '
      merge into rss_itpub r 
      using (select :1 as title , 
                    :2 as link , 
                    :3 as description , 
                    :4 as category , 
                    :5 as author , 
                    :6 as pubDate,
                    :7 as enclosure , 
                    :8 as other ,
                    :9 as id from dual )l
      on (r.title=l.title) 
      when not matched then 
      insert (title,link,description,category,author,pubDate,enclosure,other,id)
             values (l.title,l.link,l.description,l.category,l.author,l.pubDate,l.enclosure,l.other,l.id)'
             using  rss_s(i).title,rss_s(i).link,rss_s(i).description,rss_s(i).category,rss_s(i).author,
                    rss_s(i).pubDate,rss_s(i).enclosure,rss_s(i).other,rss_s(i).id ;
      commit ;
  end ;
end;
/


3.执行

SQL> exec xml_type_util_2.refresh_rss_table ;
 
PL/SQL procedure successfully completed


4.查询

施用PLSQL 订阅 itpub rss源


附录ORA-24247解决办法

执行前三步


1 .create acl

begin
dbms_network_acl_admin.create_acl (
acl => 'connect_to_world_acl.xml',
description => 'CONNECT_TO_WORLD_ACL' ,
principal => 'DEXTER',       
is_grant => true,
privilege => 'resolve'
);
end;
/
commit ;
2. 指定权限
begin
dbms_network_acl_admin.add_privilege (
acl => 'connect_to_world_acl.xml',
principal => 'DEXTER',
is_grant => true,
privilege => 'connect'
);
end;
/
commit ;
3. 指派acl
begin
dbms_network_acl_admin.assign_acl(
acl => 'connect_to_world_acl.xml',
host => '*'
);
end;
/
commit ;

4. 查询
select host, lower_port, upper_port, acl from dba_network_acls;

select acl,
       principal,
       privilege,
       is_grant,
       to_char(start_date, 'dd-mon-yyyy') as start_date,
       to_char(end_date, 'dd-mon-yyyy') as end_date
  from dba_network_acl_privileges;

select any_path from resource_view where any_path like '/sys/acls/%.xml';
5. 删除

BEGIN
DBMS_NETWORK_ACL_ADMIN.drop_acl (
acl => 'connect_to_world_acl.xml');
COMMIT;
END;
/


如果想要订阅其他板块,只需要修改v_blog_url 参数即可。