施用PLSQL 订阅 itpub rss源
使用PLSQL 订阅 itpub rss源
实现功能:
实现功能:
订阅Oracle开发 板块的rss ,根据Title 排重 入库。
1. 创建rss源表以及序列。
create table rss_itpub (id number , title varchar2(2000) , link varchar2(2000) , description varchar2(2000) , category varchar2(2000) , author varchar2(2000) , pubDate date , enclosure varchar2(4000) , other varchar2(4000) ) ; create sequence seq_rss_itpub ;
2. 创建包
create or replace package xml_type_util is /* 用于刷新rss_dataguru表中的数据(更新rss源) exec xml_type_util.refresh_rss_table ; */ procedure refresh_rss_table ; end ; / create or replace package body xml_type_util is --rss表rss_dataguru type t_rss_tb_list is table of rss_itpub%rowtype ; --private function function convert_to_DOMDocument (xmlclob clob) return xmldom.DOMDocument is parser xmlparser.PARSER; result_document xmldom.DOMDocument; begin -- dbms_output.put_line(dbms_lob.Getlength(xmlclob)) ; parser := xmlparser.newParser; xmlparser.parseClob(parser, xmlclob); result_document := xmlparser.getDocument(parser); --set free xmlparser.freeParser(parser); return result_document ; end ; --private function 2 function parse_xml_to_record (doc xmldom.DOMDocument) return t_rss_tb_list is lenUnit integer; lenItem integer; unitNodes xmldom.DOMNodeList; itemNodes xmldom.DOMNodeList; tempNode_unit xmldom.DOMNode; tempNode xmldom.DOMNode; name varchar2(1000); value varchar2(1000); rss_tb_list t_rss_tb_list := t_rss_tb_list() ; begin unitNodes := xmldom.getElementsByTagName(doc, 'item'); lenUnit := xmldom.getLength(unitNodes); rss_tb_list.extend(lenUnit) ; dbms_output.put_line(lenUnit) ; FOR i in 0 .. lenUnit - 1 LOOP tempNode_unit := xmldom.item( unitNodes, i ); itemNodes:=xmldom.getChildNodes(tempNode_unit); lenItem := xmldom.getLength( itemNodes ); rss_tb_list(i+1).id := seq_rss_itpub.nextval ; --遍历子元素,暂时没有找到更好的方法,使用case when 还可以获取其他异常情况,放到other字段中 FOR j in 0..lenItem-1 LOOP tempNode := xmldom.item( itemNodes, j ); name := xmldom.getNodeName(tempNode); value := xmldom.getNodeValue(xmldom.getFirstChild(tempNode)); case name when 'title' then rss_tb_list(i+1).title := value ; when 'link' then rss_tb_list(i+1).link := value ; when 'description' then rss_tb_list(i+1).description := value ; when 'category' then rss_tb_list(i+1).category := value ; when 'author' then rss_tb_list(i+1).author := value ; when 'pubDate' then --value : Mon, 24 Dec 2012 10:58:00 +0000 regexp_replace 截取字符串 因为默认显示的是0时区时间,北京为东八区,需要加8个小时 rss_tb_list(i+1).pubDate := TO_DATE(regexp_replace(value,'.*, |\+.+',''),'dd MON YYYY hh24:mi:ss')+8/24 ; --附件信息 when 'enclosure' then value := xmldom.getAttribute(xmldom.makeElement(tempNode),'url') ; rss_tb_list(i+1).enclosure:=value ; --其他信息,未处理的元素信息 else rss_tb_list(i+1).other := rss_tb_list(i+1).other || ','||name||'@'||value ; end case ; --DBMS_output.PUT_LINE(i||j||name||value); end loop; end loop; --free xmldom.freeDocument(doc); return rss_tb_list ; end ; --主函数 procedure refresh_rss_table is --Oracle 开发 rss 地址 v_blog_url varchar2(4000) := 'http://www.itpub.net/forum.php?mod=rss' ||chr(38)||'fid=3'||chr(38)|| 'auth=004clZR9P033xjFZsM%2FJe2jTbI5m3ObYbosefQZAy11tziIeN1967GnsmVY1c34d' ; clobs clob ; doc xmldom.DOMDocument ; rss_s t_rss_tb_list ; begin -- DBMS_LOB.CREATETEMPORARY(clobs, true); clobs := httpuritype(v_blog_url).getClob; --获取目标url的xml内容 If Dbms_Lob.Getlength(clobs) Is Null or Dbms_Lob.Getlength(clobs) = 0 Then --判断url是否存在 raise_application_error(-20999, 'No xml.'); End If; doc := convert_to_DOMDocument(clobs) ; rss_s := parse_xml_to_record(doc) ; --如果存在相同的title,不做操作,如果表中不存在,则执行插入操作 forall i in 1 .. rss_s.last execute immediate ' merge into rss_itpub r using (select :1 as title , :2 as link , :3 as description , :4 as category , :5 as author , :6 as pubDate, :7 as enclosure , :8 as other , :9 as id from dual )l on (r.title=l.title) when not matched then insert (title,link,description,category,author,pubDate,enclosure,other,id) values (l.title,l.link,l.description,l.category,l.author,l.pubDate,l.enclosure,l.other,l.id)' using rss_s(i).title,rss_s(i).link,rss_s(i).description,rss_s(i).category,rss_s(i).author, rss_s(i).pubDate,rss_s(i).enclosure,rss_s(i).other,rss_s(i).id ; commit ; end ; end; /
3.执行
SQL> exec xml_type_util_2.refresh_rss_table ; PL/SQL procedure successfully completed
4.查询
附录ORA-24247解决办法
执行前三步
1 .create acl begin dbms_network_acl_admin.create_acl ( acl => 'connect_to_world_acl.xml', description => 'CONNECT_TO_WORLD_ACL' , principal => 'DEXTER', is_grant => true, privilege => 'resolve' ); end; / commit ; 2. 指定权限 begin dbms_network_acl_admin.add_privilege ( acl => 'connect_to_world_acl.xml', principal => 'DEXTER', is_grant => true, privilege => 'connect' ); end; / commit ; 3. 指派acl begin dbms_network_acl_admin.assign_acl( acl => 'connect_to_world_acl.xml', host => '*' ); end; / commit ; 4. 查询 select host, lower_port, upper_port, acl from dba_network_acls; select acl, principal, privilege, is_grant, to_char(start_date, 'dd-mon-yyyy') as start_date, to_char(end_date, 'dd-mon-yyyy') as end_date from dba_network_acl_privileges; select any_path from resource_view where any_path like '/sys/acls/%.xml'; 5. 删除 BEGIN DBMS_NETWORK_ACL_ADMIN.drop_acl ( acl => 'connect_to_world_acl.xml'); COMMIT; END; /
如果想要订阅其他板块,只需要修改v_blog_url 参数即可。