blob: 810b5cc534db85d6f2a0d354bbf764a70eb82df3 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// PageRank on the DBLP bibliography.
// You can download dblp.xml.gz from http://dblp.uni-trier.de/xml/
// Input dataset: the DBLP XML file whose path is the first script argument
// (args[0]), read through the xml source with the tag set
// {"article","inproceedings"}.
// NOTE(review): presumably the tag set selects which XML elements become
// records of DBLP -- confirm against the MRQL source(xml,...) documentation.
DBLP = source(xml,args[0],{"article","inproceedings"});
// Citation graph as adjacency lists: one (key, cited) pair per DBLP record,
// where key is the record's @key attribute and cited is the collection of
// the text of its cite children.
// Cite entries whose text is "..." are skipped (presumably DBLP's placeholder
// for a citation that carries no key -- TODO confirm).
// A record with no remaining cite entries yields no (a, c) rows in the from
// clause, so it does not appear in graph at all.
graph = select (key,select text(x) from x in c)
from a in DBLP,
c in a.cite
where text(c) <> "..."
group by key: a.@key;
// Number of (key, cited) groups in graph, i.e. the number of nodes the
// PageRank iteration starts from. It is the denominator of both the initial
// rank and the (1-factor) term of the rank formula in the final query.
// NOTE(review): `store ... :=` presumably evaluates the count once and keeps
// the value, rather than re-evaluating count(graph) at every use -- confirm
// against the MRQL language description.
store graph_size := count(graph);
// damping factor: the weight given to the rank a node receives from the nodes
// that cite it. The remaining (1-factor) is shared equally between all nodes,
// as (1-factor)/graph_size, in the rank formula of the query below.
factor = 0.85;
// Final result: (title text, rank) pairs for the nodes produced by the
// iteration below, joined back to DBLP on @key to fetch the title and
// ordered by decreasing rank.
select (text(a.title),x.rank)
// Initial state of the iteration: one node record per entry of graph, every
// node starting with the same rank 1.0/graph_size and carrying its adjacency
// list (the keys it cites).
from x in (repeat nodes = select < id: key, rank: 1.0/graph_size as double, adjacent: al >
from (key,al) in graph
// One iteration step. Each result is a pair: the node with its new rank
// (adjacency list carried over unchanged from m), and a boolean that is true
// when the rank changed by more than 10% relative to the previous rank.
// NOTE(review): presumably repeat keeps iterating while at least one of these
// booleans is true -- confirm against the MRQL repeat/step documentation.
step select (< id: m.id, rank: n.rank, adjacent: m.adjacent >,
abs((n.rank-m.rank)/m.rank) > 0.1)
// n: new rank per cited id = uniform share (1-factor)/graph_size plus factor
// times the sum of the contributions received by that id.
from n in (select < id: key,
rank: (1-factor)/graph_size+factor*sum(select x.rank from x in c) >
// c: contributions. Every node splits its current rank equally between the
// ids in its adjacency list, producing one record per (node, cited id).
from c in ( select < id: a, rank: n.rank/count(n.adjacent) >
from n in nodes, a in n.adjacent )
group by key: c.id),
// m: the previous state of the same node, matched on id.
// NOTE(review): this is an inner join, so a node that received no
// contribution in this step (absent from n) is dropped from the next state,
// and a cited id that is not itself a node (absent from nodes) is dropped too.
m in nodes
where n.id = m.id
// NOTE(review): presumably caps the number of iterations at 20 -- confirm.
limit 20),
a in DBLP
where a.@key=x.id
order by x.rank desc;