Skip to content

Commit

Permalink
Change Wikipedia API calls to https and add "&continue=" in WikiApi
Browse files Browse the repository at this point in the history
fixes #399
  • Loading branch information
jimkont committed Jun 18, 2015
1 parent 771751b commit 4578610
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
10 changes: 5 additions & 5 deletions core/src/main/scala/org/dbpedia/extraction/util/Language.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ import org.dbpedia.extraction.ontology.RdfNamespace
* Use propertyUri.append("xy"), not string concatenation.
* @param baseUri URI prefix for this wiki, e.g. "http://be-x-old.wikipedia.org",
* "http://commons.wikimedia.org", "http://mappings.dbpedia.org".
* @param apiUri API URI for this wiki, e.g. "http://be-x-old.wikipedia.org/w/api.php",
* "http://commons.wikimedia.org/w/api.php", "http://mappings.dbpedia.org/api.php".
* @param apiUri API URI for this wiki, e.g. "https://be-x-old.wikipedia.org/w/api.php",
* "https://commons.wikimedia.org/w/api.php", "https://mappings.dbpedia.org/api.php".
*/
class Language private(
val wikiCode: String,
Expand Down Expand Up @@ -69,7 +69,7 @@ object Language extends (String => Language)
new DBpediaNamespace("http://"+code+".dbpedia.org/resource/"),
new DBpediaNamespace("http://"+code+".dbpedia.org/property/"),
"http://"+code+".wikipedia.org",
"http://"+code+".wikipedia.org/w/api.php"
"https://"+code+".wikipedia.org/w/api.php"
)
}

Expand Down Expand Up @@ -235,7 +235,7 @@ object Language extends (String => Language)
new DBpediaNamespace("http://commons.dbpedia.org/resource/"),
new DBpediaNamespace("http://commons.dbpedia.org/property/"),
"http://commons.wikimedia.org",
"http://commons.wikimedia.org/w/api.php"
"https://commons.wikimedia.org/w/api.php"
)

languages("wikidata") =
Expand All @@ -248,7 +248,7 @@ object Language extends (String => Language)
new DBpediaNamespace("http://wikidata.dbpedia.org/resource/"),
new DBpediaNamespace("http://wikidata.dbpedia.org/property/"),
"http://www.wikidata.org",
"http://www.wikidata.org/w/api.php"
"https://www.wikidata.org/w/api.php"
)


Expand Down
12 changes: 6 additions & 6 deletions core/src/main/scala/org/dbpedia/extraction/util/WikiApi.scala
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ class WikiApi(url: URL, language: Language)
def retrievePagesByNamespace[U](namespace : Namespace, f : WikiPage => U, fromPage : String = "")
{
// TODO: instead of first getting the page ids and then the pages, use something like
// ?action=query&generator=allpages&prop=revisions&rvprop=ids|content&format=xml&gapnamespace=0
// ?action=query&continue=&generator=allpages&prop=revisions&rvprop=ids|content&format=xml&gapnamespace=0
// -> "generator" instead of "list" and "gapnamespace" instead of "apnamespace" ("gap" is for "generator all pages")

//Retrieve list of pages
val response = query("?action=query&format=xml&list=allpages&apfrom=" + URLEncoder.encode(fromPage, "UTF-8") + "&aplimit=" + pageListLimit + "&apnamespace=" + namespace.code)
val response = query("?action=query&continue=&format=xml&list=allpages&apfrom=" + URLEncoder.encode(fromPage, "UTF-8") + "&aplimit=" + pageListLimit + "&apnamespace=" + namespace.code)

//Extract page ids
val pageIds = for(p <- response \ "query" \ "allpages" \ "p") yield (p \ "@pageid").head.text.toLong
Expand Down Expand Up @@ -102,7 +102,7 @@ class WikiApi(url: URL, language: Language)
{
for(group <- ids.grouped(pageDownloadLimit))
{
val response = query("?action=query&format=xml&prop=revisions&"+param+"=" + group.mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
val response = query("?action=query&continue=&format=xml&prop=revisions&"+param+"=" + group.mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
processPages(response, proc)
}
}
Expand All @@ -119,7 +119,7 @@ class WikiApi(url: URL, language: Language)
{
for(titleGroup <- titles.grouped(pageDownloadLimit))
{
val response = query("?action=query&format=xml&prop=revisions&titles=" + titleGroup.map(formatWikiTitle).mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
val response = query("?action=query&continue=&format=xml&prop=revisions&titles=" + titleGroup.map(formatWikiTitle).mkString("|") + "&rvprop=ids|content|timestamp|user|userid")
processPages(response, proc)
}
}
Expand Down Expand Up @@ -161,7 +161,7 @@ class WikiApi(url: URL, language: Language)
*/
def retrieveTemplateUsages(title : WikiTitle, namespace: Namespace = Namespace.Main, maxCount : Int = 500) : Seq[WikiTitle] =
{
val response = query("?action=query&format=xml&list=embeddedin&eititle=" + title.encodedWithNamespace + "&einamespace=" + namespace.code + "&eifilterredir=nonredirects&eilimit=" + maxCount)
val response = query("?action=query&continue=&format=xml&list=embeddedin&eititle=" + title.encodedWithNamespace + "&einamespace=" + namespace.code + "&eifilterredir=nonredirects&eilimit=" + maxCount)

for(page <- response \ "query" \ "embeddedin" \ "ei";
title <- page \ "@title" )
Expand All @@ -182,7 +182,7 @@ class WikiApi(url: URL, language: Language)
var appropriateQuery = "";

do{
appropriateQuery = "?action=query&format=xml&list=embeddedin&eititle=" + title.encodedWithNamespace +
appropriateQuery = "?action=query&continue=&format=xml&list=embeddedin&eititle=" + title.encodedWithNamespace +
"&einamespace=0&eifilterredir=nonredirects&eilimit=" + maxCount;
//Since the call can return only 500 matches at most we must use the eicontinue parameter to
//get the other matches
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ class AbstractExtractorWikipedia(
extends AbstractExtractor (context)
{

override def apiUrl: String = "http://" + context.language.wikiCode + ".wikipedia.org/w/api.php"
override def apiUrl: String = "https://" + context.language.wikiCode + ".wikipedia.org/w/api.php"
}

0 comments on commit 4578610

Please sign in to comment.