-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit c5873fc
Showing
14 changed files
with
300 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<classpath> | ||
<classpathentry kind="src" output="target/classes" path="src/main/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="output" path="target/classes"/> | ||
</classpath> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<projectDescription> | ||
<name>pleonast.downloader</name> | ||
<comment></comment> | ||
<projects> | ||
</projects> | ||
<buildSpec> | ||
<buildCommand> | ||
<name>org.eclipse.jdt.core.javabuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
<buildCommand> | ||
<name>org.eclipse.m2e.core.maven2Builder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
</buildSpec> | ||
<natures> | ||
<nature>org.eclipse.jdt.core.javanature</nature> | ||
<nature>org.eclipse.m2e.core.maven2Nature</nature> | ||
</natures> | ||
</projectDescription> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
eclipse.preferences.version=1 | ||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 | ||
org.eclipse.jdt.core.compiler.compliance=1.5 | ||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning | ||
org.eclipse.jdt.core.compiler.source=1.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
activeProfiles= | ||
eclipse.preferences.version=1 | ||
resolveWorkspaceProjects=true | ||
version=1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.online201</groupId>
  <artifactId>pleonast.downloader</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>Pleonast Archiver</name>

  <properties>
    <!-- Make the build independent of the platform default encoding. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Same language level the Eclipse JDT settings of this project use (1.5). -->
    <maven.compiler.source>1.5</maven.compiler.source>
    <maven.compiler.target>1.5</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>net.sourceforge.htmlunit</groupId>
      <artifactId>htmlunit</artifactId>
      <version>2.11</version>
    </dependency>
    <dependency>
      <groupId>joda-time</groupId>
      <artifactId>joda-time</artifactId>
      <version>2.1</version>
    </dependency>
    <!--
      PleoDownloader imports org.apache.commons.lang3.StringUtils directly, so
      the library is declared here instead of being picked up only as a
      transitive dependency of htmlunit.
      NOTE(review): 3.1 is believed to be the version htmlunit 2.11 resolves;
      confirm with "mvn dependency:tree" before merging.
    -->
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.1</version>
    </dependency>
  </dependencies>
</project>
34 changes: 34 additions & 0 deletions
34
src/main/java/com/online201/pleonast/downloader/Comment.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package com.online201.pleonast.downloader; | ||
|
||
import org.joda.time.DateTime; | ||
|
||
public class Comment { | ||
private String text; | ||
private DateTime date; | ||
private String comment; | ||
|
||
public String getComment() { | ||
return comment; | ||
} | ||
|
||
public DateTime getDate() { | ||
return date; | ||
} | ||
|
||
public String getText() { | ||
return text; | ||
} | ||
|
||
public void setComment(String comment) { | ||
this.comment = comment; | ||
} | ||
|
||
public void setDate(DateTime date) { | ||
this.date = date; | ||
} | ||
|
||
public void setText(String text) { | ||
this.text = text; | ||
} | ||
|
||
} |
45 changes: 45 additions & 0 deletions
45
src/main/java/com/online201/pleonast/downloader/Entry.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package com.online201.pleonast.downloader; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.joda.time.DateTime; | ||
|
||
public class Entry { | ||
private String title; | ||
private DateTime date; | ||
private String body; | ||
private List<Comment> comments = new ArrayList<Comment>(); | ||
|
||
public String getBody() { | ||
return body; | ||
} | ||
|
||
public List<Comment> getComments() { | ||
return comments; | ||
} | ||
|
||
public DateTime getDate() { | ||
return date; | ||
} | ||
|
||
public String getTitle() { | ||
return title; | ||
} | ||
|
||
public void setBody(String body) { | ||
this.body = body; | ||
} | ||
|
||
public void setComments(List<Comment> comments) { | ||
this.comments = comments; | ||
} | ||
|
||
public void setDate(DateTime date) { | ||
this.date = date; | ||
} | ||
|
||
public void setTitle(String title) { | ||
this.title = title; | ||
} | ||
} |
99 changes: 99 additions & 0 deletions
99
src/main/java/com/online201/pleonast/downloader/PleoDownloader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package com.online201.pleonast.downloader; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.commons.lang3.StringUtils; | ||
import org.joda.time.format.DateTimeFormat; | ||
import org.joda.time.format.DateTimeFormatter; | ||
|
||
import com.gargoylesoftware.htmlunit.BrowserVersion; | ||
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; | ||
import com.gargoylesoftware.htmlunit.WebClient; | ||
import com.gargoylesoftware.htmlunit.html.DomNode; | ||
import com.gargoylesoftware.htmlunit.html.DomText; | ||
import com.gargoylesoftware.htmlunit.html.HtmlElement; | ||
import com.gargoylesoftware.htmlunit.html.HtmlInput; | ||
import com.gargoylesoftware.htmlunit.html.HtmlPage; | ||
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; | ||
|
||
public class PleoDownloader { | ||
|
||
private static final DateTimeFormatter dateParser = DateTimeFormat.forPattern("MM/dd/yy HH:mmaa"); | ||
public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException { | ||
WebClient client = new WebClient(BrowserVersion.FIREFOX_3_6); | ||
client.getOptions().setJavaScriptEnabled(false); | ||
client.getOptions().setCssEnabled(false); | ||
logIn(args, client); | ||
|
||
HtmlPage currentPage = client.getPage("http://pleonast.com/users/"+args[0]); | ||
|
||
List<HtmlElement> pagination = (List<HtmlElement>) currentPage.getByXPath("//div[@class='pagination']/span[@class='current']"); | ||
int numPages = Integer.parseInt(pagination.get(0).getTextContent().trim()); | ||
System.out.println("There are " + numPages +" pages worth of entries to parse"); | ||
|
||
List<Entry> entries = new ArrayList<Entry>(); | ||
|
||
|
||
entries.addAll(parseEntries(currentPage)); | ||
for (int i = 2; i <= numPages; i++) { | ||
currentPage = client.getPage("http://pleonast.com/users/"+args[0]+"?page="+i); | ||
entries.addAll(parseEntries(currentPage)); | ||
} | ||
} | ||
|
||
private static List<Entry> parseEntries(HtmlPage currentPage) { | ||
List<Entry> entries = new ArrayList<Entry>(); | ||
List<HtmlElement> pageEntries = (List<HtmlElement>) currentPage.getByXPath("//div[@class='entry']"); | ||
for (HtmlElement pageEntry : pageEntries) { | ||
Entry entry = new Entry(); | ||
entry.setTitle(pageEntry.getElementsByTagName("h1").get(0).getTextContent().trim()); | ||
String body = pageEntry.getElementsByAttribute("div", "class", "body").get(0).asXml(); | ||
body = StringUtils.substringAfter(body, ">"); | ||
body = StringUtils.substringBeforeLast(body, "<"); | ||
body = StringUtils.replace(body, " <br/>\n", "\n"); | ||
String date = pageEntry.getElementsByAttribute("div", "class", "byline").get(0).getElementsByTagName("span").get(0).getTextContent().trim(); | ||
entry.setDate(dateParser.parseDateTime(date.replace(" ", " "))); | ||
entry.setBody(body); | ||
entry.setComments(parseComments(pageEntry)); | ||
System.out.println(entry.getDate().toString() +" : " + entry.getTitle()+", " + entry.getComments().size() +" comments"); | ||
entries.add(entry); | ||
} | ||
return entries; | ||
} | ||
|
||
private static void logIn(String[] args, WebClient client) | ||
throws IOException, MalformedURLException { | ||
HtmlPage page = client.getPage("http://pleonast.com/login"); | ||
HtmlInput username = page.getElementByName("user_session[username]"); | ||
HtmlInput password = page.getElementByName("user_session[password]"); | ||
|
||
username.setValueAttribute(args[0]); | ||
password.setValueAttribute(args[1]); | ||
HtmlSubmitInput loginButton = (HtmlSubmitInput) page.getElementByName("commit"); | ||
loginButton.click(); | ||
} | ||
|
||
private static List<Comment> parseComments(HtmlElement pageEntry) { | ||
List<Comment> comments = new ArrayList<Comment>(); | ||
List<HtmlElement> pageComments = pageEntry.getElementsByAttribute("li", "class", "comment"); | ||
for (HtmlElement pageComment : pageComments) { | ||
Comment c = new Comment(); | ||
HtmlElement right = pageComment.getElementsByAttribute("div", "class", "right").get(0); | ||
HtmlElement body = right.getElementsByAttribute("div", "class", "body").get(0); | ||
HtmlElement meta = right.getElementsByAttribute("div", "class", "meta").get(0); | ||
String who = StringUtils.substringAfterLast(meta.getElementsByTagName("a").get(0).getAttribute("href"),"/"); | ||
for (DomNode node : meta.getChildren()) { | ||
String text = node.getTextContent().trim(); | ||
if (node instanceof DomText && text.startsWith("at ") && text.endsWith("M")) { | ||
c.setDate(dateParser.parseDateTime(StringUtils.substringAfter(text," ").replace(" ", " "))); | ||
} | ||
} | ||
c.setText(who); | ||
comments.add(c); | ||
} | ||
return comments; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
Manifest-Version: 1.0 | ||
Built-By: danwatt | ||
Build-Jdk: 1.6.0_37 | ||
Created-By: Maven Integration for Eclipse | ||
|
7 changes: 7 additions & 0 deletions
7
target/classes/META-INF/maven/com.online201/pleonast.downloader/pom.properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#Generated by Maven Integration for Eclipse | ||
#Wed Feb 20 22:14:42 CST 2013 | ||
version=0.0.1-SNAPSHOT | ||
groupId=com.online201 | ||
m2e.projectName=pleonast.downloader | ||
m2e.projectLocation=/Users/danwatt/Documents/workspace/pleonast.downloader | ||
artifactId=pleonast.downloader |
21 changes: 21 additions & 0 deletions
21
target/classes/META-INF/maven/com.online201/pleonast.downloader/pom.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" | ||
> | ||
<modelVersion>4.0.0</modelVersion> | ||
<groupId>com.online201</groupId> | ||
<artifactId>pleonast.downloader</artifactId> | ||
<version>0.0.1-SNAPSHOT</version> | ||
<name>Pleonast Archiver</name> | ||
<dependencies> | ||
<dependency> | ||
<groupId>net.sourceforge.htmlunit</groupId> | ||
<artifactId>htmlunit</artifactId> | ||
<version>2.11</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>joda-time</groupId> | ||
<artifactId>joda-time</artifactId> | ||
<version>2.1</version> | ||
</dependency> | ||
</dependencies> | ||
</project> |
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+7.49 KB
target/classes/com/online201/pleonast/downloader/PleoDownloader.class
Binary file not shown.