Skip to content

Commit

Permalink
Fix: eight (#386)
Browse files Browse the repository at this point in the history
* Fix: eight

* Fix: no lazyloadx anymore?
  • Loading branch information
eight04 authored Nov 13, 2024
1 parent 898fa76 commit 2261470
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 76 deletions.
182 changes: 108 additions & 74 deletions comiccrawler/mods/eight.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,25 @@
import re
from urllib.parse import urljoin

from deno_vm import VM
from deno_vm import VM, eval

from ..core import Episode, grabhtml
from ..util import clean_tags
from ..url import update_qs

# Hostnames associated with this module (presumably used by the crawler to
# pick this module for a URL — confirm against the module loader).
domain = ["www.8comic.com", "www.comicvip.com", "comicbus.com", "www.comicabc.com"]
# Module name string (presumably shown to the user — confirm).
name = "無限"
# Next-page URLs keyed by the current page URL; get_next_page() pops entries.
next_page_cache = {}
# Placeholder for the nview.js source; starts unset.
nview = None

def get_title(html, url):
    """Return the comic title embedded in the page's ``addhistory(...)`` call.

    The title is the second quoted argument of the first
    ``addhistory("<digits>","<title>"`` occurrence in *html*. *url* is part of
    the module interface and is not used here.

    Raises:
        AttributeError: if *html* contains no matching ``addhistory`` call
            (``re.search`` returns ``None``).
    """
    # Raw string: ``\(`` and ``\d`` are regex escapes, not Python string
    # escapes, so a plain literal would trigger invalid-escape warnings.
    return re.search(r'addhistory\("\d+","([^"]+)', html).group(1)

def get_episodes(html, url):
html = html.replace("\n", "")

comicview_js = grabhtml(urljoin(url, "/js/comicview.js"))
js = """
function cview(...args) {
var output;
function getCookie() {}
function getcookie() {}
Expand All @@ -35,94 +36,127 @@ def get_episodes(html, url):
output = result;
}
};
var document = {
location: {
href: ""
}
};
""" + grabhtml(urljoin(url, "/js/comicview.js"))

const location = {set href(url) {output = url;}};
const document = {location};
const $ = () => $;
$.attr = $.html = $.text = $;
const addch = () => {};
""" + comicview_js + """
cview(...args);
return output;
}
"""

s = []
matches = re.finditer(
r'<a [^>]*?onclick="(cview[^"]+?);[^>]*>(.+?)</a>',
html, re.M
)
with VM(js) as vm:
with VM() as vm:
vm.run(js)
for match in matches:
cview, title = match.groups()

vm.run(cview)
ep_url = vm.run("output")
if "this" in cview:
continue

ep_url = vm.run(cview)
# ep_url = vm.run("location.href")
title = clean_tags(title)

e = Episode(title, urljoin(url, ep_url))
s.append(e)
return s

j_js = ""
lazy_js = ""

def get_images(html, url):
global nview
if not nview:
nview = re.search('src="([^"]*nview\.js[^"]*)"', html).group(1)
nview = urljoin(url, nview)
nview = grabhtml(nview)
global j_js
if not j_js:
j_js = re.search(r'src="([^"]*/j\.js[^"]*)"', html).group(1)
j_js = urljoin(url, j_js)
j_js = grabhtml(j_js)

try:
# http://www.comicbus.com/html/103.html
script = re.search('(var ch=.+?)spp\(\)', html, re.DOTALL).group(1)
except AttributeError:
# http://www.comicbus.com/html/7294.html
script = re.search('(var chs=.+?)</script>', html, re.DOTALL).group(1)
script = re.search('(function request.+?)</script>', html, re.DOTALL).group(1)

global lazy_js
if not lazy_js:
try:
lazy_js = re.search(r'src="([^"]*/lazyloadx\.js[^"]*)"', html).group(1)
except AttributeError:
pass
else:
lazy_js = urljoin(url, lazy_js)
lazy_js = grabhtml(lazy_js)
lazy_js = re.search(r'(var a=[\s\S]*?)o\.setAttribute', lazy_js).group(1)

js = """
var url,
images = [],
document = {
location: {
toString() {return url;},
get href() {return url;},
set href(_url) {url = _url;}
},
getElementById() {
return {
set src(value) {
images.push(value);
},
style: {}
};
}
},
navigator = {
userAgent: "",
language: ""
},
window = {},
alert = () => {};
(() => {
var url = """ + f"{url!r}" + """,
images = [],
document = {
documentElement: {},
location: {
toString() {
return url;
},
get href() {
return url;
},
set href(_url) {
url = _url;
},
},
getElementById() {
return {
set src(value) {
images.push(value);
},
style: {},
};
},
images: []
},
navigator = {
userAgent: "",
language: "",
},
window = { location: document.location,
document},
alert = () => {},
localStorage = {
getItem() {
return null;
},
setItem() {},
},
$ = () => $,
ps,
ci,
pi,
ni,
vv = "",
src;
$.attr = $.ready = $.on = $.click = $.hide = $.show = $.css = $.html = $.append = $.get = $.ajax = $.post = $;
""" + j_js + script + """
function scriptBody() {
initpage = () => {};
""" + nview + script + """
return [images[0], p, ps, ch];
}
function getImages(url) {
images = [];
document.location.href = url;
return scriptBody();
}
"""

with VM(js) as vm:
img, p, ps, ch = vm.call("getImages", url)
if p < ps:
if "/ReadComic/" in url:
# https://www.comicabc.com/ReadComic/6997/734/734_8d00xI27S.html?p=2
next_page_cache[url] = update_qs(url, {"p": p + 1})
else:
# https://www.comicabc.com/online/new-18117.html?ch=122-2
next_page_cache[url] = update_qs(url, {"ch": f"{ch}-{p + 1}"})

function *parseSrc() {
const rx = / s="([^"]+)"/g;
while ((m = rx.exec(xx))) {
yield m[1];
}
}
return urljoin(url, img)
return [...parseSrc()].map(src => {
""" + lazy_js + """
return unescape(src)
});
})();
"""
# import pathlib
# pathlib.Path("8comic.js").write_text(js)
imgs = eval(js)
return [urljoin(url, img) for img in imgs]

def get_next_page(html, url):
    """Return the cached next-page URL for *url* and remove it from the cache.

    Returns ``None`` when nothing is cached for *url*. *html* is not used.
    """
    try:
        return next_page_cache.pop(url)
    except KeyError:
        # Nothing cached for this page.
        return None
2 changes: 1 addition & 1 deletion comiccrawler/mods/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_title(html, url):
except AttributeError:
id = re.search("set=([^&]+)", url).group(1)
title = re.search("<title[^>]*>([^<]+)", html).group(1)
title = re.sub("\s+", " ", title)
title = re.sub(r"\s+", " ", title)
return unescape("{} ({})".format(title, id))

def get_episodes(html, url):
Expand Down
2 changes: 1 addition & 1 deletion comiccrawler/mods/sankaku_beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_episodes(html, url):
return eps[::-1]

def get_images(html, url):
    """Return the file URL of the post identified by *url*.

    The numeric post id is taken from the ``post/show/<id>`` part of *url*,
    the post is requested from the capi-v2 ``posts`` endpoint via
    ``grabhtml``, and the ``file_url`` field of the first entry in the JSON
    response is returned. *html* is not used.

    Raises:
        AttributeError: if *url* has no ``post/show/<digits>`` component.
    """
    # Named ``post_id`` to avoid shadowing the builtin ``id``; the raw string
    # keeps ``\d`` a regex escape rather than an invalid string escape.
    post_id = re.search(r"post/show/(\d+)", url).group(1)
    data = grabhtml("https://capi-v2.sankakucomplex.com/posts?lang=english&page=1&limit=1&tags=id_range:{}".format(post_id))
    data = json.loads(data)
    return data[0]["file_url"]
Expand Down

0 comments on commit 2261470

Please sign in to comment.