From d2765274b115e23951ee8d09ac0ef17573fb15fc Mon Sep 17 00:00:00 2001 From: bruno_messias Date: Thu, 24 Aug 2017 12:20:00 -0300 Subject: [PATCH 1/2] Allows downloading through a list of items --- README | 59 +++++++++++++++++++++++++++++++++++++-- README.md | 31 +++++++++++++++++--- scihub2pdf/__init__.py | 2 +- scihub2pdf/bin/scihub2pdf | 38 +++++++++++++++++++------ scihub2pdf/scihub.py | 7 ++--- setup.py | 4 +-- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/README b/README index 7511ab8..d4cdf73 100755 --- a/README +++ b/README @@ -1,5 +1,5 @@ -SciHub to PDF -============= +SciHub to PDF(Beta) +=================== Description ----------- @@ -36,7 +36,14 @@ Given a title... :: - $ sci2bib --title An useful paper + $ scihub2pdf --title An useful paper + +Arxiv... + +:: + + $ scihub2pdf arxiv:0901.2686 + $ scihub2pdf --title arxiv:Periodic table for topological insulators Location folder as argument @@ -64,3 +71,49 @@ Libgen - No CAPTCHA - Slow +Download from list of items +--------------------------- + +Given a text file like + +:: + + 10.1038/s41524-017-0032-0 + 10.1063/1.3149495 + ..... + +download all pdf's + +:: + + $ scihub2pdf -i dois.txt --txt + +Given a text file like + +:: + + Some Title 1 + Some Title 2 + ..... + +download all pdf's + +:: + + $ scihub2pdf -i titles.txt --txt --title + +Given a text file like + +:: + + arXiv:1708.06891 + arXiv:1708.06071 + arXiv:1708.05948 + ..... + +download all pdf's + +:: + + $ scihub2pdf -i arxiv_ids.txt --txt + diff --git a/README.md b/README.md index c54c75f..796f8c4 100755 --- a/README.md +++ b/README.md @@ -61,16 +61,39 @@ $ scihub2pdf -i input.bib --uselibgen - No CAPTCHA - Slow -## Using bibcure modules +## Download from list of items Given a text file like + ``` +10.1038/s41524-017-0032-0 10.1063/1.3149495 -10.7717/peerj.3714 ..... ``` download all pdf's ``` -$ doi2bib -i input_dois.txt > refs.bib -$ scihub2pdf -i refs.bib +$ scihub2pdf -i dois.txt --txt +``` +Given a text file like + +``` +Some Title 1 +Some Title 2 +..... +``` +download all pdf's +``` +$ scihub2pdf -i titles.txt --txt --title +``` +Given a text file like + +``` +arXiv:1708.06891 +arXiv:1708.06071 +arXiv:1708.05948 +..... +``` +download all pdf's +``` +$ scihub2pdf -i arxiv_ids.txt --txt ``` diff --git a/scihub2pdf/__init__.py b/scihub2pdf/__init__.py index 15e7287..8b34414 100755 --- a/scihub2pdf/__init__.py +++ b/scihub2pdf/__init__.py @@ -1,2 +1,2 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/scihub2pdf/bin/scihub2pdf b/scihub2pdf/bin/scihub2pdf index 39593a9..2e4b2bc 100755 --- a/scihub2pdf/bin/scihub2pdf +++ b/scihub2pdf/bin/scihub2pdf @@ -8,7 +8,7 @@ import textwrap # from sci2pdf.libgen import download_from_title from scihub2pdf.scihub import download_pdf_from_bibs, download_from_doi from scihub2pdf.scihub import download_from_title, download_from_arxiv - +import re def main(): @@ -48,6 +48,7 @@ def main(): ) parser.add_argument( "--input", "-i", + dest="inputfile", type=argparse.FileType("r"), help="bibtex input file" ) @@ -67,13 +68,20 @@ def main(): "--location", "-l", help="folder, ex: -l 'folder/'" ) + parser.add_argument( + "--txt", + action="store_true", + help="Just create a file with DOI's or titles" + ) parser.set_defaults(title=False) parser.set_defaults(uselibgen=False) + parser.set_defaults(txt=False) parser.set_defaults(location="") args = parser.parse_known_args() title_search = args[0].title + is_txt = args[0].txt use_libgen = args[0].uselibgen inline_search = len(args[1]) > 0 location = args[0].location @@ -85,7 +93,7 @@ def main(): if inline_search: value = " ".join(args[1]) - is_arxiv = value.startswith("arxiv:") + is_arxiv = bool(re.match("arxiv:", value, re.I)) if is_arxiv: field = "ti" if title_search else "id" download_from_arxiv(value, field, location) @@ -94,13 +102,27 @@ def main(): else: download_from_doi(value, location, use_libgen) else: - bibtex = bibtexparser.loads(args[0].input.read()) - bibs = bibtex.entries - if len(bibs) == 0: - print("Input File is empty or corrupted.") - sys.exit(1) + if is_txt: + file_values = args[0].inputfile.read() + for value in file_values.split("\n"): + is_arxiv = bool(re.match("arxiv:", value, re.I)) + if value != "": + if is_arxiv: + field = "ti" if title_search else "id" + download_from_arxiv(value, field, location) + elif title_search: + download_from_title(value, location, use_libgen) + else: + download_from_doi(value, location, use_libgen) + + else: + bibtex = bibtexparser.loads(args[0].inputfile.read()) + bibs = bibtex.entries + if len(bibs) == 0: + print("Input File is empty or corrupted.") + sys.exit(1) - download_pdf_from_bibs(bibs, location, use_libgen) + download_pdf_from_bibs(bibs, location, use_libgen) if __name__ == "__main__": diff --git a/scihub2pdf/scihub.py b/scihub2pdf/scihub.py index a03a6b7..d94cddb 100755 --- a/scihub2pdf/scihub.py +++ b/scihub2pdf/scihub.py @@ -214,8 +214,8 @@ def download_from_title(title, location="", use_libgen=False): def download_from_arxiv(value, field="id", location=""): - - value = re.sub("arxiv\:", "", value) + print("Downloading...", value) + value = re.sub("arxiv\:", "", value, flags=re.I) found, pdf_link = get_arxiv_pdf_link(value, field) if found and pdf_link is not None: bib = {} @@ -226,5 +226,4 @@ def download_from_arxiv(value, field="id", location=""): s = requests.Session() download_pdf(bib, s) else: - print("Arxiv not found.") - + print(value, ": Arxiv not found.") diff --git a/setup.py b/setup.py index 6121b5f..798f02f 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="scihub2pdf", - version="0.2.0", + version="0.3.0", packages = find_packages(exclude=["build",]), scripts=["scihub2pdf/bin/scihub2pdf"], long_description = README_TEXT, @@ -22,7 +22,7 @@ description="Downloads pdfs via a DOI number(or arxivId), article title or a bibtex file, sci-hub", author="Bruno Messias", author_email="messias.physics@gmail.com", - download_url="https://github.com/bibcure/scihub2pdf/archive/0.2.0.tar.gz", + download_url="https://github.com/bibcure/scihub2pdf/archive/0.3.0.tar.gz", keywords=["bibtex", "sci-hub", "libgen", "doi", "science","scientific-journals"], classifiers=[ From 2028e5a3736eda49d2c0e458454c1e7b41fd26ea Mon Sep 17 00:00:00 2001 From: bruno_messias Date: Thu, 24 Aug 2017 12:29:06 -0300 Subject: [PATCH 2/2] Minor changes. --- scihub2pdf/bin/scihub2pdf | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/scihub2pdf/bin/scihub2pdf b/scihub2pdf/bin/scihub2pdf index 2e4b2bc..d7e8244 100755 --- a/scihub2pdf/bin/scihub2pdf +++ b/scihub2pdf/bin/scihub2pdf @@ -39,6 +39,42 @@ def main(): $ scihub2pdf --title arxiv:Periodic table for topological insulators + ## Download from list of items + + Given a text file like + + ``` + 10.1038/s41524-017-0032-0 + 10.1063/1.3149495 + ..... + ``` + download all pdf's + ``` + $ scihub2pdf -i dois.txt --txt + ``` + Given a text file like + + ``` + Some Title 1 + Some Title 2 + ..... + ``` + download all pdf's + ``` + $ scihub2pdf -i titles.txt --txt --title + ``` + Given a text file like + + ``` + arXiv:1708.06891 + arXiv:1708.06071 + arXiv:1708.05948 + ..... + ``` + download all pdf's + ``` + $ scihub2pdf -i arxiv_ids.txt --txt + ``` ----------------------------------------------------- @author: Bruno Messias @email: messias.physics@gmail.com