Google Scholar Fetch error


(Mario Fiorini) #1

Hi there,
since today I have been unable to fetch citations from Google Scholar. I have read about the possible limitations imposed by Google, but I was wondering whether you could tell me if this is the problem or if it is something else. I have copied and pasted the error log below.
thanks,
Mario

12:54:10.990 [AWT-EventQueue-0] INFO  net.sf.jabref.logic.importer.OpenDatabase - Opening: /Users/102666/Dropbox/software/Tex/localtex/bibtex/bib/XBib.bib
12:54:56.410 [JabRef CachedThreadPool] WARN  net.sf.jabref.logic.net.URLDownload - Could not copy input
java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/IndexRedirect?continue=https://scholar.google.com/scholar%3Fhl%3Den%26oe%3DASCII%26num%3D20%26as_sdt%3D2006&hl=en&q=CGMSBIoZq4wY_-GsvwUiGQDxp4NLuiTwfwm7TsuXuA_u780W5B6Kl8g
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1840) ~[?:1.8.0_101]
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441) ~[?:1.8.0_101]
	at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:254) ~[?:1.8.0_101]
	at net.sf.jabref.logic.net.URLDownload.downloadToString(URLDownload.java:123) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.runConfig(GoogleScholarFetcher.java:166) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.processQueryGetPreview(GoogleScholarFetcher.java:82) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GeneralFetcher.lambda$actionPerformed$4(GeneralFetcher.java:191) ~[JabRef-3.6.jar:?]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_101]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_101]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_101]
12:54:56.413 [JabRef CachedThreadPool] WARN  net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher - Error fetching from Google Scholar
java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/IndexRedirect?continue=https://scholar.google.com/scholar%3Fhl%3Den%26oe%3DASCII%26num%3D20%26as_sdt%3D2006&hl=en&q=CGMSBIoZq4wY_-GsvwUiGQDxp4NLuiTwfwm7TsuXuA_u780W5B6Kl8g
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1840) ~[?:1.8.0_101]
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441) ~[?:1.8.0_101]
	at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:254) ~[?:1.8.0_101]
	at net.sf.jabref.logic.net.URLDownload.downloadToString(URLDownload.java:123) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.runConfig(GoogleScholarFetcher.java:166) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.processQueryGetPreview(GoogleScholarFetcher.java:82) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GeneralFetcher.lambda$actionPerformed$4(GeneralFetcher.java:191) ~[JabRef-3.6.jar:?]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_101]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_101]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_101]
12:55:34.061 [JabRef CachedThreadPool] WARN  net.sf.jabref.logic.net.URLDownload - Could not copy input
java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/IndexRedirect?continue=https://scholar.google.com/scholar%3Fhl%3Den%26oe%3DASCII%26num%3D20%26as_sdt%3D2006&hl=en&q=CGMSBIoZq4wYpeKsvwUiGQDxp4NL7mM-14-E6qsZ4vDi-_KsLUXKQjE
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1840) ~[?:1.8.0_101]
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441) ~[?:1.8.0_101]
	at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:254) ~[?:1.8.0_101]
	at net.sf.jabref.logic.net.URLDownload.downloadToString(URLDownload.java:123) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.runConfig(GoogleScholarFetcher.java:166) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.processQueryGetPreview(GoogleScholarFetcher.java:82) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GeneralFetcher.lambda$actionPerformed$4(GeneralFetcher.java:191) ~[JabRef-3.6.jar:?]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_101]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_101]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_101]
12:55:34.061 [JabRef CachedThreadPool] WARN  net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher - Error fetching from Google Scholar
java.io.IOException: Server returned HTTP response code: 503 for URL: https://ipv4.google.com/sorry/IndexRedirect?continue=https://scholar.google.com/scholar%3Fhl%3Den%26oe%3DASCII%26num%3D20%26as_sdt%3D2006&hl=en&q=CGMSBIoZq4wYpeKsvwUiGQDxp4NL7mM-14-E6qsZ4vDi-_KsLUXKQjE
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1840) ~[?:1.8.0_101]
	at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441) ~[?:1.8.0_101]
	at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:254) ~[?:1.8.0_101]
	at net.sf.jabref.logic.net.URLDownload.downloadToString(URLDownload.java:123) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.runConfig(GoogleScholarFetcher.java:166) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GoogleScholarFetcher.processQueryGetPreview(GoogleScholarFetcher.java:82) ~[JabRef-3.6.jar:?]
	at net.sf.jabref.gui.importer.fetcher.GeneralFetcher.lambda$actionPerformed$4(GeneralFetcher.java:191) ~[JabRef-3.6.jar:?]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_101]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_101]
	at java.lang.Thread.run(Thread.java:745) [?:1.8.0_101]

(Matthias Geiger ) #2

Thanks for your report!

We seem to be having some trouble due to changes on Google Scholar's side.

We are trying to fix it as soon as possible. For further information, check out the issue on GitHub: https://github.com/JabRef/jabref/issues/1886


(Matthias Geiger ) #3

Short update here:
I tried to fix the Google Scholar fetcher and - good news - it is working again in the development builds available at https://builds.jabref.org/master.

However, the number of downloadable results is now limited to only the first 10 Google hits. Trying to download more than those 10 hits would instantly block the usage of JabRef with your IP address.

We are working on a complete rewrite of the fetcher which will hopefully overcome this limitation.


Web search not working
(Mario Fiorini) #4

Hi Matthias,
thanks for doing this. I can confirm that the fetcher works fine in the development build.
Will try to not use it too much to avoid hitting the limit!
Mario


(Cleinias) #5

I realize this is a rather stupid question, but I could not find the answer in the docs. Put simply:

How do I run the development snapshot?

This does not work:

java -jar NAME-OF-DEV-SNAPSHOT.jar

Thanks,


(Cleinias) #6

Never mind, solution found. It seems that on Arch Linux you need to use its own archlinux-java-run command.

However, even in the latest snapshot, I am still unable to get Google Scholar fetching to work. Here is what I see in the console:
org.jabref.logic.importer.FetcherException: Error while fetching from Google Scholar
at org.jabref.logic.importer.fetcher.GoogleScholar.performSearch(GoogleScholar.java:153) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.gui.importer.fetcher.WebSearchPaneViewModel.lambda$search$2(WebSearchPaneViewModel.java:108) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_181]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_181]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]
Caused by: java.io.IOException: Server returned HTTP response code: 403 for URL: https://scholar.googleusercontent.com/scholar.bib?q=info:ysnV1lVTfRkJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAW65wV_64tNN9236gKVm2fVjKLuBg6s-&scisf=4&ct=citation&cd=0&hl=en
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) ~[?:1.8.0_181]
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) ~[?:1.8.0_181]
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) ~[?:1.8.0_181]
at java.lang.reflect.Constructor.newInstance(Constructor.java:423) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:1944) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection$10.run(HttpURLConnection.java:1939) ~[?:1.8.0_181]
at java.security.AccessController.doPrivileged(Native Method) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection.getChainedException(HttpURLConnection.java:1938) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1508) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492) ~[?:1.8.0_181]
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:263) ~[?:1.8.0_181]
at org.jabref.logic.net.URLDownload.asString(URLDownload.java:203) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.net.URLDownload.asString(URLDownload.java:217) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.downloadEntry(GoogleScholar.java:170) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.addHitsFromQuery(GoogleScholar.java:164) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.performSearch(GoogleScholar.java:135) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
… 4 more
Caused by: java.io.IOException: Server returned HTTP response code: 403 for URL: https://scholar.googleusercontent.com/scholar.bib?q=info:ysnV1lVTfRkJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAW65wV_64tNN9236gKVm2fVjKLuBg6s-&scisf=4&ct=citation&cd=0&hl=en
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1894) ~[?:1.8.0_181]
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492) ~[?:1.8.0_181]
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480) ~[?:1.8.0_181]
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getResponseCode(HttpsURLConnectionImpl.java:347) ~[?:1.8.0_181]
at org.jabref.logic.net.URLDownload.openConnection(URLDownload.java:319) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.net.URLDownload.asString(URLDownload.java:203) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.net.URLDownload.asString(URLDownload.java:217) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.downloadEntry(GoogleScholar.java:170) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.addHitsFromQuery(GoogleScholar.java:164) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
at org.jabref.logic.importer.fetcher.GoogleScholar.performSearch(GoogleScholar.java:135) ~[JabRef-5.0-dev–snapshot–2018-09-25–master–2f433d2a6.jar:?]
… 4 more