Files
linux-bin/sdoi.sh

116 lines
2.8 KiB
Bash
Raw Normal View History

2019-02-05 19:30:40 -08:00
#!/bin/bash
2020-02-13 12:11:29 -08:00
# Print the usage text and stop as soon as the first argument is exactly "-h".
# Any other first argument (or none) falls through to the rest of the script.
case "$1" in
  -h)
    # Quoted delimiter: the text is emitted literally, with the same leading
    # and trailing blank line the help output has always had.
    cat <<'EOF'

sdoi - search for doi guid on pubmed and append bibtex entry to bibtex db. Optionally import a downloaded pdf.
usage:
sdoi.sh 'doi'
sdoi.sh 'doi' download.pdf
depends:
xsltproc - xml processor, from GNOME project
pubmed2bibtex.xsl - xml processor stylesheet
defaults:
Set the three required default file locations (xsl file, bib file, pdf directory)

EOF
    exit 0
    ;;
esac
2019-02-05 19:30:40 -08:00
2021-05-04 19:23:54 -07:00
set -e #exit if an error
# set -v -x -e #debugging

#Setup defaults
# Positional arguments: the DOI to look up and, optionally, a downloaded pdf
# that should be filed alongside the bibtex entry.
doi=$1
fn=$2

# Each location can be overridden from the environment; the value after ":-"
# is used when the environment variable is unset or empty.
styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}

# Last path component of the pdf directory; used as the relative prefix that is
# written into the bibtex url field. Quoted (and guarded with --) so that a
# directory containing whitespace or starting with "-" is handled as one
# operand instead of being split into a name plus a suffix to strip.
relPath=$(basename -- "$pdfPathOut")
2021-05-04 19:23:54 -07:00
#define functions
import_bib() {
  # Import the fetched bibtex entry. When a pdf file name was supplied ($fn),
  # first derive a file name for it and file the pdf; in every case the bibtex
  # entry is appended afterwards and clean_up is called last.
  if [ -n "$fn" ]; then
    extract_name
    append_pdf
  fi
  append_bibfile
  clean_up
}
2021-05-04 19:23:54 -07:00
fetchBib_pubmed() {
  # Request the PubMed record for $uid as xml and transform it into bibtex.
  # Globals read: uid, tmpBib, styleSheet.
  # Files written: "$tmpBib.xml" (raw efetch response) and "$tmpBib" (the
  # output of xsltproc applied to that response with $styleSheet).
  # All paths are quoted so that a stylesheet or temp path containing
  # whitespace is passed through as a single word.
  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${uid}&retmode=xml" > "${tmpBib}.xml"
  xsltproc --novalid "$styleSheet" "${tmpBib}.xml" > "$tmpBib"
}
2021-05-04 19:23:54 -07:00
fetchBib_doiDotOrg() {
  # Fallback lookup: ask the doi.org resolver for a bibtex rendering of $doi
  # (content negotiation via the Accept header) and append it to "$tmpBib".
  # Globals read: doi, tmpBib.
  echo "pubmed id not found, trying doi.org.."
  # The scheme separator was missing in the old URL ("http//..."), so the
  # request could never reach the resolver. --fail makes curl write nothing on
  # an HTTP error instead of appending the error page to the bibtex file.
  if ! curl -fsSL -H 'Accept: application/x-bibtex' "https://doi.org/${doi}" >> "$tmpBib"; then
    # Nothing usable was fetched: leave "$tmpBib" without the separator lines
    # so that a non-empty check on it still reflects "no entry found".
    return 0
  fi
  # Two newlines separate this entry from whatever is appended after it.
  printf '\n\n' >> "$tmpBib"
}
2021-05-04 19:23:54 -07:00
extract_name() {
  # Extract some strings from "$tmpBib.xml" to make a nice filename for the pdf.
  # Globals read:    tmpBib
  # Globals written: author  - text of the first line mentioning LastName, spaces removed
  #                  journal - text of the first line mentioning MedlineTA, spaces removed
  #                  year    - Year found between <PubDate> and </PubDate>
  local xml="${tmpBib}.xml"
  local key key1 key2

  # No readable xml (it is only written by the PubMed fetch): nothing to
  # extract. Set the fields to empty explicitly instead of letting grep/awk
  # complain about a missing file.
  if [[ ! -r "$xml" ]]; then
    author=""
    journal=""
    year=""
    return 0
  fi

  key="LastName"
  author=$(grep --max-count=1 -- "$key" "$xml" \
    | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" \
    | tr -d " ")

  key="MedlineTA"
  journal=$(grep --max-count=1 -- "$key" "$xml" \
    | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" \
    | tr -d " ")

  # Restrict the Year lookup to the lines between the PubDate tags, and keep
  # only the first hit so the value can never span several lines.
  key1="PubDate"
  key2="Year"
  year=$(awk "/<$key1>/,/<\/$key1>/" "$xml" \
    | grep --max-count=1 -- "$key2" \
    | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")
}
2021-05-04 19:23:54 -07:00
append_bibfile() {
  # Append the fetched bibtex entry ("$tmpBib") to the bibtex db
  # ("$bibdFileOut") unless the db already mentions "$doi".
  # Globals read: doi, tmpBib, bibdFileOut.
  #
  # The DOI is searched as a fixed string (-F): DOIs contain "." and may
  # contain "(" or ")", which would otherwise be interpreted as regex syntax.
  # Plain grep is used so no tool beyond the documented dependencies is needed.
  # A db file that does not exist yet simply counts as "not found".
  if [[ -f "$bibdFileOut" ]] && grep -qF -- "$doi" "$bibdFileOut"; then
    echo "$doi already found in $bibdFileOut, exiting"
  else
    echo "importing $tmpBib"
    cat -- "$tmpBib" >> "$bibdFileOut"
  fi
}
2021-05-04 19:23:54 -07:00
append_pdf() {
  # Move the downloaded pdf ("$fn") into the papers directory under a
  # descriptive name and write that name into the url field of "$tmpBib".
  # Globals read:    author, journal, year, uid, doi, fn, pdfPathOut, relPath, tmpBib
  # Globals written: fn2 - the new pdf file name (without directory)
  local id esc

  # Without a PubMed id every file would end in "-.pdf" and overwrite the
  # previous one, so fall back to the DOI with every character outside
  # [A-Za-z0-9._-] replaced by "_".
  id=$uid
  if [[ -z "$id" ]]; then
    id=${doi//[^A-Za-z0-9._-]/_}
  fi
  fn2="${author}_${journal}${year}-${id}.pdf"

  #move pdf file to papers repository, add file name to bibtex url field
  mv -- "$fn" "$pdfPathOut/$fn2"
  echo "moved to $pdfPathOut/$fn2"

  # Escape the characters that are special in the replacement part of the
  # s|...|...| command below (backslash, "&" and the "|" delimiter) so the
  # path is inserted literally.
  esc=$(printf '%s' "$relPath/$fn2" | sed -e 's/[\\&|]/\\&/g')
  sed -i -E "s|(\W*url = \{).*(\}.*)|\1${esc}\2|" "$tmpBib"
}
2019-02-05 19:30:40 -08:00
2021-05-04 19:23:54 -07:00
clean_up() {
  # Remove the temporary bibtex/xml files and terminate the script.
  # Globals read: tmpBib
  # Arguments:    $1 - optional exit status. Defaults to 1, which is the status
  #                    this function has always exited with, so calls without
  #                    an argument behave exactly as before; pass 0 to report
  #                    success.
  #
  # Skip the removal when tmpBib is empty: "$tmpBib.xml" would then expand to
  # a bare ".xml" and delete an unrelated file in the current directory.
  if [[ -n "${tmpBib:-}" ]]; then
    rm -f -- "$tmpBib" "$tmpBib.xml"
  fi
  exit "${1:-1}"
}
2021-05-03 21:03:40 -07:00
2021-05-04 19:23:54 -07:00
#main
2021-05-03 21:03:40 -07:00
# Look the DOI up on PubMed (esearch) and keep the numeric id of the first hit.
# The query is sent with -G/--data-urlencode so that characters in the DOI that
# are special in a URL are percent-encoded instead of being pasted in verbatim.
# grep -o plus head keep exactly one <Id> element, so uid is never multi-line.
# An empty uid (no hit, or the request failed) selects the doi.org fallback.
uid=$(curl -s -G "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" \
    -d "db=pubmed" --data-urlencode "term=${doi}" -d "field=doi" -d "retmode=xml" \
  | grep -oE "<Id>[0-9]+</Id>" \
  | head -n 1 \
  | sed -E "s|<Id>([0-9]+)</Id>|\1|")

# Scratch file for the bibtex entry, created in the current directory.
tmpBib=$(mktemp -p ./ --suffix=.bib)

if [ -z "$uid" ]; then
  fetchBib_doiDotOrg
else
  fetchBib_pubmed
fi

# Only import when one of the fetchers actually produced some content.
if [ -s "$tmpBib" ]; then
  import_bib
else
  echo "sorry, doi not found.."
  clean_up
fi