dedupe fixes

2021-10-19 00:34:23 -07:00
parent 2c2f3bdf77
commit 0790cf9c4a
3 changed files with 26 additions and 14 deletions
--- a/reveal.sh
+++ b/reveal.sh
@@ -29,6 +29,11 @@ if [[ ! -e $basefn ]]; then
    ln -s $fn $basefn
 fi
 #add markdown filename into html title tag
 fn2=$(echo $basefn | sed -E 's/[0-9]{2,4}-//g' | sed 's/\.md//') #prep filename
 sed -i -E "s|(<title>).+(<\/title>)|\1$fn2\2|" index.html #swap in document title
 #add markdown filename to reveal placeholder start file
 sed -i -E "s|(<section data-markdown=\")[A-Za-z0-9\.-]*(\" )|\1$basefn\2|" index.html
 npm start -- --port=$portNumber
--- a/sdoi.sh
+++ b/sdoi.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 if [ "$1" == "-h" ] ; then
    echo "
-        sdoi - search for doi guid on pubmed and append bibtex entry to bibtex db. Optionally import a downloaded pdf.
+    sdoi - search for a unique identifier (doi or pmid) on doi.org and/or pubmed and append bibtex entry to bibtex db. Optionally import a downloaded pdf.
         usage:
          sdoi.sh 'doi'
@@ -49,18 +49,19 @@ fetchBib_pubmed() {
 }
 fetchBib_doiDotOrg() {
  #request bibtex from doi.org
  echo "pubmed id not found, trying doi.org.."
-  curl -LH 'Accept: application/x-bibtex' "http//dx.doi.org/"$doi >> $tmpBib
+  curl -LH 'Accept: application/x-bibtex' "https://dx.doi.org/"$doi >> $tmpBib
  echo -e "\n" >> $tmpBib
 }
 extract_name() {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
-  author=$(xmllint --xpath "string(//$key)" $tmpBib.xml)
+  author=$(xmllint --xpath "string(//$key)" $tmpBib.xml | tr -d ' ')
  key="MedlineTA"; 
-  journal=$(xmllint --xpath "string(//$key)" $tmpBib.xml)
+  journal=$(xmllint --xpath "string(//$key)" $tmpBib.xml | tr -d ' ')
  key="Year";
  year=$(xmllint --xpath "string(//$key)" $tmpBib.xml)
@@ -79,24 +80,31 @@ append_bibfile() {
 append_pdf() {
  fn2=${author}_${journal}$year-$uid.pdf
  echo $fn2
  #move pdf file to papers repository, add file name to bibtex url field
  fn2=${author}_${journal}$year-$uid.pdf
  echo "moving $fn to $pdfPathOut/$fn2"
  mv $fn $pdfPathOut/$fn2
-  echo "moved to $pdfPathOut/$fn2"
+  #insert local path to pdf into the retrieved bibtex url field
  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
 }
 clean_up() {
  #clean up
-  rm -f $tmpBib $tmpBib.xml
+  rm -f "$tmpBib" "$tmpBib.xml"
  exit 1
 }
-#main
+#main function
-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
+##test whether the given unique identifier (doi) is an actual doi, else assume it's a pmid
 if [[ -z $(echo $doi | grep "^10." -) ]]; then
  searchField="pmid"
 else
  searchField="doi"
 fi
 uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=$searchField&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
 tmpBib=$(mktemp -p ./ --suffix=.bib)
@@ -110,6 +118,5 @@ if [ -s "$tmpBib" ]; then
  import_bib
 else
  echo "sorry, doi not found.."
  clean_up
 fi
--- a/spmid.sh
+++ b/spmid.sh
@@ -45,17 +45,17 @@ fetchBib_pubmed() {
 fetchBib_doiDotOrg() {
  echo "pubmed id not found, trying doi.org.."
-  curl -LH 'Accept: application/x-bibtex' "http//dx.doi.org/"$doi >> $tmpBib
+  curl -LH 'Accept: application/x-bibtex' "https://dx.doi.org/"$doi >> $tmpBib
  echo -e "\n" >> $tmpBib
 }
 extract_name() {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
-  author=$(xmllint --xpath "string(//$key)" $tmpBib.xml)
+  author=$(xmllint --xpath "string(//$key)" $tmpBib.xml | tr -d ' ')
  key="MedlineTA"; 
-  journal=$(xmllint --xpath "string(//$key)" $tmpBib.xml)
+  journal=$(xmllint --xpath "string(//$key)" $tmpBib.xml | tr -d ' ')
  key="Year";
  year=$(xmllint --xpath "string(//$key)" $tmpBib.xml)