spring library

2021-04-08 20:52:17 -07:00
parent 6e92164bc3
commit 980e493cf7
5 changed files with 95 additions and 57 deletions
--- a/7
+++ b/7
@@ -33,11 +33,12 @@ fi
 if [ -z "$1" ]; then
    #FZF_DEFAULT_COMMAND=rg -i --files --glob "!.git/*"
-    fzf --delimiter : --preview 'less {1}' \
+    # fzf --delimiter : --preview 'less {1}' \
    fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {}' \
        --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" 
 else
-
+    # rg $1 | fzf --delimiter : --preview 'less {1}' \
-    rg $1 | fzf --delimiter : --preview 'less {1}' \
+    rg $1 | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {}' \
        --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" 
 fi
--- a/pdf2bib.sh
+++ b/pdf2bib.sh
@@ -31,23 +31,24 @@ echo "using $bibdFileOut"
 #try to extract doi from pdf and retrieve a pubmed id
 #for 'DOI:' syntax
-# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi:(.+)#\1#")
+# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi:(.+)|\1|")
-doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.*doi:? ?/?(10.+)#\1#")
+# search for the doi string on pages 1 through 10 of the pdf
 doi=$(pdftotext -q -f 1 -l 10 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.*doi:? ?/?(10.+)|\1|")
 #for 'https://doi.org' syntax
 if [ -z "$doi" ]; then
-  doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org/(10.+)#\1#")
+  doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org/(10.+)|\1|")
 fi
 # for 'https://doi.org' syntax
 # if [ -z "$doi" ]; then
-  # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org\/(.+)#\1#")
+  # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org\/(.+)|\1|")
 # fi
 # 
 # if [ -z "$doi" ]; then
-# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi ?(.+)#\1#")
+# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi ?(.+)|\1|")
 # fi
 if [ -z "$doi" ]; then
@@ -57,7 +58,7 @@ fi
 ## TODO: dedupe this with sdoi.sh
-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
+uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
 if [ -z "$uid" ]; then
  echo "pubmed id not found"
@@ -70,13 +71,13 @@ xsltproc --novalid $styleSheet $uid.xml > $uid.bib
 #extract some strings to make a nice filename for the pdf
 key="LastName"; 
-author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+author=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
 key="MedlineTA"; 
-journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+journal=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
 key1="PubDate"; 
-key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")
 fn2=${author}_${journal}$year-$uid.pdf
--- a/pubmed2bibtex.xsl
+++ b/pubmed2bibtex.xsl
@@ -34,12 +34,12 @@
 <xsl:value-of select="MedlineCitation/Article/Journal/JournalIssue/PubDate/Year"/>
 <!-- <xsl:text>_</xsl:text><xsl:value-of select="MedlineCitation/PMID" /> -->
 <xsl:apply-templates select="MedlineCitation/Article"/>
-<xsl:apply-templates select="MedlineCitation/KeywordList"/>
+<!-- <xsl:apply-templates select="MedlineCitation/KeywordList"/> -->
-<!-- <xsl:apply-templates select="MedlineCitation/MeshHeadingList"/> -->
+<xsl:apply-templates select="MedlineCitation/MeshHeadingList"/>
 <xsl:apply-templates select="PubmedData/ArticleIdList/ArticleId"/>
 <!-- url = {https://www.ncbi.nlm.nih.gov/pubmed/</xsl:text><xsl:value-of select="MedlineCitation/PMID"/><xsl:text>}, -->
 <xsl:text>,
-  url = {https://www.ncbi.nlm.nih.gov/pubmed/</xsl:text><xsl:value-of select="MedlineCitation/PMID"/><xsl:text>},
+  url = {}</xsl:text>
  file = {}</xsl:text>
 <xsl:if test="string-length(MedlineCitation/MedlineJournalInfo/NlmUniqueID) > 0"><xsl:text>,
  nlmuniqueid = {</xsl:text><xsl:value-of select="MedlineCitation/MedlineJournalInfo/NlmUniqueID" /><xsl:text>}</xsl:text></xsl:if>
 <xsl:text>
@@ -108,7 +108,7 @@
 <xsl:apply-templates select="ForeName"/>
 </xsl:template>
-
+<!--
 <xsl:template match="KeywordList">
 <xsl:text>,
  keywords = {</xsl:text>
@@ -120,16 +120,18 @@
 </xsl:for-each>
 <xsl:text>}</xsl:text>
 </xsl:template>
 -->
 <!--
 <xsl:template match="MeshHeadingList">
 <xsl:text>,
-  mesh = {</xsl:text>
+  keywords = {</xsl:text>
-<xsl:for-each select="MeshHeading">
+<xsl:for-each select="/PubmedArticleSet/PubmedArticle/MedlineCitation/KeywordList/Keyword">
 <xsl:value-of select="."/><xsl:text>; </xsl:text>
 </xsl:for-each>
 <xsl:for-each select="/PubmedArticleSet/PubmedArticle/MedlineCitation/MeshHeadingList/MeshHeading">
 <xsl:value-of select="DescriptorName"/><xsl:text>; </xsl:text>
 </xsl:for-each>
 <xsl:text>}</xsl:text>
 </xsl:template>
 -->
 </xsl:stylesheet>
--- a/6
+++ b/6
@@ -12,8 +12,11 @@ if [ "$1" == "-h" ] ; then
         depends: 
          fzf
          git grep
          sed tail
          bat or less
          pandoc
          pandoc-citeproc
          echo
          wl-copy
         defaults:
@@ -42,7 +45,8 @@ fi
 # str=$(cat $fn | fzf)
 # str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'nl {1} --body-numbering=a' --preview-window=:up:70%:+{2}-5)
-str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5)
+# str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5)
 str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {1}' --preview-window=:up:70%:+{2}-5)
 # extract citation key from the fzf string
--- a/sdoi.sh
+++ b/sdoi.sh
@@ -27,57 +27,87 @@ fn=$2
 set -e #exit if an error
-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
+uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
 # Scratch file for the fetched bibtex entry. "-p" requires a directory
 # argument, so "mktemp -p --suffix=.bib" consumed the suffix option as that
 # directory and created nothing; without -p the default temp dir is used.
 tmpBib=$(mktemp --suffix=.bib)
 if [ -z "$uid" ]; then
-  echo "pubmed id not found"
+  fetchBib_doiDotOrg
-  exit 1
+else
  fetchBib_pubmed
 fi
-#request pubmed xml and transform into bibtex
+if [ -s "$tmpBib" ]; then
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
+  import_bib
 xsltproc --novalid $styleSheet $uid.xml > $uid.bib
 #decide whether to process and move an associated pdf or just exit
 if [ -z "$fn" ]; then
  if [[ -z $(rg $uid $bibdFileOut) ]]; then
    #import bibtex
    echo "importing $uid.bib"
    cat $uid.bib >> $bibdFileOut
  else
    echo "$uid already found in $bibdFileOut, exiting"
  fi
  #clean up
  rm $uid.xml $uid.bib
  exit 1
 else
  echo "sorry, doi not found.."
  clean_up
 fi
 # Imports the fetched entry into the bibliography. When a pdf path was
 # supplied in $fn, the name parts are extracted and the pdf is moved first.
 # Both paths finish with append_bibfile followed by clean_up.
 function import_bib {
   if [[ -n "$fn" ]]; then
     # a pdf accompanies this entry: derive its file name, then move it
     extract_name
     append_pdf
   fi
   append_bibfile
   clean_up
 }
 # Downloads the PubMed record for $uid as xml and converts it to bibtex.
 # Globals: uid, styleSheet, tmpBib (all read)
 # Outputs: raw xml is written to "$tmpBib.xml", the xsltproc result to "$tmpBib"
 function fetchBib_pubmed {
   #request pubmed xml and transform into bibtex
   curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > "$tmpBib.xml"
   # quoted so a stylesheet or temp path containing spaces stays one argument
   xsltproc --novalid "$styleSheet" "$tmpBib.xml" > "$tmpBib"
 }
 # Fallback used when PubMed returned no id for $doi: requests a bibtex
 # rendering of the record from dx.doi.org and appends it to $tmpBib.
 # Globals: doi, tmpBib (read); the file named by tmpBib is appended to
 function fetchBib_doiDotOrg {
   echo "pubmed id not found, trying doi.org.."
   # The ':' after the scheme was missing ("http//dx.doi.org/"), so the URL
   # did not address dx.doi.org at all.
   # -L follows redirects; the Accept header asks for bibtex output.
   curl -LH 'Accept: application/x-bibtex' "http://dx.doi.org/$doi" >> "$tmpBib"
   # trailing blank lines keep this entry apart from later appends
   echo -e "\n" >> "$tmpBib"
 }
 # Extracts three strings from the fetched PubMed xml and leaves them in the
 # globals author, journal and year (also sets key, key1 and key2).
 # In each -/+ pair below the lines differ in the xml file that is read
 # ($uid.xml vs $tmpBib.xml) and in the sed delimiter (# vs |).
 function extract_name {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
  # first line mentioning LastName, reduced to the tag's text, spaces removed
-  author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  author=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
  key="MedlineTA"; 
  # same treatment for the first line mentioning MedlineTA
-  journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  journal=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
  key1="PubDate"; 
  # awk keeps only the lines from <PubDate> through </PubDate>; the Year tag's text is taken from those
-  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $tmpBib.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")
 }
 # Appends the fetched entry in $tmpBib to $bibdFileOut unless $doi already
 # occurs somewhere in that file.
 # Globals: doi, tmpBib, bibdFileOut (read); the file named by bibdFileOut is appended to
 # Outputs: one status line on stdout
 function append_bibfile {
   #import bibtex
   #first grep for a uid (doi) in case its already in db
   # -F: a doi is a literal string, so its '.', '(' and ')' must not be read
   # as regex syntax; "--" keeps the pattern from being parsed as an option.
   # Expansions are quoted so paths containing spaces survive word splitting.
   if [[ -z $(rg -F -- "$doi" "$bibdFileOut") ]]; then
     echo "importing $tmpBib"
     cat "$tmpBib" >> "$bibdFileOut"
   else
     echo "$doi already found in $bibdFileOut, exiting"
   fi
 }
 # Renames and moves the pdf given in $fn into $pdfPathOut, then records the
 # new location in the "file = {...}" field of the bibtex entry.
 # Reads the globals author, journal, year, uid, fn, pdfPathOut, relPath;
 # sets fn2.
 # NOTE(review): when the entry came from fetchBib_doiDotOrg, $uid is empty
 # and no $tmpBib.xml was written for extract_name to read, so fn2 would
 # presumably reduce to "_-.pdf" - confirm and handle that path.
 function append_pdf {
  # target file name: <author>_<journal><year>-<uid>.pdf
  fn2=${author}_${journal}$year-$uid.pdf
  #move pdf file to papers repository, add file name to bibtex file field
  mv $fn $pdfPathOut/$fn2
  echo "moved to $pdfPathOut/$fn2"
  # rewrite the file field in place so it holds $relPath/$fn2
-  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib
+  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
 }
  if [[ -z $(rg $uid $bibdFileOut) ]]; then
    #import bibtex
    echo "importing $uid.bib"
    cat $uid.bib >> $bibdFileOut
  else
    echo "$uid already found in $bibdFileOut, exiting"
  fi
 # Deletes the temporary files and ends the script.
 # NOTE(review): the added "exit 1" makes every path through here finish with
 # status 1, including import_bib after a successful append - confirm whether
 # a zero status is wanted on success.
 function clean_up {
  #clean up
-  rm $uid.xml $uid.bib
  # -f: no error when one of the two files was never created
+  rm -f $tmpBib $tmpBib.xml
-fi
+  exit 1
 }