Commit 3c891de1 authored by Kivou's avatar Kivou
Browse files

first try

parent d796dc61
biblio_*.html
# CI pipeline: generate the bibliography pages, then publish them.
stages:
  - create
  - deploy

# Run the generator script and keep the produced biblio_*.html files as
# artifacts so the deploy stage can pick them up.
get-manu-page:
  stage: create
  script:
    - python ./create_html.py
  artifacts:
    paths:
      - ./biblio_*.html

# Move the generated pages into public/ and expose that directory as an
# artifact (the `pages` job name with a `public` artifact follows the
# GitLab Pages convention).
pages:
  stage: deploy
  script:
    - mkdir public
    - mv biblio*.html public/
  artifacts:
    paths:
      - public
# 3sr-biblio
Create HTML pages for the 3sr website bibliography.
import html

import requests
# Query clause restricting structure searches to these document types.
REQ_DOC_TYPE = "(" + " OR ".join(
    "docType_s:" + code for code in ("ART", "OUV", "COUV")
) + ")"

# Section headings shown for known document-type codes; codes that are not
# listed here are displayed as-is by get_biblio_idhal.
BRIDGE_TYPES = dict(
    ART="Articles",
    COMM="Conferences",
    COUV="Books",
    THESE="These",
)

# Field used to sort search results (an alternative is "journalDate_s").
SORT_BY = "producedDate_s"

# Number of rows requested from the search API.
N_PUBLI_MAX = 100
def html_publi(publi):
    """Build the HTML citation string for one publication record.

    The citation is a comma-separated list of whichever of the following
    parts are available in the record, terminated by a period: authors,
    linked title, journal, volume, pages, year and DOI link.

    Authors are read from ``authFullNameIdHal_fs``, whose entries have the
    form ``"<name>_FacetSep_<idHal>"``. Each author is linked to a HAL search
    on the idHal when it is non-empty, otherwise on the full name. If that
    field is missing or malformed, the plain names found in
    ``authLastNameFirstName_s`` are used instead; if neither is usable the
    authors part is omitted.

    Every value taken from the record is HTML-escaped before being embedded
    in the markup.

    Args:
        publi: Mapping of field name to value for a single document.

    Returns:
        The citation as an HTML fragment (no enclosing ``<li>``).
    """

    def esc(value):
        # Record values come from a remote API: never trust them as markup.
        return html.escape(str(value), quote=True)

    search_url = "https://hal.archives-ouvertes.fr/search/index/q/*"
    cite = []

    # Authors: linked names when possible, plain names as a fallback.
    try:
        names_linked = []
        for entry in publi["authFullNameIdHal_fs"]:
            name, id_hal = entry.split("_FacetSep_")
            search_type = "authIdHal_s" if id_hal else "authFullName_s"
            search_string = id_hal if id_hal else name.replace(" ", "+")
            url = "{}/{}/{}".format(search_url, search_type, search_string)
            names_linked.append(
                '<a href="{}" target="_blank">{}</a>'.format(esc(url), esc(name))
            )
        authors = ", ".join(names_linked)
    except (KeyError, ValueError, TypeError, AttributeError):
        fallback_names = publi.get("authLastNameFirstName_s") or []
        authors = ", ".join(esc(name) for name in fallback_names)
    if authors:
        cite.append(authors)

    # Title, linked to the record URI; skipped unless both are present.
    try:
        uri, title = publi["uri_s"], publi["title_s"][0]
    except (KeyError, IndexError, TypeError):
        pass
    else:
        cite.append(
            '<b>"<a href="{}" target="_blank">{}</a>"</b>'.format(esc(uri), esc(title))
        )

    # Optional single-value parts, in citation order.
    optional_parts = (
        ("journalTitle_s", "<em>{}</em>"),
        ("volume_s", "Vol. {}"),
        ("page_s", "pp. {}"),
        ("producedDateY_i", "{}"),
    )
    for key, template in optional_parts:
        if key in publi:
            cite.append(template.format(esc(publi[key])))

    # DOI, shown between angle brackets and linked to the resolver.
    if "doiId_s" in publi:
        cite.append(
            '&lt;<a href="https://doi.org/{doi}">{doi}</a>&gt;'.format(
                doi=esc(publi["doiId_s"])
            )
        )

    return ", ".join(cite) + "."
def get_biblio_structure(structure_name, structure_id, n_publi=100):
    """Fetch a structure's publications and write its bibliography pages.

    Queries the search API for documents matching ``REQ_DOC_TYPE`` whose
    ``authStructId_i`` equals ``structure_id``, sorted by ``SORT_BY``
    descending and limited to ``N_PUBLI_MAX`` rows. It then writes one file
    ``biblio_<structure_name>-<n>.html`` for each n in 3, 5, 10, 15, 50 and
    100, containing the first n results as an HTML list.

    Args:
        structure_name: Short name used in the output file names.
        structure_id: Value matched against ``authStructId_i`` in the query.
        n_publi: Currently unused; kept so existing callers keep working.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
    """
    req = (
        "https://api.archives-ouvertes.fr/search/"
        "?q=({r} AND authStructId_i:{s})&sort={so} desc&rows={n}&fl=*"
    ).format(s=structure_id, r=REQ_DOC_TYPE, n=N_PUBLI_MAX, so=SORT_BY)
    # A timeout keeps an unresponsive server from hanging the run forever,
    # and the status check reports HTTP errors before the payload is parsed.
    response = requests.get(req, timeout=30)
    response.raise_for_status()
    docs = response.json()["response"]["docs"]

    page_sizes = (3, 5, 10, 15, 50, 100)
    # Render each citation once; every page is a prefix of the same list.
    items = [
        f'\t\t\t<li>{html_publi(publi)}</li>\n'
        for publi in docs[:max(page_sizes)]
    ]

    page_head = (
        '<html>\n'
        '\t<head>\n'
        '\t\t<meta charset="UTF-8">\n'
        '\t\t<style>\n'
        '\t\t\tbody {color: #4c5154; word-wrap: break-word; width: 650px;}\n'
        '\t\t\ta {color: #bf3a8d; text-decoration: none;}\n'
        '\t\t\ta:hover {text-decoration: underline;}\n'
        '\t\t</style>\n'
        '\t</head>\n'
        '\t<body>\n'
        '\t\t<div id="contenu-encadres"><div class="liste-chevron"><ul>\n'
    )
    page_tail = (
        '\t\t</ul></div></div>\n'
        '\t</body>\n'
        '</html>'
    )

    for n in page_sizes:
        # The page declares charset UTF-8, so the file must be written as
        # UTF-8 regardless of the platform's default encoding.
        with open(f"biblio_{structure_name}-{n}.html", 'w', encoding="utf-8") as f:
            print(f"write {structure_name} {n}")
            f.write(page_head)
            f.writelines(items[:n])
            f.write(page_tail)
def get_biblio_idhal(idhal, n_publi=100):
    """Fetch one author's publications and write them grouped by type.

    Queries the search API for documents whose ``authIdHal_s`` equals
    ``idhal``, sorted by ``SORT_BY`` descending and limited to
    ``N_PUBLI_MAX`` rows. Results are grouped by ``docType_s``, in order of
    first appearance, and written to ``biblio_<idhal>.html`` with one
    ``<h2>`` section per type. Section headings come from ``BRIDGE_TYPES``;
    a document type missing from that table is shown under its raw code.

    Args:
        idhal: Author identifier used in the query and in the file name.
        n_publi: Maximum number of publications listed in each section.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
    """
    req = (
        "https://api.archives-ouvertes.fr/search/"
        "?q=(authIdHal_s:{s})&sort={so} desc&rows={n}&fl=*"
    ).format(s=idhal, n=N_PUBLI_MAX, so=SORT_BY)
    # A timeout keeps an unresponsive server from hanging the run forever,
    # and the status check reports HTTP errors before the payload is parsed.
    response = requests.get(req, timeout=30)
    response.raise_for_status()

    # Group documents by display label, preserving the API's sort order
    # inside each group.
    publis_by_type = {}
    for publi in response.json()["response"]["docs"]:
        doc_type = publi.get("docType_s")
        label = BRIDGE_TYPES.get(doc_type, doc_type)
        publis_by_type.setdefault(label, []).append(publi)

    # The page declares charset UTF-8, so the file must be written as UTF-8
    # regardless of the platform's default encoding.
    with open(f"biblio_{idhal}.html", 'w', encoding="utf-8") as f:
        print(f"write {idhal}")
        f.write(
            '<html>\n'
            '\t<head>\n'
            '\t\t<meta charset="UTF-8">\n'
            '\t\t<style>\n'
            '\t\t\tbody {color: #4c5154; word-wrap: break-word; width: 650px;}\n'
            '\t\t\ta {color: #bf3a8d; text-decoration: none;}\n'
            '\t\t\ta:hover {text-decoration: underline;}\n'
            '\t\t</style>\n'
            '\t</head>\n'
            '\t<body>\n'
            '\t\t<div id="contenu-encadres">\n'
        )
        for label, group in publis_by_type.items():
            f.write(f'\t\t<h2>{label}</h2>\n')
            f.write('\t\t<div><ul>\n')
            for publi in group[:n_publi]:
                f.write(f'\t\t\t<li>{html_publi(publi)}</li>\n')
            f.write('\t\t</ul></div>\n')
        f.write(
            '\t\t</div>\n'
            '\t</body>\n'
            '</html>'
        )
# Generate the pages for every structure, then the page for one author.
for _name, _struct_id in (
    ("geo", 545340),
    ("comhet", 545341),
    ("rv", 545342),
    ("labo", 706),
):
    get_biblio_structure(_name, _struct_id)
get_biblio_idhal("eroubin", n_publi=50)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment