Initial Commit
This commit is contained in:
378
translators/Die Zeit.js
Normal file
378
translators/Die Zeit.js
Normal file
@@ -0,0 +1,378 @@
|
||||
{
|
||||
"translatorID": "312bbb0e-bfb6-4563-a33c-085445d391ed",
|
||||
"label": "Die Zeit",
|
||||
"creator": "Philipp Zumstein",
|
||||
"target": "^https?://www\\.zeit\\.de/",
|
||||
"minVersion": "3.0",
|
||||
"maxVersion": "",
|
||||
"priority": 100,
|
||||
"inRepository": true,
|
||||
"translatorType": 4,
|
||||
"browserSupport": "gcsibv",
|
||||
"lastUpdated": "2018-11-25 19:35:23"
|
||||
}
|
||||
|
||||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2015 Philipp Zumstein
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
/*
	This translator only partially works: zeit.de runs some JavaScript that makes
	processDocuments return an error, whereas calling scrape(doc, url) directly on a
	single document works.
	As the translator is currently written, pages with multiple items are only handled
	correctly if JavaScript is turned off in the browser.
	For example, at
		http://www.zeit.de/suche/index?q=Krise
	only the first reference can be scraped.
*/
|
||||
|
||||
/**
 * Decide what kind of item(s) the current page offers.
 *
 * @param {Document} doc - The page being inspected.
 * @param {string} url - The page URL (part of the translator interface; not inspected here).
 * @returns {string|boolean} "newspaperArticle" when the body is marked with
 *     data-page-type="article", "multiple" when getSearchResults finds at least
 *     one usable teaser, and false otherwise.
 */
function detectWeb(doc, url) {
	// A document without a <body> has no page type; fall through to the teaser check
	// instead of throwing on a null dereference.
	var pageType = doc.body ? doc.body.getAttribute("data-page-type") : null;
	if (pageType === "article") {
		return "newspaperArticle";
	}
	if (getSearchResults(doc, true)) {
		return "multiple";
	}
	// Explicit "nothing detected" result instead of an implicit undefined.
	return false;
}
|
||||
|
||||
/**
 * Collect the article teasers listed on a search or index page.
 *
 * Teasers are looked up by the class name 'archiveteaser'; if none exist, the
 * class name 'teaser-small__container' is used instead.
 *
 * @param {Document} doc - The page to search.
 * @param {boolean} checkOnly - When true, return true as soon as one usable
 *     teaser (link plus title) is found, without building the full map.
 * @returns {Object|boolean} A map of link href -> teaser title, true in
 *     checkOnly mode when a teaser exists, or false when nothing usable was found.
 */
function getSearchResults(doc, checkOnly) {
	const items = {};
	let found = false;
	let rows = doc.getElementsByClassName('archiveteaser');
	if (rows.length === 0) {
		rows = doc.getElementsByClassName('teaser-small__container');
	}
	for (let i = 0; i < rows.length; i++) {
		const href = ZU.xpathText(rows[i], '(.//a/@href)[1]');
		const rawTitle = ZU.xpathText(rows[i], './/a/h4|.//a[span]');
		// xpathText yields no string when nothing matches; skip such rows before
		// trimInternal sees them, since it expects a string argument.
		if (!href || !rawTitle) continue;
		const title = ZU.trimInternal(rawTitle);
		// A title consisting only of whitespace is as useless as a missing one.
		if (!title) continue;
		if (checkOnly) return true;
		found = true;
		items[href] = title;
	}
	return found ? items : false;
}
|
||||
|
||||
/**
 * Translator entry point: save the current article, or let the user pick
 * articles from a listing page and scrape each selected one.
 *
 * @param {Document} doc - The page the translator was invoked on.
 * @param {string} url - The URL of that page.
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) !== "multiple") {
		scrape(doc, url);
		return;
	}
	Zotero.selectItems(getSearchResults(doc, false), function (items) {
		// Nothing selected (or the dialog was cancelled): nothing to do.
		if (!items) return;
		// The keys of the selection are the article URLs; only own keys are used.
		ZU.processDocuments(Object.keys(items), scrape);
	});
}
|
||||
|
||||
/**
 * Build and complete a newspaperArticle item from an article page.
 *
 * Metadata comes from the first JSON-LD script block whose "@type" is
 * "Article"; if the page has no such block, nothing is saved. When the JSON-LD
 * carries no author, the text of div.byline elements is used instead.
 *
 * @param {Document} doc - The article page.
 * @param {string} url - The article URL; stored on the item and used for the
 *     snapshot unless the page links to a single-page view.
 */
function scrape(doc, url) {
	var scripts = ZU.xpath(doc, '//script[@type="application/ld+json"]');
	let json = null;
	for (let script of scripts) {
		let candidate;
		try {
			candidate = JSON.parse(script.textContent);
		}
		catch (e) {
			// One malformed block must not keep later, valid blocks from being read.
			Zotero.debug("Skipping unparsable JSON-LD block: " + e);
			continue;
		}
		// JSON.parse may legitimately yield null or a primitive.
		if (candidate && candidate["@type"] === "Article") {
			json = candidate;
			break;
		}
	}
	if (!json) {
		return;
	}

	var newItem = new Zotero.Item("newspaperArticle");
	newItem.url = url;
	newItem.title = json.headline;
	newItem.abstractNote = json.description;
	if (json.datePublished) {
		// Keep only the date part of an ISO timestamp.
		newItem.date = json.datePublished.replace(/T.+/, "");
	}

	if (json.author) {
		// A single author is given as an object rather than a one-element array.
		let authors = Array.isArray(json.author) ? json.author : [json.author];
		for (let author of authors) {
			if (author.name) {
				newItem.creators.push(ZU.cleanAuthor(author.name, "author"));
			}
		}
	}
	else {
		var bylines = ZU.xpath(doc, '//div[@class="byline"]');
		for (let byline of bylines) {
			let authorName = byline.textContent;
			if (authorName) {
				// Drop the leading "Von" ("by") of the byline.
				authorName = authorName.replace(/^\s*Von/, '');
				newItem.creators.push(ZU.cleanAuthor(authorName, "author"));
			}
		}
	}
	// Names without a first name (e.g. agencies) are stored as single-field creators.
	for (let creator of newItem.creators) {
		if (creator.firstName === "") {
			creator.fieldMode = 1;
			delete creator.firstName;
		}
	}

	var section = doc.getElementsByClassName("nav__ressorts-link--current");
	if (section.length > 0) {
		newItem.section = section[0].textContent;
	}

	newItem.publicationTitle = "Die Zeit";
	newItem.ISSN = "0044-2070";
	newItem.language = "de-DE";
	newItem.place = "Hamburg";

	// keywords is optional and may be a comma-separated string or an array;
	// a missing value must not abort the whole item.
	var keywords = json.keywords;
	if (typeof keywords === "string") {
		keywords = keywords.split(',');
	}
	else if (!Array.isArray(keywords)) {
		keywords = [];
	}
	for (let keyword of keywords) {
		let tag = String(keyword).trim();
		// Skip empty segments such as those produced by "a,,b" or an empty string.
		if (tag) {
			newItem.tags.push(tag);
		}
	}

	// if present, use the link to show the whole content on a single page
	var snapshotNode = ZU.xpath(doc, '//li[@class="article-pager__all"]/a');
	var snapshotUrl = (snapshotNode.length > 0) ? snapshotNode[0].href : url;
	newItem.attachments.push({
		url: snapshotUrl,
		title: "Snapshot",
		mimeType: "text/html"
	});
	newItem.complete();
}
|
||||
/** BEGIN TEST CASES **/
|
||||
var testCases = [
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://www.zeit.de/politik/ausland/2011-09/libyen-bani-walid",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "newspaperArticle",
|
||||
"title": "Libyen: Rebellen bereiten Angriff auf Bani Walid vor",
|
||||
"creators": [
|
||||
{
|
||||
"lastName": "AFP",
|
||||
"creatorType": "author",
|
||||
"fieldMode": 1
|
||||
},
|
||||
{
|
||||
"lastName": "dpa",
|
||||
"creatorType": "author",
|
||||
"fieldMode": 1
|
||||
}
|
||||
],
|
||||
"date": "2011-09-04",
|
||||
"ISSN": "0044-2070",
|
||||
"abstractNote": "Die von Gadhafi-Anhängern geführte Stadt ist von Rebellentruppen eingekreist. Gespräche über eine friedliche Übergabe sind gescheitert, ein Angriff steht offenbar bevor.",
|
||||
"language": "de-DE",
|
||||
"libraryCatalog": "Die Zeit",
|
||||
"place": "Hamburg",
|
||||
"publicationTitle": "Die Zeit",
|
||||
"section": "Politik",
|
||||
"shortTitle": "Libyen",
|
||||
"url": "https://www.zeit.de/politik/ausland/2011-09/libyen-bani-walid",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "Libyen"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://www.zeit.de/2011/36/Interview-Lahm-Rinke",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "newspaperArticle",
|
||||
"title": "Philipp Lahm: \"Hast du elf Freunde?\"",
|
||||
"creators": [
|
||||
{
|
||||
"firstName": "Moritz",
|
||||
"lastName": "Müller-Wirth",
|
||||
"creatorType": "author"
|
||||
}
|
||||
],
|
||||
"date": "2011-09-01",
|
||||
"ISSN": "0044-2070",
|
||||
"abstractNote": "Tschechow und Robben, Drama im Flutlicht und Wahrhaftigkeit bei der Arbeit. Der Fußballprofi und Autor Philipp Lahm im Gespräch mit dem Schriftsteller und Fußballer Moritz Rinke",
|
||||
"language": "de-DE",
|
||||
"libraryCatalog": "Die Zeit",
|
||||
"place": "Hamburg",
|
||||
"publicationTitle": "Die Zeit",
|
||||
"section": "Sport",
|
||||
"shortTitle": "Philipp Lahm",
|
||||
"url": "https://www.zeit.de/2011/36/Interview-Lahm-Rinke",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "Andreas Ottl"
|
||||
},
|
||||
{
|
||||
"tag": "Aristoteles"
|
||||
},
|
||||
{
|
||||
"tag": "Berlin"
|
||||
},
|
||||
{
|
||||
"tag": "Bielefeld"
|
||||
},
|
||||
{
|
||||
"tag": "FC Bayern München"
|
||||
},
|
||||
{
|
||||
"tag": "Fußball"
|
||||
},
|
||||
{
|
||||
"tag": "Hertha BSC"
|
||||
},
|
||||
{
|
||||
"tag": "Joachim Löw"
|
||||
},
|
||||
{
|
||||
"tag": "Max Frisch"
|
||||
},
|
||||
{
|
||||
"tag": "Philipp Lahm"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "http://www.zeit.de/suche/index?q=Krise",
|
||||
"items": "multiple"
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://www.zeit.de/2009/11/A-Drinnen",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "newspaperArticle",
|
||||
"title": "DRINNEN: Mixen aus Prinzip",
|
||||
"creators": [
|
||||
{
|
||||
"firstName": "Ernst",
|
||||
"lastName": "Schmiederer",
|
||||
"creatorType": "author"
|
||||
}
|
||||
],
|
||||
"date": "2009-03-05",
|
||||
"ISSN": "0044-2070",
|
||||
"abstractNote": "Ein Iraner in Wien. Der Fotograf Daniel Shaked, 31, gibt Österreichs einziges Hip-Hop-Magazin heraus",
|
||||
"language": "de-DE",
|
||||
"libraryCatalog": "Die Zeit",
|
||||
"place": "Hamburg",
|
||||
"publicationTitle": "Die Zeit",
|
||||
"section": "Politik",
|
||||
"shortTitle": "DRINNEN",
|
||||
"url": "https://www.zeit.de/2009/11/A-Drinnen",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "Band"
|
||||
},
|
||||
{
|
||||
"tag": "Eltern"
|
||||
},
|
||||
{
|
||||
"tag": "Familie"
|
||||
},
|
||||
{
|
||||
"tag": "Geschwister"
|
||||
},
|
||||
{
|
||||
"tag": "Hans Krankl"
|
||||
},
|
||||
{
|
||||
"tag": "Hip-Hop"
|
||||
},
|
||||
{
|
||||
"tag": "Iran"
|
||||
},
|
||||
{
|
||||
"tag": "Israel"
|
||||
},
|
||||
{
|
||||
"tag": "Musik"
|
||||
},
|
||||
{
|
||||
"tag": "Offenheit"
|
||||
},
|
||||
{
|
||||
"tag": "Reise"
|
||||
},
|
||||
{
|
||||
"tag": "Revolution"
|
||||
},
|
||||
{
|
||||
"tag": "Salzburg"
|
||||
},
|
||||
{
|
||||
"tag": "Teheran"
|
||||
},
|
||||
{
|
||||
"tag": "Wien"
|
||||
},
|
||||
{
|
||||
"tag": "Österreich"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "http://www.zeit.de/2009/11/index",
|
||||
"items": "multiple"
|
||||
}
|
||||
]
|
||||
/** END TEST CASES **/
|
||||
Reference in New Issue
Block a user