#!/usr/bin/python
# -*- coding: utf-8 -*-
# Add reciprocal P910 (topic's main category) claims to targets of P301 (category's main topic)
# Mike Peel 17-Jun-2019 v1 - start
from __future__ import unicode_literals

import string
import sys
import time
import urllib

import numpy as np

import pywikibot
from pywikibot import pagegenerators

from pibot_functions import *
maxnum = 1000     # safety cap: stop after this many edits in one run
nummodified = 0   # count of P910 claims actually added
wikidata_site = pywikibot.Site("wikidata", "wikidata")
repo = wikidata_site.data_repository()  # this is a DataSite object
commons = pywikibot.Site('commons', 'commons')
debug = 0         # 1 = confirm each edit interactively and LIMIT the query
attempts = 0
count = 0

# Two sources of candidate items whose P301 target lacks the reciprocal P910:
#   option 0 - live SPARQL query (can time out, but is the freshest data)
#   option 1 - the P301 constraint-violations database report page
for option in range(0, 2):
    candidates = []
    if option == 1:
        # Scrape item QIDs out of the "Inverse" violations section of the report.
        reportpage = pywikibot.Page(repo, 'Wikidata:Database reports/Constraint violations/P301')
        text = reportpage.get()
        text = text.split('== "Inverse" violations ==')[1].split('== "Single value" violations ==')[0]
        for line in text.splitlines():
            try:
                qid = line.split('* [[')[1].split(']]')[0]
                candidates.append(qid)
            except IndexError:
                continue  # line is not a "* [[Qxxx]]" list entry
        # NOTE(review): the original left candidates unused (commented-out loop);
        # iterating them as ItemPages matches the apparent intent - confirm.
        pages = (pywikibot.ItemPage(repo, qid) for qid in candidates)
    else:
        # This query times out sometimes, but it is the only one still working
        # (the hasViolationForConstraint form stopped working per T274982).
        query = 'SELECT ?item ?itemLabel ?should_link_via_P910_to ?should_link_via_P910_toLabel '\
            'WHERE {'\
            '?should_link_via_P910_to wdt:P301 ?item .'\
            'FILTER NOT EXISTS { ?item wdt:P910 ?should_link_via_P910_to } .'\
            'SERVICE wikibase:label { bd:serviceParam wikibase:language "en" } .'\
            '}'
        if debug:
            query = query + " LIMIT 10"
        print(query)
        pages = pagegenerators.WikidataSPARQLPageGenerator(query, site=wikidata_site)

    for page in pages:
        try:
            item_dict = page.get()
        except Exception:
            # Deleted/redirected/inaccessible item: skip it.
            continue
        qid = page.title()
        print("\nhttp://www.wikidata.org/wiki/" + qid)
        try:
            p301 = item_dict['claims']['P301']
        except KeyError:
            print('No P301')
            continue
        for clm in p301:
            # val is the item that P301 points at; it should link back via P910.
            val = clm.getTarget()
            print(val)
            try:
                target_dict = val.get()
            except Exception:
                continue  # target unavailable (deleted, redirect, ...)
            if 'P910' in target_dict.get('claims', {}):
                continue  # reciprocal already present - nothing to do
            print('No P910 in target')
            newclaim = pywikibot.Claim(repo, 'P910')
            newclaim.setTarget(page)
            if debug:
                text = input("Save link? ")
            else:
                text = 'y'
            if text != 'n':
                val.addClaim(newclaim, summary=u'Adding reciprocal P910 value to match P301 in target')
                nummodified += 1
                if nummodified >= maxnum:
                    print('Reached the maximum of ' + str(maxnum) + ' entries modified, quitting!')
                    sys.exit()

print('Done! Edited ' + str(nummodified) + ' entries')
# EOF