User:GreenC/software/urlchanger-skeleton-easy.nim

Sample skeleton code for WP:URLREQ move requests. This is the "easy" version for straightforward moves.
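With the placeholder domains used below (old.com to new.com), a link such as http://www.old.com/page is rewritten to https://new.com/page, and links that no longer resolve are pointed at an archive.org placeholder snapshot (the "1899" timestamp). To adapt the skeleton, search the code for "CUSTOM" and set the old/new domain patterns (ReoldA/ReoldB and RenewA/RenewB) near the top.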


urlchanger-skeleton-easy.nim

discard """

The MIT License (MIT)

Copyright (c) 2016-2021 by User:GreenC (at en.wikipedia.org)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE."""

# Search on "CUSTOM" for project-specific code

var
  ReoldA = "old[.]com"
  ReoldB = "old.com"
  RenewA = "new[.]com"
  RenewB = "new.com"

  Reold1 = "(?i)https?[:][/]{2}(([^.]+)[.])?" & ReoldA
  Reold2 = "http://" & ReoldB
  Reold3 = "http://www." & ReoldB
  Reold4 = "(?i)(www[.])?" & ReoldA

  Repr1 = "(?i)url[ ]*[=][ ]*[/]{2}" & ReoldA
  Repr2 = "(?i)url[ ]*[=][ ]*[/]{2}www[.]" & ReoldA
  Repr3 = "(?i)[[][ ]*[/]{2}" & ReoldA
  Repr4 = "(?i)[[][ ]*[/]{2}www[.]" & ReoldA

  Renew1 = "https://" & RenewB
  Renew2 = "https[:][/]{2}" & RenewA
  Renew3 = "(?i)https?[:][/]{2}(([^.]+)[.])?" & RenewA
  Renew4 = "(?i)(www[.])?" & RenewA
  Renew5 = RenewB   # base domain used for <ref name="new.com">
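  # Illustrative matches, assuming the old.com/new.com placeholders above:
  #   Reold1 matches "http://old.com", "https://www.old.com" and "https://sub.old.com"
  #   Repr1/Repr2 match protocol-relative cite values like "url = //old.com" and "url= //www.old.com"
  #   Repr3/Repr4 match protocol-relative square links like "[ //old.com" and "[//www.old.com"
  #   Renew1 ("https://new.com") is the literal replacement for the scheme+host portion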

#
# Custom version of headerlocation() in medicapi.nim
#  For cases like https://dcms.lds.org/delivery/DeliveryManagerServlet?from=fhd&dps_pid=IE1170338
#  If a Location doesn't have a domain name, use the domain from the first Location
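#  Hypothetical example: if the first hop answers "Location: https://example.org/a" and the last
#  hop answers only "Location: /b", the returned URL is resolved to "https://example.org/b"
#  (or against the scheme+hostname passed in flag3 when given)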
#
proc headerlocation_urlchanger*(head: string, fl: varargs[string]): string =

  var
    mcache = newSeq[string](0)
    c, f, le: int
    flag, flag2, flag3, firstlocation = ""
    firstlocationtrap = false

  if len(fl) == 1:
    flag = fl[0]

  if len(fl) == 2:
    flag = fl[0]
    flag2 = fl[1]

  if len(fl) == 3:
    flag  = fl[0]
    flag2 = fl[1]
    flag3 = fl[2]

  c = awk.split(head, a, "\n")
  for i in 0..c - 1:
    if a[i] ~ "(?i)^[ ]{0,5}location[ ]?[:]":
      if not empty(flag): # get URLs
        awk.sub("(?i)^[ ]*location[ ]*[:][ ]*", "", a[i])

        if not firstlocationtrap and a[i] ~ "^http":  # get scheme+hostname of first Location: entry
          firstlocationtrap = true
          firstlocation = uriparseElement(a[i], "scheme")
          firstlocation = firstlocation & "://" & uriparseElement(a[i], "hostname")
        if a[i] !~ "^http":                           # If a Location: has no scheme+hostname, prepend one
          if not empty(flag3):                        # prefer the scheme+hostname given in flag3
            a[i] = flag3 & a[i]
          else:
            if firstlocation ~ "^http":
              a[i] = firstlocation & a[i]
            else:
              return ""
        if empty(flag2):                       
          if isarchiveorg(a[i]):
            mcache.add(strip(a[i]))
        else:
          mcache.add(strip(a[i]))
      else:  # get timestamps
        if awk.split(strip(a[i]), b, " ") > 1:
            f = awk.split(b[1], e, "/")
            for k in 0..f-1:
              if e[k] ~ "^[0-9]{14}$":
                mcache.add(e[k])
                break

  le = len(mcache)
  if le > 0:
    if len(mcache[le - 1]) > 0:  # Return the last Location (or timestamp) collected
      return mcache[le - 1]


#
# Return DEADLINK, or SKIPDEADLINK if the cite template matches skiptemplate
#
template checklinkredir_helper(tl, skiptemplate: string) =

  if empty(skiptemplate) or tl !~ skiptemplate:
    return "DEADLINK"
  return "SKIPDEADLINK"


#
# Follow a link to its redirect and return the ultimate source URL.
#
#   . Return new url if it can find one
#   . Return "" it can't find a redirect. Add an archive if url returns 404, otherwise if 200 leave untouched
#   . Return "DEADLINK" it can't find a redirect. Force adding archive regardless of url status. Useful if redirect is known homepage for example.
#   . Return "SKIPDEADLINK" it can't find a redirect. Do not add an archive no matter what.
#
proc checklinkredir*(url, tl: string): string =

     result = ""

     var
       url = url
       # CUSTOM
       skiptemplate = "(?i)[{]{2}[ ]*album[ -]?chart"    # Skip adding new archives for these templates or set to blank if none
       newurl = ""
       headres: int
       # CUSTOM
       fullurl = Reold1 & GX.endurlcs  # GX.endurlcs = "[^\\s\\]|}{<]*[^\\s\\]|}{<]*"
     
     if awk.match(url, fullurl, dest) > 0:
       #se("URL0 = " & url)
       #se("DEST0 = " & dest)

       # CUSTOM
       newurl = dest
       gsub(Reold1, Renew1, newurl)     # "(?i)https?[:][/]{2}(([^.]+)[.])?old[.]com", "https://new.com"

       if(newurl ~ Renew2):              # "https[:][/]{2}new[.]com"
         var (head, bodyfilename) = getheadbody(newurl)
         bodyfilename = "" # supress compile warn
         headres = headerresponse(head)

         if headres == 200:                      # OK
           return newurl
         elif headres == 404 or headres == -1:   # Dead
           checklinkredir_helper(tl, skiptemplate)
         elif headres == 301 or headres == 302:  # Redirect
           var redirurl = headerlocation_urlchanger(head)
           sendlog(Project.urlchanger, CL.name, url & " ---- " & redirurl & " ---- Redirect found: check it out ---- urlchanger7.1")
           if not empty(redirurl):
             var (head2, bodyfilename2) = getheadbody(redirurl)
             bodyfilename2 = "" # supress compile warn
             if headerresponse(head2) == 200:
               return redirurl
             elif headerresponse(head2) == 404:
               checklinkredir_helper(tl, skiptemplate)
             else:
               sendlog(Project.urlchanger, CL.name, url & " ---- " & redirurl & " ---- Redirect not working - aborting ---- urlchanger7.2")
               return "SKIPDEADLINK"
           else:
             sendlog(Project.urlchanger, CL.name, url & " ---- " & redirurl & " ---- Redirect not working - aborting ---- urlchanger7.5")  
             return "SKIPDEADLINK"  
         elif headres == 443 or headres == 500:  # Forbidden or server error
           checklinkredir_helper(tl, skiptemplate)
         else:
           sendlog(Project.urlchanger, CL.name, url & " ---- Unknown response code - aborting ---- urlchanger7.3")
           return "SKIPDEADLINK"
       else:
         sendlog(Project.urlchanger, CL.name, url & " ---- Unknown problem: check it out ---- urlchanger7.4")
         checklinkredir_helper(tl, skiptemplate)

     if tl !~ skiptemplate:
       return ""
     else:
       return "SKIPDEADLINK"

#
# Last step: whole-article check that logs links which were not converted
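#  E.g. a leftover "http://old.com/..." that is not part of an archive URL and is not followed by a
#  {{webarchive}} template gets logged as unconverted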
#
proc checklinkexists(): string {.discardable.} =

  if Runme.urlchanger != true:
    return 

  var
    fullurl = Reold1 & GX.endurlcs  # GX.endurlcs = "[^\\s\\]|}{<]*[^\\s\\]|}{<]*"

  psplit(GX.articlework, fullurl, p):
      # skip archives and cite templates, imperfect method due to duplicates
      if awk.match(GX.articlework, "([/]|[?]url[=])https?" & escapeRe(gsubi("^https?", "", p.field[i])) ) == 0 and awk.match(GX.articlework, escapeRe(p.field[i]) & GX.space & GX.webarchive) == 0: 
        sendlog(Project.urlchanger, CL.name, p.field[i] & " ---- Link wasn't converted: check it out ---- checklinkexists1.1")

#
# Migrate the given domain to the new domain; dead links are pointed at an archive.org/web/1899.. placeholder
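#  Passes, in order: protocol-relative links, bare links after ">", "#" or "*", {{cite}} |url= and
#  chapter-url etc., square links with {{webarchive}} or {{dead}}, standalone {{webarchive}},
#  archive.org and webcitation.org wrappers, leftover old-domain metadata in cite fields,
#  <ref name=> strings, bare URLs scraped from the rendered page, and domain text inside <ref></ref>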
#
proc urlchanger(): bool {.discardable.} =

  if Runme.urlchanger != true:
    return false    

  var             
    url, res, archiveurl, webarchive, sourceurl, title, head, bodyfilename, fpHTML, prurl, urltype = ""
    tot = 0    

    fullurl = Reold1 & GX.endurlcs   # GX.endurlcs = "[^\\s\\]|}{<]*[^\\s\\]|}{<]*"

# CUSTOM
    addarchive = true  # if true then it will add archive URLs if link is dead

  psplit(GX.articlework, Repr1, p):    # "(?i)url[ ]*[=][ ]*[/]{2}old[.]com"
      p.field[i] = "url = " & Reold2   # "http://old.com"
      inc(p.ok)
  psplit(GX.articlework, Repr2, p):    # "(?i)url[ ]*[=][ ]*[/]{2}www[.]old[.]com"
      p.field[i] = "url = " & Reold3   # "http://www.old.com"
      inc(p.ok)
  psplit(GX.articlework, Repr3, p):    # "(?i)[[][ ]*[/]{2}old[.]com"
      p.field[i] = "[" & Reold2        # "http://old.com"
      inc(p.ok)
  psplit(GX.articlework, Repr4, p):    # "(?i)[[][ ]*[/]{2}www[.]old[.]com"
      p.field[i] = "[" & Reold3        # "http://www.old.com"
      inc(p.ok)

  # Convert cases like:
  #  ">http://www.highbeam.com/doc/1G1-9343909.html"
  #  "#http://www.highbeam.com/doc/1G1-9343909.html"
  #  "*http://www.highbeam.com/doc/1G1-9343909.html"
  psplit(GX.articlework, "[>#*]{1}[ ]*" & fullurl, p):
      if awk.match(p.field[i], "^[>#*]{1}[ ]*", dest1) > 0: 
        if awk.match(p.field[i], fullurl, dest2) > 0:
          p.field[i] = dest1 & "[" & dest2 & " " & Runme.urlchangerTag & "]"     
          sed("Converting bare to bracket: " & p.field[i], Debug.network)
          sendlog(Project.urlchanger, CL.name, p.field[i] & " ---- convert barelink to bracket ---- urlchanger0.1")
          inc(p.ok)
          inc(tot)

  # Replace in {{cite web |url}} ({{dead}}{{cbignore}})?
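  # Hypothetical example: {{cite web |url=http://old.com/a |archive-url=https://web.archive.org/web/20100101000000/http://old.com/a |url-status=dead}}
  # has |url= swapped to https://new.com/a when it resolves, |archive-url= re-pointed at a snapshot of
  # the new URL and |url-status= set to live; if instead the old URL is dead and has no archive, the
  # |url= gets the archive.org/web/1899.. placeholder, otherwise |url-status=dead is added/kept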

# CUSTOM template additions
  var citelist3 = GX.citelist & "|album[ -]?chart" 
  var cite3 = "(?i)([{][{][ ]*(" & citelist3 & ")[^}]+}})"

  psplit(GX.articlework, cite3 & "[ ]*(" & GX.dead & "[ ]*(" & GX.cbignore & ")?)?", p):

      url     = ""
      urltype = ""

      # find url, otherwise try alternatives like chapter-url etc..
      prurl = getarg("url", "clean", p.field[i])     
      if prurl ~ fullurl:
        urltype = "url"
        url = prurl
      else:
        awk.split("chapter-url contribution-url entry-url article-url section-url map-url conference-url transcript-url lay-url", a, " ")
        for k in 0..len(a) - 1:
          if isarg(a[k], "exists", p.field[i]):                 
            prurl = getarg(a[k], "clean", p.field[i])
            if prurl ~ fullurl:
              urltype = a[k]
              url = prurl
              break

      if url ~ fullurl:
        gsub("[#]$", "", url)
        res = checklinkredir(url, p.field[i])
        if not empty(res) and res !~ "DEADLINK$" and res != url and not empty(urltimestamp(getarg("archive-url", "clean", p.field[i]))):

          if isarg(urltype, "exists", p.field[i]):                                # swap in new URL
            p.field[i] = replacearg(p.field[i], urltype, res, "urlchanger1.1")

          if isarg("archive-url", "exists", p.field[i]):                          # move archive URL
            var tup: tuple[url: string, status: int, response: int]
            tup = queryapiget(res, urltimestamp(getarg("archive-url", "clean", p.field[i])) )
            if tup.status == 1:
              # p.field[i] = replacearg(p.field[i], "archive-url", "https://web.archive.org/web/18990101080101/" & res, "urlchanger1.1a")
              p.field[i] = replacearg(p.field[i], "archive-url", tup.url, "urlchanger1.1a")
              if isarg("url-status", "exists", p.field[i]):
                p.field[i] = replacearg(p.field[i], "url-status", "live", "urlchanger1.1b")
            else:
              sendlog(Project.urlchanger, CL.name, url & " ---- " & res & " ---- not removed archive ---- urlchanger1.6")
            #  awk.split("archive-url archive-date url-status", a, " ")               # delete existing archives
            #  for k in 0..len(a) - 1:
            #    if isarg(a[k], "exists", p.field[i]):
            #      p.field[i] = gsubs(getarg(a[k], "bar", p.field[i]), "", p.field[i])
            #      if a[k] ~ "archive-url":
            #        sendlog(Project.urlchanger, CL.name, url & " ---- " & res & " ---- removed archive ---- urlchanger1.6")

          gsub(GX.dead & "[ ]*" & GX.cbignore, "", p.field[i])
          gsub(GX.dead, "", p.field[i])

          p.ok += inclog("urlchanger1.1", GX.esurlchange, Project.syslog, url & " ---- " & res)
          inc(tot)

        else: # add archive if url= is dead

          if addarchive and urltype == "url" and res != "SKIPDEADLINK":

            if res != "DEADLINK":
              (head, bodyfilename) = getheadbody(url, "one") # check the original URL is dead

            if headerresponse(head) != 200 or res == "DEADLINK":
  
              gsub(GX.dead & "[ ]*" & GX.cbignore, "", p.field[i])
              gsub(GX.dead, "", p.field[i])

              archiveurl = getarg("archive-url", "clean", p.field[i])
              if empty(archiveurl):
                p.field[i] = replacearg(p.field[i], "url", "https://web.archive.org/web/18990101080101/" & url, "urlchanger1.1")
                sed("Converting to 1899 (1): " & p.field[i], Debug.network)
                inc(p.ok)
                inc(tot)
              else:  # Add/modify |url-status=dead 
                if isarg("url-status", "missing" , p.field[i]):
                  if isarg("url", "exists", p.field[i]):
                    addarg("url-status", "dead", "archive-url", p.field[i]):
                      p.ok += inclog("urlchanger1.2", GX.esurlchange, Project.urlchanger, url & " ---- add url-status status")
                      inc(tot)
#                    modelbar = getarg(firstarg(p.field[i]), "bar", p.field[i])
#                    locbar = getarg(notlastarg(p.field[i], "archive-url"), "bar", p.field[i])
#                    if not empty(modelbar):
#                      if not empty(modelfield(modelbar, "url-status", "dead")):
#                        gsubs(locbar, locbar & modelfield(modelbar, "url-status", "dead"), p.field[i])
#                        p.ok += inclog("urlchanger1.2", GX.esurlchange, Project.urlchanger, url & " ---- add url-status status")
#                        inc(tot)
                else:
                  if getarg("url-status", "clean", p.field[i]) !~ "(?i)dead":
                    p.field[i] = replacearg(p.field[i], "url-status", "dead", "urlchanger1.2")
                    p.ok += inclog("urlchanger1.3", GX.esurlchange, Project.urlchanger, url & " ---- modify url-status status")
                    inc(tot)

  # replace [state.gov] {{webarchive}}
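  # E.g. "[http://old.com/a Title] {{webarchive |url=<old snapshot>}}" gets the new URL in the square
  # link and the {{webarchive}} re-pointed at a snapshot of the new URL (when one can be found)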
  psplit(GX.articlework, "[[][ ]*" & fullurl & "[^]]*[]][ ]*" & GX.webarchive, p):
      if awk.match(p.field[i], GX.webarchive, webarchive) > 0 and awk.match(p.field[i], fullurl, url) > 0:
        res = checklinkredir(url, p.field[i])
        if not empty(res) and res !~ "DEADLINK$" and res != url and not empty(urltimestamp(getarg("url", "clean", webarchive))):
          var tup: tuple[url: string, status: int, response: int]
          tup = queryapiget(res, urltimestamp(getarg("url", "clean", webarchive)) )
          if tup.status == 1:
            let orig = webarchive
            webarchive = replacearg(webarchive, "url", tup.url, "urlchanger2.2")
            subs(orig, "", p.field[i])
            subs(url, res, p.field[i])
            p.field[i] = p.field[i] & webarchive
            p.ok += inclog("urlchanger2.1", GX.esurlchange, Project.syslog, url & " ---- " & res & " ---- delete webarchive (removed archive)")
            inc(tot)
          else:
            sendlog(Project.urlchanger, CL.name, url & " ---- " & res & " ---- not removed archive ---- urlchanger2.2")


  # Replace in [state.gov] ({dead}{cbignore})?
  psplit(GX.articlework, "[[][ ]*" & fullurl & "[^]]*[]][ ]*(" & GX.dead & "[ ]*(" & GX.cbignore & ")?)?", p):
      if awk.match(p.field[i], fullurl, url) > 0:

        res = checklinkredir(url, p.field[i])
        gsub(GX.dead & "[ ]*" & GX.cbignore, "", p.field[i])
        gsub(GX.dead, "", p.field[i])
        if not empty(res) and res !~ "DEADLINK$":
          gsubs(url, res, p.field[i])

#CUSTOM - changes to square-link title field
          gsub("(?i)chartstats[.](org|com)", "Official Charts Company", p.field[i])
          gsub("(?i)charts?[ ]?stats", "Official Charts Company", p.field[i])
          gsub("(?i)UK (singles|album) charts?", "Official Charts Company", p.field[i])
          gsub("[(]Link redirected to OCC website[)]", "", p.field[i])

          p.ok += inclog("urlchanger4.1", GX.esurlchange, Project.syslog, url & " ---- " & res & " ---- modify squarelink")
          inc(tot)

        else: # add archive
          if addarchive and res != "SKIPDEADLINK":
            if match(GX.articlework, escapeRe(p.field[i]) & GX.space & GX.webarchive, dest) == 0:  # skip if followed by {{webarchive}}
              if res != "DEADLINK":
                (head, bodyfilename) = getheadbody(url, "one") # check the original URL is dead
              if headerresponse(head) != 200 or res == "DEADLINK":
                gsubs(url, "https://web.archive.org/web/18990101080101/" & url, p.field[i])
                sed("Converting to 1899 (2): " & p.field[i], Debug.network)
                inc(p.ok)
                inc(tot)

  # replace standalone {{webarchive}} - should come after the above for urlchanger3.2 to work
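  # Hypothetical example: a lone {{webarchive |url=https://web.archive.org/web/20100101000000/http://old.com/a |title=T}}
  # (not preceded by a "]" square link) becomes "[https://new.com/a T]" when the source URL resolves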
  psplit(GX.articlework, GX.webarchive, p):
      url = getarg("url", "clean", p.field[i])
      if url ~ fullurl:
        if awk.match(GX.articlework, "[]][ ]*" & escapeRe(p.field[i])) == 0:  # skip [state.gov] {{webarchive}}
          sourceurl = urlurl(url)
          res = checklinkredir(sourceurl, p.field[i])
          if not empty(res) and res !~ "DEADLINK$":
            title = getarg("title", "clean", p.field[i])
            if not empty(title):
              p.field[i] = "[" & res & " " & title & "]"
            else:
              p.field[i] = "[" & res & "]"
            p.ok += inclog("urlchanger3.1", GX.esurlchange, Project.syslog, sourceurl & " ---- " & res & " ---- replace webarchive")
            inc(tot)
            if countsubstring(GX.articlework, res) > 1:  # look for bugs 
              sendlog(Project.urlchanger, CL.name, url & " ---- " & res & " ---- bug in standalone webarchive conversion ---- urlchanger3.2")


  # Replace [archive.org/state.gov] with [state.gov] {{webarchive}}
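  # Hypothetical example: "[https://web.archive.org/web/20100101000000/http://old.com/a Title]" becomes
  # "[https://new.com/a]{{webarchive |url=https://web.archive.org/web/20100101000000/http://old.com/a |date=...}}"
  # when the new URL resolves; note the original link label is not carried over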
  psplit(GX.articlework, "[[][ ]*https?[:][/]{2}(www[.]|web[.])?archive[.](org|today|is)[/](web[/])?[0-9]{14}[/]" & fullurl & "[^]]*[]]", p):
      if awk.match(p.field[i], fullurl, url) > 0:
        gsub("[/]$", "", url)
        awk.match(p.field[i], "https?[:][/]{2}(www[.]|web[.])?archive[.](org|today|is)[/](web[/])?[0-9]{14}[/]" & fullurl, archiveurl)
        res = checklinkredir(url, p.field[i])
        if not empty(res) and res !~ "DEADLINK$" and res != url and not empty(urltimestamp(archiveurl) ):
          var tup: tuple[url: string, status: int, response: int]
          tup = queryapiget(res, urltimestamp(archiveurl) )
          if tup.status == 1 and not empty(timestamp2numericdate(urltimestamp(archiveurl))):
            p.field[i] = "[" & res & "]" & "{{webarchive |url=" & archiveurl & " |date=" & timestamp2numericdate(urltimestamp(archiveurl)) & "}}"
            p.ok += inclog("urlchanger5.1", GX.esurlchange, Project.syslog, archiveurl & " ---- " & res & " ---- replace archive squarelink")
            inc(tot)
          else:
            sendlog(Project.urlchanger, CL.name, url & " ---- " & res & " ---- not removed archive ---- urlchanger5.2")

        #  gsubs(archiveurl, res, p.field[i])
        #  p.ok += inclog("urlchanger5.1", GX.esurlchange, Project.syslog, archiveurl & " ---- " & res & " ---- replace archived squarelink")
        #  inc(tot)

  # Replace [webcitation.org/query?url=https://state.gov ..] square links with the new source URL [state.gov ..]
  psplit(GX.articlework, "[[][ ]*https?[:][/]{2}(www[.]|web[.])?webcitation[.]org[/]query[?]url=" & fullurl & "[^]]*[]]", p):
      if awk.match(p.field[i], fullurl, url) > 0:
        gsub("[/]$", "", url)
        awk.match(p.field[i], "https?[:][/]{2}(www[.]|web[.])?webcitation[.]org[/]query[?]url=" & fullurl, archiveurl)
        res = checklinkredir(url, p.field[i])
        if not empty(res) and res !~ "DEADLINK$" and res != url:
          gsubs(archiveurl, res, p.field[i])
          p.ok += inclog("urlchanger5.2", GX.esurlchange, Project.syslog, archiveurl & " ---- " & res & " ---- replace webcitationquary" )
          inc(tot)

  # If url is already switched to new but archive-url and other metadata for the old URL still exist
  psplit(GX.articlework, GX.cite2, p):
      prurl = getarg("url", "clean", p.field[i])
      if prurl ~ Renew3:                              #  "(?i)https?[:][/]{2}(([^.]+)[.])?new[.]com"
        var f = 0
        var g = 0
        if getarg("archive-url", "clean", p.field[i]) ~ fullurl:
          awk.split("archive-url archive-date url-status", a, " ")
          for k in 0..len(a) - 1:
            if isarg(a[k], "exists", p.field[i]):
              p.field[i] = gsubs(getarg(a[k], "bar", p.field[i]), "", p.field[i])
              inc(f)

# CUSTOM field changes: change text in work, publisher etc.
        awk.split("work website publisher title", a, " ")
        for k in 0..len(a) - 1:
          if isarg(a[k], "exists", p.field[i]):
            var cleanarg = getarg(a[k], "clean", p.field[i])
            if awk.match(cleanarg, Reold4, dest) > 0:         # "(?i)(www[.])?old[.]com"
              if a[k] !~ "(title|publisher)":
                p.field[i] = replacearg(p.field[i], a[k], "new.com", "urlchanger5.3.1") # replace whole arg value with new URL
                inc(g)
              else:
                cleanarg = gsubs(dest, "new.com", cleanarg)                             # replace string within arg value new URL
                p.field[i] = replacearg(p.field[i], a[k], cleanarg, "urlchanger5.3.2")
                inc(g)

              # add more cases here. See urlchanger-chartstats.nim for broader examples

# CUSTOM field changes
        # delete |publisher if it duplicates |work / |website: old domain in work/website, new domain in publisher
        # Reold4 = "(?i)(www[.])?old[.]com" Renew4 = "(?i)(www[.])?new[.]com"
        if getarg("work", "clean", p.field[i]) ~ Reold4 and getarg("publisher", "clean", p.field[i]) ~ Renew4:   
          gsubs(getarg("publisher", "bar", p.field[i]), "", p.field[i])
          # p.field[i] = replacearg(p.field[i], "work", "[[Official Charts Company]]", "urlchanger5.3.3")
          inc(g)
        if getarg("website", "clean", p.field[i]) ~ Reold4 and getarg("publisher", "clean", p.field[i]) ~ Renew4:
          gsubs(getarg("publisher", "bar", p.field[i]), "", p.field[i])
          # p.field[i] = replacearg(p.field[i], "work", "[[Official Charts Company]]", "urlchanger5.3.4")
          inc(g)

        if f > 0:
          p.ok += inclog("urlchanger5.3", GX.esurlchange, Project.urlchanger, prurl & " ---- remove archive-url")
          inc(tot)
        if g > 0:
          p.ok += inclog("urlchanger5.3", GX.esurlchange, Project.urlchanger, prurl & " ---- update metadata")
          inc(tot)

  # If url (any type) doesn't match the new domain but a work|website|publisher field names it, replace that field with the url's hostname
  psplit(GX.articlework, GX.cite2, p):
      prurl = getarg("url", "clean", p.field[i])
      if prurl !~ Renew3:                              # "(?i)https?[:][/]{2}(([^.]+)[.])?new[.]com"
        awk.split("work website publisher", a, " ")
        for k in 0..len(a) - 1:
          if isarg(a[k], "exists", p.field[i]):
            var cleanarg = getarg(a[k], "clean", p.field[i])
            if awk.match(cleanarg, Renew4, dest) > 0:  # "(?i)(www[.])?new[.]com"
              p.field[i] = replacearg(p.field[i], a[k], uriparseElement(prurl, "hostname"), "urlchanger5.4") # replace whole arg value
              p.ok += inclog("urlchanger5.4", GX.esurlchange, Project.urlchanger, prurl & " ---- " & a[k] & " ---- remove stray domain in work.etc field")
              inc(tot)

  # change <ref name="old.com"/> style names to the new base domain
  psplit(GX.articlework, "<ref[^>]*>", p):      
      if p.field[i] ~ Reold4:   # "(?i)(www[.])?old[.]com"
        gsub(Reold4, Renew5, p.field[i])
        p.ok += inclog("urlchanger5.5", GX.esurlchange, Project.urlchanger, p.field[i] & " ---- change ref name=" & Renew5)             
        inc(tot)      


  # Bare URLs with no square bracket

  # step 1: Count bare links with no square brackets and save in associative-array aar[]
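  # The rendered page is scraped because a bare URL shows up as ">http://old.com/a</a>" in the HTML
  # (anchor text identical to the URL), which distinguishes it from bracketed or cited uses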

  var aar = initTable[string, int]()
  (head, bodyfilename) = getheadbody("https://en.wikipedia.org/wiki/" & quote(CL.name), "one")  # scrape body
  fpHTML = readfile(bodyfilename)
  if not empty(fpHTML):
    psplit(fpHTML, "[>]http[^<]+[<][/][Aa][>]", p):
        gsub("^[>]|[<][/][Aa][>]$", "", p.field[i])
        if awk.match(p.field[i], fullurl, dest) > 0:
          if len(p.field[i]) == len(dest) and GX.articlework !~ ("https://web.archive.org/web/18990101080101/" & dest):
            if hasKey(aar, p.field[i]):
              inc(aar[p.field[i]])
            else:
              aar[p.field[i]] = 1 
              aar[convertxml(p.field[i])] = 1  # catch all possibilities as URLs are sometimes HTML-encoded and sometimes not

  # step 2: make sure the number of bare links equals the number of URLs, otherwise log and skip;
  #         then replace all the URLs with gsub()

  for aurl in aar.keys:

    # se("AURL0 = " & aurl)
    # se("AURL1 = " & $aar[aurl])
    # se("AURL2 = " & $countsubstring(GX.articlework, aurl))

    if countsubstring(GX.articlework, aurl) == aar[aurl] and countsubstring(GX.articlework, "/" & aurl) == 0:

      # (CL.name & "---- " & aurl & " ---- Orphan link ---- checklinkexists1.1") >> Project.meta & logfile
      var res = checklinkredir(aurl, "")
      # se("RES = " & res)
      if (empty(res) or res == "DEADLINK") and res != "SKIPDEADLINK":
        if addarchive:
          gsubs(aurl, "[https://web.archive.org/web/18990101080101/" & aurl & "]", GX.articlework)
          sed("Converting to 1899 (3): " & aurl, Debug.network)
          inc(tot)
      elif not empty(res) and res !~ "DEADLINK$":
        for i in 1..aar[aurl]:
          inclog("urlchanger8.1", GX.esurlchange, Project.syslog, aurl & " ---- " & res)
          inc(tot)
        gsubs(aurl, res, GX.articlework)

    elif convertxml(aurl) == aurl and countsubstring(GX.articlework, aurl) > aar[aurl]:
      sendlog(Project.urlchanger, CL.name, aurl & " ---- Too many bare URLs ---- urlchanger8.2")
    elif convertxml(aurl) == aurl and countsubstring(GX.articlework, aurl) < aar[aurl]:
      sendlog(Project.urlchanger, CL.name, aurl & " ---- Bare URLs missing ---- urlchanger8.3")


#CUSTOM
  # split into <ref></ref> sections and take actions within them. This catches hard-to-fix items like a domain name outside a square link
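  # Hypothetical example: a ref body "Chart positions. old.com, retrieved 2015. https://new.com/a"
  # has its already-converted URL protected with a placeholder, then the floating "old.com" text
  # changed to "new.com"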
  let cc = awk.split(GX.articlework, bb, "<ref[^>]*>")    
  for z in 0..cc - 1:
    if(len(bb[z]) > 1):
      var endref = index(bb[z], "</ref>")
      if(endref > 1):
        var kk = substr(bb[z], 0, endref - 1)
        #se("KK = " & kk)
        Renew3 = "(?i)https?[:][/]{2}(([^.]+)[.])?new[.]com"
        if kk ~ Renew3 and kk ~ ("(?i)[ .,-]" & ReoldA):
          var orig = kk
          # see also urlchanger-msnbc 
          if match(kk, Renew3 & GX.endurlcs, hideurl) > 0:
            gsubs(hideurl, "__hideurl__", kk)
            gsub("(?i)(www)?[ .,-]" & RenewA, " " & RenewB, kk)
            gsubs("__hideurl__", hideurl, kk)
            #se("NEW = " & kk)
            GX.articlework = replacefullref(orig, orig, kk, "citeurlchanger1")
            inclog("urlchanger9.1", GX.esurlchange, Project.urlchanger, orig & " ---- " & kk & " ---- change floating cite")
            # Sometimes Love Just Ain't Enough
            inc(tot)

  if tot == 0:
    sendlog(Project.urlchanger, CL.name, " ---- None found ---- urlchanger9.2")

  return true