User:Monkbot/task 13: remove replace deprecated subscription registration parameters

Task 13 is a single-use task that removes or replaces deprecated |subscription= and |registration= parameters in existing cs1|2 templates.

description edit

cs1|2 has deprecated |subscription= and |registration= at this RFC (aspect B3). This task:

  1. applies only to canonically named templates; redirects and template wrappers are not acknowledged
  2. ignores cs1|2 templates that have |subscription= and |registration= parameters that are not assigned one of the three allowed values (yes, y, true)
  3. does nothing when the citation template holds:
    any of these url parameters (with assigned values):
    • |url=, |article-url=, |chapter-url=, |entry-url=, |section-url= (the url list)
    AND holds any of these identifier parameters (with assigned values):
    • |doi=, |DOI=, |jstor=, |JSTOR=, |bibcode=, |hdl=, |HDL=, |ol=, |OL=, |osti=, |OSTI= (the identifier list)
    this is because the task cannot know which of the url parameter or the identifier parameter the original editor intended to be 'marked' by the deprecated parameters (could be one, the other, or both)
  4. does nothing when the citation template holds more than one of the url list parameters; again, could be one, the other, or both
  5. removes |subscription= and |registration= parameters when the citation template does not have any of the url list parameters; cs1|2 identifier parameters are presumed to lie behind a paywall or registration barrier; cs1|2 does not highlight the norm so |subscription= and |registration= are superfluous in these citation templates
  6. replaces |subscription= and |registration= with the appropriate |<xxx->url-access= parameter when the citation template holds only one of the url list parameters

Task 13 skips pages that include {{bots|deny=Monkbot13}}.

ancillary tasks edit

Empty |subscription= and |registration= parameters are deleted. This task does not do awb general fixes.

script edit

// this script removes / replaces deprecated |subscription= and |registration= parameters from cs1|2 templates
//
// to make a list for awb use category: CS1 errors: deprecated parameters

	// regex fragment that matches the name of a cs1|2 template: 'cite' (or 'Cite') followed, via lookahead, by one of
	// the listed suffixes, or 'citation' / 'Citation', or a bare 'cite' followed directly by a pipe; it is spliced
	// into larger patterns right after the opening '{{' both to hide non-cs1|2 templates and to find cs1|2 templates
	string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

	// per-page flags: sup_reg_common() clears the matching flag when it removes or replaces a deprecated parameter;
	// ProcessArticle() reads both to decide whether to skip the page and what to put in the edit summary, then
	// sets them back to true for the next page
	bool gSkip_subscription = true;				// presume that we will skip this page
	bool gSkip_registration = true;

	// names of the url-holding parameters to which a deprecated access parameter may apply ("the url list");
	// '-access' is appended to one of these names to make the replacement parameter name
	string[] url_params = { "url", "article-url", "chapter-url", "entry-url", "section-url"};


//---------------------------< P R O C E S S A R T I C L E >--------------------------------------------------

// AWB custom-module entry point.  Returns the (possibly) modified article text; Summary receives the edit summary and
// Skip is set true when neither a |subscription= nor a |registration= parameter was removed or replaced on this page.
// ArticleTitle and wikiNamespace are not used.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
	{
	Skip = false;

	string[] deprecated_params = { "subscription", "registration" };			// always handled in this order

//---------------------------< E M P T I E S >----------------------------------------------------------------
// drop |subscription= and |registration= parameters that have no value; the pipe or closing brace that follows is kept

	foreach (string name in deprecated_params)
		ArticleText = Regex.Replace(ArticleText, @"\| *" + name + @" *=\s*([\|\}])", "$1");


//---------------------------< H I D E >----------------------------------------------------------------------
// HIDE TEMPLATES: innermost templates that are not cs1|2 get their {{ and }} swapped for __0P3N__ and __CL0S3__;
// repeated until none remain so that nested templates are hidden from the inside out

	string non_cs1_template = @"\{\{(?!\s*" + IS_CS1 + @")([^\{\}]*)\}\}";
	while (Regex.IsMatch (ArticleText, non_cs1_template))
		{
		ArticleText = Regex.Replace(ArticleText, non_cs1_template, "__0P3N__$1__CL0S3__");
		}

// HIDE WIKILINKS: piped links with parenthetical disambiguation first, then the remaining piped links, so that
// their pipes and brackets cannot be mistaken for template syntax
	string disambiguated_link = @"\[\[([^\|\]]+) +\(([^\)\|]+)\)\|([^\]]+)\]\]";
	string labelled_link = @"\[\[([^\|\]]+)\|([^\]]+)\]\]";

	ArticleText = Regex.Replace(ArticleText, disambiguated_link, "__WL_0P3N__$1__D4B_O__$2__D4B_C____P1P3__$3__WL_CL0S3__");
	ArticleText = Regex.Replace(ArticleText, labelled_link, "__WL_0P3N__$1__P1P3__$2__WL_CL0S3__");


//---------------------------< S U B S C R I P T I O N   /   R E G I S T R A T I O N >------------------------

	foreach (string name in deprecated_params)
		ArticleText = sup_reg_common (ArticleText, name);


//---------------------------< U N H I D E >------------------------------------------------------------------
// put back everything that was hidden; the placeholders are plain literals so simple string replacement suffices

	ArticleText = ArticleText
		.Replace("__WL_0P3N__", "[[")
		.Replace("__D4B_O__", " (")												// always exactly one space before the '('
		.Replace("__D4B_C__", ")")
		.Replace("__P1P3__", "|")
		.Replace("__WL_CL0S3__", "]]")
		.Replace("__0P3N__", "{{")
		.Replace("__CL0S3__", "}}");


//---------------------------< S U M M A R Y >----------------------------------------------------------------

	Skip = gSkip_subscription && gSkip_registration;							// nothing fixed: tell awb to skip the page

	// alternate summary heads, swap in as needed:
	//	developmental: "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: (developmental testing): "
	//	production:    "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: "
	string summary_head = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: ([[Wikipedia:Bots/Requests_for_approval/Monkbot_13|BRFA testing]]): ";

	string summary_tail;
	if (gSkip_subscription)														// only |registration= touched (also the fallback when nothing was)
		summary_tail = "Fix deprecated |registration= in cs1|2 templates;";
	else if (gSkip_registration)												// only |subscription= touched
		summary_tail = "Fix deprecated |subscription= in cs1|2 templates;";
	else																		// both touched
		summary_tail = "Fix deprecated |subscription= and |registration= in cs1|2 templates;";

	Summary = summary_head + summary_tail;

	gSkip_subscription = true;													// back to the presumption for the next page
	gSkip_registration = true;

	return ArticleText;
	}


//---------------------------< S U P _ R E G _ C O M M O N >--------------------------------------------------

// Removes or replaces one deprecated access parameter in every cs1|2 template found in ArticleText.
//
//	ArticleText:	article text in which non-cs1|2 templates and piped wikilinks have already been hidden
//	sr_param:		name of the deprecated parameter to handle: "subscription" or "registration"
//
// Returns the modified text.  For each template that has |<sr_param>= set to exactly yes, true, or y:
//	more than one url-list parameter with a value			-> left alone (can't know which url was meant)
//	one url-list parameter plus an identifier with a value	-> left alone (can't know whether url or identifier was meant)
//	no url-list parameter									-> |<sr_param>= removed
//	one url-list parameter and no identifier				-> |<sr_param>= replaced with |<url param>-access=<sr_param>
// Whenever a template is changed, gSkip_subscription or gSkip_registration (according to sr_param) is set false.
string sup_reg_common (string ArticleText, string sr_param)
	{
	// the lookahead (?=\s*[\|\}]) requires the value to END at the next pipe or closing brace; without it any value
	// that merely begins with 'y', 'yes', or 'true' (|subscription=your library card) would match and the rest of
	// the value would be left behind as junk in the template
	string pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*)\|\s*" + sr_param + @"\s*=\s*(?:yes|true|y)(?=\s*[\|\}])([^\}]*)";

	return Regex.Replace(ArticleText, pattern,
		delegate(Match match)
			{
			string raw_capture = match.Groups[0].Value;						// the whole captured citation
			string raw_prefix = match.Groups[1].Value;						// citation template up to the start of the deprecated parameter
			string raw_postfix = match.Groups[2].Value;						// citation after the deprecated parameter's value

			int url_count = 0;												// number of url-holding parameters to which the deprecated parameter might apply
			string url_param = @"";											// will be assigned the last-found url-holding parameter name

			foreach (string param in url_params)
				{
				// [^\s\|\}] demands a non-whitespace first character; with [^\|\}] the \s* could give back a space or
				// newline so that an empty '|url= ' was counted as holding a value
				if (Regex.IsMatch (raw_capture, @"\|\s*" + param + @"\s*=\s*[^\s\|\}]"))
					{
					url_count++;											// count this one
					url_param = param;										// save the parameter name
					}
				}

			if (1 < url_count)												// more than one url-holding parameter, can't know which one was meant
				return raw_capture;											// so do nothing

			// identifiers to which the access parameters might apply; same non-whitespace requirement as for the urls
			bool has_identifier = Regex.IsMatch (raw_capture, @"\|\s*(?:doi|DOI|jstor|JSTOR|bibcode|hdl|HDL|ol|OL|osti|OSTI)\s*=\s*[^\s\|\}]");

			if (1 == url_count && has_identifier)							// identifier plus url-holding parameter; can't know to which the deprecated parameter applies
				return raw_capture;											// so do nothing

			if ("subscription" == sr_param)									// if here, the template will be changed; record that for skip decision and edit summary
				gSkip_subscription = false;
			else
				gSkip_registration = false;

			if (0 == url_count)												// no url-holding parameter (with or without identifier): deprecated parameter does not apply
				return raw_prefix + raw_postfix;							// so remove it

			return raw_prefix + @"|" + url_param + @"-access=" + sr_param + @" " + raw_postfix;	// exactly one url and no identifier: replace with |<url param>-access=<sr_param>
			});
	}