User:GreenC bot/Job 14/source
Appearance
Note: this source code is outdated, but it gives a general view of how the bot works.
#!/usr/bin/gawk -bE

#
# popbot - a bot to add {{tld|<country_name> metadata Wikidata}} to infoboxes
# Home: http://en.wiki.x.io/wiki/User:GreenC_bot/Job_14
# Dependencies: BotWikiAwk (GitHub)
#

# The MIT License (MIT)
#
# Copyright (c) April 2019
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# BotName is assigned before the library files are included.
BEGIN {
  BotName = "popbot"
}

@include "botwiki.awk"
@include "library.awk"

#
# Configuration, command-line parsing, then hand off to main().
#
# Globals set here and read by the functions below:
#   Template, ADDAREA, ReSpace, articlename, wikiname, logdir, Count
#
BEGIN {

  Mode = "bot"   # set to "find" and it will search only and exit with a 1 (found something) or 0 (found nothing)
                 #  in "find" mode, run via 'project -s' to search local cache for articles containing actionable matches
                 # set to anything else and it will process the article.
                 # NOTE(review): Mode is not read anywhere in this file.

  IGNORECASE = 1
  ReSpace = "[\n\r\t]*[ ]*[\n\r\t]*[ ]*[\n\r\t]*"

  Country = "Spain"
  Template = Country " metadata Wikidata"

  ADDAREA = 0    # Set to "1" to add area + population fields. Set to "0" for population fields only

  Optind = Opterr = 1
  while ((C = getopt(ARGC, ARGV, "hs:l:n:")) != -1) {
    opts++
    if(C == "s")                 # -s <file>      article.txt source to process.
      articlename = verifyval(Optarg)
    if(C == "l")                 # -l <dir/>      Directory where logging is sent. End with "/"
      logdir = verifyval(Optarg)
    if(C == "n")                 # -n <name>      Wikipedia name of article
      wikiname = verifyval(Optarg)
    if(C == "h") {
      usage()
      exit
    }
  }

  if( ! opts || articlename == "" ) {
    stdErr("Error in popbot.awk (1)")
    print "0"
    exit
  }

  # Logging needs both an article name and a directory; otherwise it is disabled.
  if(wikiname == "" || logdir == "")
    Logfile = "/dev/null"
  else {
    if(substr(logdir, length(logdir), 1) != "/")
      logdir = logdir "/"
    Logfile = logdir "logpopbot"
  }

  Count = 0
  main()

}

#
# main - read the article, run popbot() on it, write the result files
#
#   . prints the number of changes (Count) on stdout when the article was
#     modified, otherwise prints "0". Always exits.
#   . on change, writes "article.popbot.txt" and "editsummary.popbot.txt" at
#     the path obtained by replacing the base name of articlename.
#
function main(   article,articlenew,articlenewname,editsummaryname,bn) {

  checkexists(articlename, "popbot.awk main()", "exit")
  article = readfile(articlename)
  if(length(article) < 10) {
    print "0"
    exit
  }

  articlenew = popbot(article)

  if(article != articlenew && length(articlenew) > 10 && Count > 0) {

    articlenewname = editsummaryname = articlename

    # bn is used as a regex anchored at the end of the path
    bn = basename(articlename) "$"

    gsub(bn, "article.popbot.txt", articlenewname)
    printf("%s", articlenew) > articlenewname
    close(articlenewname)

    gsub(bn, "editsummary.popbot.txt", editsummaryname)
    # Customize the edit summary to be more specific.
    # The summary is data, not a format: always print it through "%s" so a
    # "%" in Template can never be interpreted as a conversion.
    printf("%s", "Add {{[[Template:" Template "|" Template "]]}} (via [[User:GreenC bot/Job 14|popbot]])") > editsummaryname
    close(editsummaryname)

    print Count
    exit

  }
  print "0"
  exit

}

#
# popbotSkipLog - note in a log file that the current article was skipped
#
#   . suffix is the log file name appended to logdir ("lognobox", "lognopop")
#   . does nothing when wikiname or logdir is empty, the same condition under
#     which the BEGIN block points Logfile at /dev/null. Without this guard an
#     empty logdir would create the log file in the current directory.
#
function popbotSkipLog(suffix) {

  if(wikiname == "" || logdir == "")
    return
  # Parenthesized: an unparenthesized concatenation after ">>" is ambiguous.
  print wikiname >> (logdir suffix)

}

#
# popbot - main function
#
#   . insert the {{<Template>|<field>}} calls into the "Infobox settlement"
#     fields of the article. Return the modified article, or the article
#     unchanged when there is no infobox or no population_total line.
#   . increments the global Count when the article was rebuilt.
#   . subs() and splitn() come from the included library files.
#     NOTE(review): subs() is assumed to be a literal (non-regex) replace of
#     every occurrence - confirm against library.awk.
#
function popbot(article,   i,a,dest,G,k,point_area,point_pop,c,re,z,N,fp) {

  re = "[{]{2}" ReSpace "Infobox settlement"
  if(article !~ re) {
    popbotSkipLog("lognobox")
    return article
  }

  # population_total needed to orient where to insert fields. Skip and log if missing.
  if(article !~ /[|][ ]*population_total[ ]*[=][ ]*/) {
    popbotSkipLog("lognopop")
    return article
  }

  delete G

  # Existing fields default values
  G["population_total"]     = "| population_total = {{" Template "|population_total}}"
  G["population_as_of"]     = "| population_as_of = {{" Template "|population_as_of}}"
  G["population_footnotes"] = "| population_footnotes = {{" Template "|population_footnotes}}"
  if(ADDAREA) {
    G["area_footnotes"] = "| area_footnotes = {{" Template "|area_footnotes}}"
    G["area_total_km2"] = "| area_total_km2 = {{" Template "|area_total_km2}}"
  }

  # Existing fields actual values (if they exist).
  # G only holds the area keys when ADDAREA is set, so looping over its keys
  # checks exactly the fields that are being managed. A line can match at
  # most one key because the key must be followed directly by "=".
  for(i = 1; i <= splitn(article, a, i); i++) {
    for(k in G) {
      if(match(a[i], "^[ ]*[|][ ]*" k "[ ]*[=][ ]*[^$]*[^$]", dest)) {
        G[k] = dest[0]
        break
      }
    }
  }

  # New fields values.
  # Field names temporarily get a "2" suffix so that removing the original
  # lines below cannot also remove the newly inserted ones.
  PROCINFO["sorted_in"] = "@ind_str_asc"
  for(k in G) {
    if(G[k] !~ Template) {
      # keep "| name =" (drop the existing value), then append the template call
      N[k] = subs(substr(G[k], index(G[k], "=") + 1, length(G[k])), "", G[k])
      N[k] = N[k] " {{" Template "|" k "}}"
    }
    else
      N[k] = G[k]
    N[k] = subs(k, k "2", N[k])
  }

  i = split(article, a, "\n")

  # Find location of population_total (last matching line wins).
  # 0 means "not found"; a line number can never be 0, so a match on the
  # final line is no longer mistaken for a miss.
  re = "^[ ]*[|][ ]*population_total[ ]*[=][ ]*"
  point_pop = 0
  for(c = 1; c <= i; c++) {
    if(a[c] ~ re)
      point_pop = c
  }
  if(point_pop == 0) {
    popbotSkipLog("lognopop")
    return article
  }

  # Find location of area_metro_km2 (0 when absent)
  point_area = 0
  if(ADDAREA) {
    re = "^[ ]*[|][ ]*area_metro_km2[ ]*[=][ ]*"
    for(c = 1; c <= i; c++) {
      if(a[c] ~ re)
        point_area = c
    }
  }

  # Rebuild article with new fields in correct location within infobox
  fp = ""
  for(c = 1; c <= i; c++) {
    if(c == point_pop) {
      # Population only, or no area_metro_km2: add all fields together.
      # Otherwise add only the population fields here.
      for(z in N) {
        if( ! ADDAREA || point_area == 0 || z ~ /population/)
          fp = fp "\n" N[z]
      }
      # NOTE(review): the original population_total line is re-emitted only
      # in area mode (and removed again below); in population-only mode it is
      # dropped right here. Kept as in the original.
      if(ADDAREA)
        fp = fp "\n" a[c]
    }
    else if(ADDAREA && c == point_area) {
      # area_metro_km2 exists, add only the area fields
      for(z in N) {
        if(z ~ /area/)
          fp = fp "\n" N[z]
      }
      fp = fp "\n" a[c]
    }
    else if(c == 1)              # first line, don't add extra \n
      fp = a[1]
    else
      fp = fp "\n" a[c]
  }

  # delete the original fields
  for(z in G)
    fp = subs(G[z] "\n", "", fp)

  # remove the trailing "2" from new fields
  for(z in G)
    fp = subs(z "2", z, fp)

  # print fp > "o"

  Count++
  article = fp
  return article

}