User talk:Alecmconroy/Language study
This was the code used to generate the various files. It's not meant to be reused and is more an "interactive session log" than code. But if you want to know how I went from the csv to the other files, this is how.
Code
<?php
// Import wm-language.csv into $resultsAssoc, indexed as
// $resultsAssoc[row heading][column heading] = cell value.
//
// Record 0 supplies the column headings. Records 1-4 are kept in
// $csv_contents / $row_headings but are not treated as language rows; every
// later record contributes its first field as a language code and its
// remaining fields as that language's matrix row.
$arrResult = array();
$csv_contents = array();
$row_headings = array();
$column_headings = array();
$resultsAssoc = array();
$connectionMatrix = array();
$langcodes = array();
$counter = 0;

$file_handle = fopen("wm-language.csv", "r");
if ($file_handle === false) {
    // Nothing below can work without the matrix, and looping on feof() with
    // a failed handle never terminates cleanly.
    throw new RuntimeException("Unable to open wm-language.csv");
}

// fgetcsv() returns false at end of file (and on read errors). Testing its
// return value, rather than feof() before the read, keeps that final false
// from being processed as if it were a data row.
// NOTE(review): the 1024 length is kept from the original; records longer
// than that are split into chunks by fgetcsv() -- confirm no line of the
// CSV exceeds it. Separator, enclosure and escape are the historical
// defaults, spelled out explicitly.
while (($line_of_text = fgetcsv($file_handle, 1024, ',', '"', '\\')) !== false) {
    $csv_contents[] = $line_of_text;
    $row_headings[] = $line_of_text[0];

    if ($counter == 0) {
        $column_headings = $line_of_text;
    }

    if ($counter > 4) {
        $row_lang = $line_of_text[0];

        // A blank line parses as array(null); do not register it as a language.
        if ($row_lang !== null && $row_lang !== '') {
            $langcodes[] = $row_lang;
            foreach ($line_of_text as $key => $entry) {
                // Field 0 is the row heading itself; fields beyond the header
                // row have no column name to be stored under.
                if ($key > 0 && isset($column_headings[$key])) {
                    $resultsAssoc[$row_lang][$column_headings[$key]] = $entry;
                }
            }
        }
    }

    $counter++;
}
fclose($file_handle);
//print_r($resultsAssoc);
// save ***********************************************
/*$matrix_serialized = serialize($resultsAssoc);
file_put_contents('wm-language-matrix-serialized', $matrix_serialized);*/
// Sanity Check-- if everything went okay, this should look like Active Users ******************
/*
echo "which individual languages can instantly reach most active users??<br/>";
foreach ($langcodes as $langcode)
{
echo "$langcode , ". $resultsAssoc[$langcode][$langcode] ."<br/>";
}
// It did.
*/
// which language has most bilingual speakers
// technically incorrect, as a person who speaks five languages will get counted five times
// but this makes logical sense, as such a person is at least five times as valuable for communication
/*
echo "ACE active users: ".$resultsAssoc["ace"]["ace"] ."\n";
echo "which individual languages has the most multilingual active users??<br/>";
$total_multilingual_by_project= array();
foreach ($langcodes as $langcode)
{
$total_multilingual_by_project[$langcode]=0;
$sum=0;
foreach ($langcodes as $langcodeb)
{
$sum=$sum+$resultsAssoc[$langcode][$langcodeb];
if ($resultsAssoc[$langcode][$langcodeb]!=$resultsAssoc[$langcodeb][$langcode]) echo "error: $langcode $langcodeb\n";
}
$sum=$sum-$resultsAssoc[$langcode][$langcode];
$total_multilingual_by_project[$langcode]=$sum;
echo "$langcode , ".$sum."\n";
}
*/
// Language codes to chart, in the order they are written out as graph nodes.
$langlist = array(
    'en', 'de', 'fr',
    'ru', 'ja', 'es', 'it', 'pl', 'zh', 'nl', 'pt', 'ar', 'hi',
    'sv', 'he', 'hu', 'fi', 'cs', 'uk', 'ko', 'no', 'ca', 'tr',
    'fa', 'da', 'bg', 'ro', 'id', 'th', 'vi', 'sr', 'el', 'eo',
    'sk', 'simple', 'hr', 'lt', 'et', 'sl', 'ka', 'az', 'lv', 'eu',
    'gl', 'ml', 'ta', 'la', 'mk', 'ms', 'be-x-old', 'nn', 'be', 'cy',
    'bs', 'oc', 'lb', 'ga', 'af', 'is', 'jv', 'sh', 'hy', 'tl',
    'sq', 'an', 'bn', 'br', 'mr', 'ast', 'sw', 'tt', 'war', 'io',
    'bar', 'lmo', 'qu', 'fy', 'kk', 'ku', 'te', 'zh-min-nan', 'als', 'ceb',
    'scn', 'os', 'pms', 'zh-yue', 'ia', 'ur', 'cv', 'gd', 'kn', 'uz',
    'vo', 'gv', 'hsb', 'ht', 'li', 'mn', 'nah', 'bpy', 'nap', 'tg',
    'yi', 'yo', 'bat-smg', 'fo', 'nds', 'su', 'arz', 'hif', 'vec', 'am',
    'gan', 'my', 'pam', 'sah', 'pnb', 'se', 'wa', 'ksh', 'ne', 'sco',
    'vls', 'gu', 'nds-nl', 'sa', 'ang', 'co', 'frp', 'fur', 'ckb', 'mt',
    'new', 'nov', 'si', 'wuu', 'bcl', 'diq', 'fiu-vro', 'kw', 'mhr', 'nrm',
    'szl', 'csb', 'ps', 'so', 'tk', 'ba', 'lad', 'ln', 'zh-classical', 'cbk-zam',
    'dsb', 'ie', 'lij', 'map-bms', 'mi', 'rm', 'stq', 'arc', 'ay', 'bo',
    'crh', 'gn', 'km', 'kv', 'mg', 'nv', 'sc', 'bug', 'dv', 'eml',
    'ext', 'ilo', 'kl', 'ky', 'pdc', 'udm', 'wo', 'xal', 'jbo', 'krc',
    'mdf', 'pap', 'pa', 'roa-rup', 'tet', 'ug', 'zea', 'cu', 'hak', 'haw',
    'koi', 'mwl', 'myv', 'or', 'tpi', 'ace', 'av', 'ce', 'kab', 'mrj',
    'mzn', 'pcd', 'pi', 'roa-tara', 'ab', 'bh', 'bjn', 'ig', 'lo', 'na',
    'pag', 'rw', 'sm', 'bm', 'ee', 'frr', 'ik', 'iu', 'kg', 'lbe',
    'pfl', 'pnt', 'srn', 'ss', 'ty', 'kaa', 'pih', 'rmy', 'to', 'bi',
    'cdo', 'chr', 'fj', 'gag', 'dz', 'ks', 'ltg', 'ten', 'ts', 'cr',
    'ff', 'glk', 'got', 'kbd', 'ny', 'tum', 've', 'za', 'ak', 'as',
    'bxr', 'ch', 'ha', 'lg', 'om', 'rn', 'sd', 'sg', 'st', 'ti',
    'tn', 'xh',
);
// unused **********************
//print_r($total_multilingual_by_project);
//include_once("wm-language-libraries.php");
//$sep=',';
//WriteCsvFile("wm-langage-multilingualsbyproj.csv",$total_multilingual_by_project,$sep);
// Which language has the greatest "connection diversity" (diminishing returns of additional bilinguals), >50 in language
// For each language, what are their "conduit languages" -- languages that offer good chances of intercommunication with both them and en
// Which languages are more strongly connected to one of the other world languages than to en?
// For each language, what is its connection to en?
/*
foreach ($langlist as $langlistitem)
{
if ($langlistitem=='en') continue;
//if ($langlistitem=='hi') break;
echo "===$langlistitem===\n";
$aTemp = $resultsAssoc[$langlistitem];
asort($aTemp, SORT_NUMERIC);
$TopFew = array_slice($aTemp, 0, 3, true);
if ($resultsAssoc[$langlistitem]['en']>$TopFew[1]) {echo "'''";}
echo "Direct: $langlistitem <-".$resultsAssoc[$langlistitem]['en']."-> en ";
if ($resultsAssoc[$langlistitem]['en']>$TopFew[1]) {echo "'''";}
echo "<br/>\n";
$thislangarray= array($resultsAssoc[$langlistitem]);
asort($thislangarray[0]);
$thislangarray=array_reverse($thislangarray[0],true);
$counterb=0;
foreach ($thislangarray as $indirectlang => $val)
{ // print_r($thislangarray); echo "\n ^^ $indirectlang $val ^^"; exit();
if ($indirectlang!=$langlistitem && $indirectlang!='en')
{
if ($resultsAssoc[$langlistitem][$indirectlang]>$resultsAssoc[$langlistitem]['en']){echo "'''";}
echo "Indirect: $langlistitem <-".$resultsAssoc[$langlistitem][$indirectlang]."-> $indirectlang <-".$resultsAssoc[$indirectlang]['en']."-> en , multiplicative weight: ".$resultsAssoc[$indirectlang]['en']*$resultsAssoc[$langlistitem][$indirectlang];
if ($resultsAssoc[$langlistitem][$indirectlang]>$resultsAssoc[$langlistitem]['en']){echo "'''";}
echo " <br/>\n";
if ($counterb>5) break;
$counterb++;
}
}
}
*/
// make nodes.csv
/*
echo "id,Label,Weight\n";
foreach ($langlist as $key=>$langlistitem)
{
echo ($key+1)." , $langlistitem,".$resultsAssoc[$langlistitem][$langlistitem]."\n";
}
*/
// make edge length
/*
echo "Source,Target,Weight,Type\n";
foreach ($langlist as $key=>$langlistitem)
{
foreach ($langlist as $keyb=>$langlistitemb)
{
// each language pair
if ($langlistitem!=$langlistitemb && $key<$keyb && $resultsAssoc[$langlistitem][$langlistitemb]>0)
{
echo ($key+1)." , ".($keyb+1)." , ".$resultsAssoc[$langlistitem][$langlistitemb].",Undirected\n";
}
}
}
*/
/*
// Make wm-lang-topfewedges.gdf
// make nodes
echo "nodedef>name VARCHAR,weight INT\n";
//echo "id,Label,Weight\n";
foreach ($langlist as $key=>$langlistitem)
{
echo "$langlistitem,".$resultsAssoc[$langlistitem][$langlistitem]."\n";
}
// Make Edges
echo "edgedef>node1 VARCHAR,node2 VARCHAR,weight INT,directed BOOLEAN, topedge BOOLEAN\n";
foreach ($langlist as $key=>$langlistitem)
{
$aTemp = $resultsAssoc[$langlistitem];
asort($aTemp, SORT_NUMERIC);
$aTemp = array_reverse($aTemp);
$TopFew = array_slice($aTemp, 0, 3, true);
// print_r($TopFew);
foreach ($TopFew as $keyb=>$langlistitemb)
{
// each lang pair
if ($langlistitem!=$keyb && $resultsAssoc[$langlistitem][$keyb]>0) // no self-connections, no 0 weights.
{
echo "$langlistitem,$keyb,".$resultsAssoc[$langlistitem][$keyb].",False\n";
}
}
}
echo "nodedef>name VARCHAR,weight INT\n";
//echo "id,Label,Weight\n";
foreach ($langlist as $key=>$langlistitem)
{
echo "$langlistitem,".$resultsAssoc[$langlistitem][$langlistitem]."\n";
}
// Make Edges
echo "edgedef>node1 VARCHAR,node2 VARCHAR,weight INT,directed BOOLEAN, topedge BOOLEAN\n";
foreach ($langlist as $key=>$langlistitem)
{
$aTemp = $resultsAssoc[$langlistitem];
asort($aTemp, SORT_NUMERIC);
$aTemp = array_reverse($aTemp);
$TopFew = array_slice($aTemp, 0, 3, true);
// print_r($TopFew);
foreach ($TopFew as $keyb=>$langlistitemb)
{
// each lang pair
if ($langlistitem!=$keyb && $resultsAssoc[$langlistitem][$keyb]>0) // no self-connections, no 0 weights.
{
echo "$langlistitem,$keyb,".$resultsAssoc[$langlistitem][$keyb].",False\n";
}
}
}
*/
/*
// Make wm-lang-top-one
// todo later: world languages, weight according translation priorities
// todo later: parent language only, only world languages leaked to parent company.
echo "nodedef>name VARCHAR,weight INT\n";
//echo "id,Label,Weight\n";
foreach ($langlist as $key=>$langlistitem)
{
echo "$langlistitem,".$resultsAssoc[$langlistitem][$langlistitem]."\n";
}
// Make Edges
echo "edgedef>node1 VARCHAR,node2 VARCHAR,weight INT,directed BOOLEAN, topedge BOOLEAN\n";
foreach ($langlist as $key=>$langlistitem)
{
$aTemp = $resultsAssoc[$langlistitem];
asort($aTemp, SORT_NUMERIC);
$aTemp = array_reverse($aTemp);
$TopFew = array_slice($aTemp, 0, 3, true);
// print_r($TopFew);
foreach ($TopFew as $keyb=>$langlistitemb)
{
// each lang pair
if ($langlistitem!=$keyb && $resultsAssoc[$langlistitem][$keyb]>0) // no self-connections, no 0 weights.
{
echo "$langlistitem,$keyb,".$resultsAssoc[$langlistitem][$keyb].",true,True\n";
break;
}
}
}
*/
// Make wm-lang-binarytree
// todo later: world languages, weight according translation priorities
// todo later: parent language only, only world languages leaked to parent company.
// GDF node section: a header row, then one "name,weight" row per charted
// language. The weight is the language's own diagonal cell of the matrix.
print "nodedef>name VARCHAR,weight INT\n";
foreach ($langlist as $nodeLang) {
    printf("%s,%s\n", $nodeLang, $resultsAssoc[$nodeLang][$nodeLang]);
}

// GDF edge section header; the edge rows themselves are printed afterwards.
print "edgedef>node1 VARCHAR,node2 VARCHAR,weight INT,directed BOOLEAN, topedge BOOLEAN\n";
/*
foreach ($langlist as $key=>$langlistitem)
{
$aTemp = $resultsAssoc[$langlistitem];
asort($aTemp, SORT_NUMERIC);
$aTemp = array_reverse($aTemp);
$TopFew = array_slice($aTemp, 0, 3, true);
// print_r($TopFew);
foreach ($TopFew as $keyb=>$langlistitemb)
{
// each lang pair
if ($langlistitem!=$keyb && $resultsAssoc[$langlistitem][$keyb]>0) // no self-connections, no 0 weights.
{
echo "$langlistitem,$keyb,".$resultsAssoc[$langlistitem][$keyb].",true,True\n";
break;
}
}
}
*/
// Registry of languages already placed in the tree, keyed by language code.
// languagetree() reads and updates it through `global`.
$takenlangs = array();

// Grow the tree level by level with English as the root.
languagetree(array('en'));
/**
 * Prints the edges of a language tree, one GDF edge row per line.
 *
 * Works level by level, starting from $langlistitemArray. Every language on
 * the current level is first marked as taken. Each of them then claims, in
 * descending order of connection weight, up to two languages that are not yet
 * taken, are not the language itself and have a weight greater than zero.
 * Every claimed pair is echoed as "parent,child,weight,true,True\n", and the
 * claimed languages become the next level. The walk ends as soon as a level
 * claims nothing.
 *
 * Reads the global $resultsAssoc (weights indexed as [language][language])
 * and reads and updates the global $takenlangs (language code => "taken").
 *
 * @param array $langlistitemArray Language codes forming the first level.
 * @return void
 */
function languagetree($langlistitemArray)
{
    global $resultsAssoc, $takenlangs;

    if (!is_array($takenlangs)) {
        $takenlangs = array();
    }

    // Looping over levels replaces the original tail call, which recursed
    // again even when no language had been claimed and so never terminated.
    $currentLevel = $langlistitemArray;
    while (!empty($currentLevel)) {
        // Mark the whole level first so siblings cannot claim each other.
        foreach ($currentLevel as $langlistitem) {
            $takenlangs[$langlistitem] = "taken";
        }

        $nextLevel = array();
        foreach ($currentLevel as $langlistitem) {
            // A code without its own row in the matrix has no connections to rank.
            if (!isset($resultsAssoc[$langlistitem]) || !is_array($resultsAssoc[$langlistitem])) {
                continue;
            }

            // Strongest connections first; `true` keeps the language codes as keys.
            $connections = $resultsAssoc[$langlistitem];
            asort($connections, SORT_NUMERIC);
            $connections = array_reverse($connections, true);

            $claimed = 0;
            foreach ($connections as $candidate => $weight) {
                $isTaken = isset($takenlangs[$candidate]) && $takenlangs[$candidate] == "taken";
                if ($isTaken || $candidate == $langlistitem || !($weight > 0)) {
                    continue;
                }

                echo "$langlistitem,$candidate,$weight,true,True\n";
                $nextLevel[] = $candidate;
                $takenlangs[$candidate] = "taken";
                $claimed++;

                // Each parent gets at most two children.
                if ($claimed == 2) {
                    break;
                }
            }
        }

        $currentLevel = $nextLevel;
    }
}